Coverage Report

Created: 2025-10-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.1M
{
18
    /* check if pointer is at given position */
19
20
14.1M
    Py_ssize_t thisp, thatp;
21
22
14.1M
    switch (at) {
23
24
6.95M
    case SRE_AT_BEGINNING:
25
6.95M
    case SRE_AT_BEGINNING_STRING:
26
6.95M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.79M
    case SRE_AT_END:
33
4.79M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
29.6k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.79M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.41M
    case SRE_AT_END_STRING:
42
2.41M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.1M
    }
87
88
0
    return 0;
89
14.1M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.7M
{
18
    /* check if pointer is at given position */
19
20
12.7M
    Py_ssize_t thisp, thatp;
21
22
12.7M
    switch (at) {
23
24
6.92M
    case SRE_AT_BEGINNING:
25
6.92M
    case SRE_AT_BEGINNING_STRING:
26
6.92M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.41M
    case SRE_AT_END:
33
4.41M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
29.0k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.41M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.43M
    case SRE_AT_END_STRING:
42
1.43M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.7M
    }
87
88
0
    return 0;
89
12.7M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
821k
{
18
    /* check if pointer is at given position */
19
20
821k
    Py_ssize_t thisp, thatp;
21
22
821k
    switch (at) {
23
24
28.6k
    case SRE_AT_BEGINNING:
25
28.6k
    case SRE_AT_BEGINNING_STRING:
26
28.6k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
295k
    case SRE_AT_END:
33
295k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
60
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
295k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
497k
    case SRE_AT_END_STRING:
42
497k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
821k
    }
87
88
0
    return 0;
89
821k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
567k
{
18
    /* check if pointer is at given position */
19
20
567k
    Py_ssize_t thisp, thatp;
21
22
567k
    switch (at) {
23
24
3.85k
    case SRE_AT_BEGINNING:
25
3.85k
    case SRE_AT_BEGINNING_STRING:
26
3.85k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.7k
    case SRE_AT_END:
33
80.7k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
503
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.7k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
482k
    case SRE_AT_END_STRING:
42
482k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
567k
    }
87
88
0
    return 0;
89
567k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.54G
{
94
    /* check if character is a member of the given set */
95
96
1.54G
    int ok = 1;
97
98
3.41G
    for (;;) {
99
3.41G
        switch (*set++) {
100
101
997M
        case SRE_OP_FAILURE:
102
997M
            return !ok;
103
104
977M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
977M
            if (ch == set[0])
107
4.34M
                return ok;
108
973M
            set++;
109
973M
            break;
110
111
12.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.0M
            if (sre_category(set[0], (int) ch))
114
8.06M
                return ok;
115
4.01M
            set++;
116
4.01M
            break;
117
118
781M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
781M
            if (ch < 256 &&
121
680M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
339M
                return ok;
123
441M
            set += 256/SRE_CODE_BITS;
124
441M
            break;
125
126
324M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
324M
            if (set[0] <= ch && ch <= set[1])
129
198M
                return ok;
130
126M
            set += 2;
131
126M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
326M
        case SRE_OP_NEGATE:
148
326M
            ok = !ok;
149
326M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.41G
        }
175
3.41G
    }
176
1.54G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
311M
{
94
    /* check if character is a member of the given set */
95
96
311M
    int ok = 1;
97
98
629M
    for (;;) {
99
629M
        switch (*set++) {
100
101
165M
        case SRE_OP_FAILURE:
102
165M
            return !ok;
103
104
180M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
180M
            if (ch == set[0])
107
2.05M
                return ok;
108
178M
            set++;
109
178M
            break;
110
111
11.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.4M
            if (sre_category(set[0], (int) ch))
114
7.43M
                return ok;
115
4.00M
            set++;
116
4.00M
            break;
117
118
81.6M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
81.6M
            if (ch < 256 &&
121
81.6M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
41.8M
                return ok;
123
39.8M
            set += 256/SRE_CODE_BITS;
124
39.8M
            break;
125
126
155M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
155M
            if (set[0] <= ch && ch <= set[1])
129
93.9M
                return ok;
130
61.8M
            set += 2;
131
61.8M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
34.0M
        case SRE_OP_NEGATE:
148
34.0M
            ok = !ok;
149
34.0M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
629M
        }
175
629M
    }
176
311M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
694M
{
94
    /* check if character is a member of the given set */
95
96
694M
    int ok = 1;
97
98
1.62G
    for (;;) {
99
1.62G
        switch (*set++) {
100
101
488M
        case SRE_OP_FAILURE:
102
488M
            return !ok;
103
104
544M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
544M
            if (ch == set[0])
107
1.40M
                return ok;
108
542M
            set++;
109
542M
            break;
110
111
207k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
207k
            if (sre_category(set[0], (int) ch))
114
199k
                return ok;
115
7.62k
            set++;
116
7.62k
            break;
117
118
301M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
301M
            if (ch < 256 &&
121
244M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
112M
                return ok;
123
189M
            set += 256/SRE_CODE_BITS;
124
189M
            break;
125
126
147M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
147M
            if (set[0] <= ch && ch <= set[1])
129
91.7M
                return ok;
130
56.1M
            set += 2;
131
56.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
141M
        case SRE_OP_NEGATE:
148
141M
            ok = !ok;
149
141M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.62G
        }
175
1.62G
    }
176
694M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
542M
{
94
    /* check if character is a member of the given set */
95
96
542M
    int ok = 1;
97
98
1.16G
    for (;;) {
99
1.16G
        switch (*set++) {
100
101
343M
        case SRE_OP_FAILURE:
102
343M
            return !ok;
103
104
253M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
253M
            if (ch == set[0])
107
879k
                return ok;
108
252M
            set++;
109
252M
            break;
110
111
437k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
437k
            if (sre_category(set[0], (int) ch))
114
436k
                return ok;
115
820
            set++;
116
820
            break;
117
118
397M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
397M
            if (ch < 256 &&
121
354M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
185M
                return ok;
123
212M
            set += 256/SRE_CODE_BITS;
124
212M
            break;
125
126
20.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
20.6M
            if (set[0] <= ch && ch <= set[1])
129
12.6M
                return ok;
130
8.05M
            set += 2;
131
8.05M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
150M
        case SRE_OP_NEGATE:
148
150M
            ok = !ok;
149
150M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.16G
        }
175
1.16G
    }
176
542M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
578M
{
195
578M
    SRE_CODE chr;
196
578M
    SRE_CHAR c;
197
578M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
578M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
578M
    Py_ssize_t i;
200
578M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
578M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.8M
        end = ptr + maxcount;
205
206
578M
    switch (pattern[0]) {
207
208
515M
    case SRE_OP_IN:
209
        /* repeated set */
210
515M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
860M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
344M
            ptr++;
213
515M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
57.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
57.2M
        chr = pattern[1];
232
57.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
57.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
54.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
54.4M
        else
238
54.4M
#endif
239
60.8M
        while (ptr < end && *ptr == c)
240
3.56M
            ptr++;
241
57.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
6.11M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
6.11M
        chr = pattern[1];
270
6.11M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
6.11M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
2.27M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.27M
        else
276
2.27M
#endif
277
37.4M
        while (ptr < end && *ptr != c)
278
31.3M
            ptr++;
279
6.11M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
578M
    }
319
320
578M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
578M
           ptr - (SRE_CHAR*) state->ptr));
322
578M
    return ptr - (SRE_CHAR*) state->ptr;
323
578M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
136M
{
195
136M
    SRE_CODE chr;
196
136M
    SRE_CHAR c;
197
136M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
136M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
136M
    Py_ssize_t i;
200
136M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
136M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
2.64M
        end = ptr + maxcount;
205
206
136M
    switch (pattern[0]) {
207
208
89.2M
    case SRE_OP_IN:
209
        /* repeated set */
210
89.2M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
199M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
110M
            ptr++;
213
89.2M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
47.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
47.3M
        chr = pattern[1];
232
47.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
47.3M
        c = (SRE_CHAR) chr;
234
47.3M
#if SIZEOF_SRE_CHAR < 4
235
47.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
47.3M
        else
238
47.3M
#endif
239
48.9M
        while (ptr < end && *ptr == c)
240
1.55M
            ptr++;
241
47.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
241k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
241k
        chr = pattern[1];
270
241k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
241k
        c = (SRE_CHAR) chr;
272
241k
#if SIZEOF_SRE_CHAR < 4
273
241k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
241k
        else
276
241k
#endif
277
6.98M
        while (ptr < end && *ptr != c)
278
6.74M
            ptr++;
279
241k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
136M
    }
319
320
136M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
136M
           ptr - (SRE_CHAR*) state->ptr));
322
136M
    return ptr - (SRE_CHAR*) state->ptr;
323
136M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
249M
{
195
249M
    SRE_CODE chr;
196
249M
    SRE_CHAR c;
197
249M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
249M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
249M
    Py_ssize_t i;
200
249M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
249M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.59M
        end = ptr + maxcount;
205
206
249M
    switch (pattern[0]) {
207
208
240M
    case SRE_OP_IN:
209
        /* repeated set */
210
240M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
355M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
114M
            ptr++;
213
240M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
7.04M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
7.04M
        chr = pattern[1];
232
7.04M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
7.04M
        c = (SRE_CHAR) chr;
234
7.04M
#if SIZEOF_SRE_CHAR < 4
235
7.04M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
7.04M
        else
238
7.04M
#endif
239
8.80M
        while (ptr < end && *ptr == c)
240
1.76M
            ptr++;
241
7.04M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.03M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.03M
        chr = pattern[1];
270
2.03M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.03M
        c = (SRE_CHAR) chr;
272
2.03M
#if SIZEOF_SRE_CHAR < 4
273
2.03M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.03M
        else
276
2.03M
#endif
277
11.3M
        while (ptr < end && *ptr != c)
278
9.30M
            ptr++;
279
2.03M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
249M
    }
319
320
249M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
249M
           ptr - (SRE_CHAR*) state->ptr));
322
249M
    return ptr - (SRE_CHAR*) state->ptr;
323
249M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
192M
{
195
192M
    SRE_CODE chr;
196
192M
    SRE_CHAR c;
197
192M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
192M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
192M
    Py_ssize_t i;
200
192M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
192M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.65M
        end = ptr + maxcount;
205
206
192M
    switch (pattern[0]) {
207
208
185M
    case SRE_OP_IN:
209
        /* repeated set */
210
185M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
305M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
119M
            ptr++;
213
185M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.82M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.82M
        chr = pattern[1];
232
2.82M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.82M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.07M
        while (ptr < end && *ptr == c)
240
250k
            ptr++;
241
2.82M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.83M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.83M
        chr = pattern[1];
270
3.83M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.83M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
19.1M
        while (ptr < end && *ptr != c)
278
15.3M
            ptr++;
279
3.83M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
192M
    }
319
320
192M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
192M
           ptr - (SRE_CHAR*) state->ptr));
322
192M
    return ptr - (SRE_CHAR*) state->ptr;
323
192M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
665M
    do { \
355
665M
        ctx->lastmark = state->lastmark; \
356
665M
        ctx->lastindex = state->lastindex; \
357
665M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
213M
    do { \
360
213M
        state->lastmark = ctx->lastmark; \
361
213M
        state->lastindex = ctx->lastindex; \
362
213M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
224M
    do { \
366
224M
        TRACE(("push last_ptr: %zd", \
367
224M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
224M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
224M
    } while (0)
370
#define LAST_PTR_POP()  \
371
224M
    do { \
372
224M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
224M
        TRACE(("pop last_ptr: %zd", \
374
224M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
224M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
538M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
840M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.28G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
120M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
79.8M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.37G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.37G
do { \
390
1.37G
    alloc_pos = state->data_stack_base; \
391
1.37G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.37G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.37G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
157M
        int j = data_stack_grow(state, sizeof(type)); \
395
157M
        if (j < 0) return j; \
396
157M
        if (ctx_pos != -1) \
397
157M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
157M
    } \
399
1.37G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.37G
    state->data_stack_base += sizeof(type); \
401
1.37G
} while (0)
402
403
1.49G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.49G
do { \
405
1.49G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.49G
    ptr = (type*)(state->data_stack+pos); \
407
1.49G
} while (0)
408
409
564M
#define DATA_STACK_PUSH(state, data, size) \
410
564M
do { \
411
564M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
564M
           data, state->data_stack_base, size)); \
413
564M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
64.7k
        int j = data_stack_grow(state, size); \
415
64.7k
        if (j < 0) return j; \
416
64.7k
        if (ctx_pos != -1) \
417
64.7k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
64.7k
    } \
419
564M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
564M
    state->data_stack_base += size; \
421
564M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
336M
#define DATA_STACK_POP(state, data, size, discard) \
427
336M
do { \
428
336M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
336M
           data, state->data_stack_base-size, size)); \
430
336M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
336M
    if (discard) \
432
336M
        state->data_stack_base -= size; \
433
336M
} while (0)
434
435
1.60G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.60G
do { \
437
1.60G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.60G
           state->data_stack_base-size, size)); \
439
1.60G
    state->data_stack_base -= size; \
440
1.60G
} while(0)
441
442
#define DATA_PUSH(x) \
443
224M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
224M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.37G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.37G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.49G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
552M
    do if (lastmark >= 0) { \
473
340M
        MARK_TRACE("push", (lastmark)); \
474
340M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
340M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
552M
    } while (0)
477
#define MARK_POP(lastmark) \
478
148M
    do if (lastmark >= 0) { \
479
109M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
109M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
109M
        MARK_TRACE("pop", (lastmark)); \
482
148M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.73M
    do if (lastmark >= 0) { \
485
1.73M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.73M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.73M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.73M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
404M
    do if (lastmark >= 0) { \
491
230M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
230M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
230M
        MARK_TRACE("pop discard", (lastmark)); \
494
404M
    } while (0)
495
496
460M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
224M
#define JUMP_MAX_UNTIL_2     2
499
120M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
119M
#define JUMP_REPEAT          7
504
15.2M
#define JUMP_REPEAT_ONE_1    8
505
164M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
154M
#define JUMP_BRANCH          11
508
79.8M
#define JUMP_ASSERT          12
509
39.3M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
918M
    ctx->pattern = pattern; \
516
918M
    ctx->ptr = ptr; \
517
918M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
918M
    nextctx->pattern = nextpattern; \
519
918M
    nextctx->toplevel = toplevel_; \
520
918M
    nextctx->jump = jumpvalue; \
521
918M
    nextctx->last_ctx_pos = ctx_pos; \
522
918M
    pattern = nextpattern; \
523
918M
    ctx_pos = alloc_pos; \
524
918M
    ctx = nextctx; \
525
918M
    goto entrance; \
526
918M
    jumplabel: \
527
918M
    pattern = ctx->pattern; \
528
918M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
799M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
119M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.42G
    do {                                                           \
553
2.42G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.42G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.42G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.52G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.42G
        do {                               \
588
2.42G
            MAYBE_CHECK_SIGNALS;           \
589
2.42G
            goto *sre_targets[*pattern++]; \
590
2.42G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
460M
{
601
460M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
460M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
460M
    Py_ssize_t ret = 0;
604
460M
    int jump;
605
460M
    unsigned int sigcount = state->sigcount;
606
607
460M
    SRE(match_context)* ctx;
608
460M
    SRE(match_context)* nextctx;
609
460M
    INIT_TRACE(state);
610
611
460M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
460M
    DATA_ALLOC(SRE(match_context), ctx);
614
460M
    ctx->last_ctx_pos = -1;
615
460M
    ctx->jump = JUMP_NONE;
616
460M
    ctx->toplevel = toplevel;
617
460M
    ctx_pos = alloc_pos;
618
619
460M
#if USE_COMPUTED_GOTOS
620
460M
#include "sre_targets.h"
621
460M
#endif
622
623
1.37G
entrance:
624
625
1.37G
    ;  // Fashion statement.
626
1.37G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.37G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
76.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.72M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.72M
                   end - ptr, (size_t) pattern[3]));
634
5.72M
            RETURN_FAILURE;
635
5.72M
        }
636
71.1M
        pattern += pattern[1] + 1;
637
71.1M
    }
638
639
1.37G
#if USE_COMPUTED_GOTOS
640
1.37G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.37G
    {
647
648
1.37G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
481M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
481M
                   ptr, pattern[0]));
653
481M
            {
654
481M
                int i = pattern[0];
655
481M
                if (i & 1)
656
73.1M
                    state->lastindex = i/2 + 1;
657
481M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
473M
                    int j = state->lastmark + 1;
663
480M
                    while (j < i)
664
7.39M
                        state->mark[j++] = NULL;
665
473M
                    state->lastmark = i;
666
473M
                }
667
481M
                state->mark[i] = ptr;
668
481M
            }
669
481M
            pattern++;
670
481M
            DISPATCH;
671
672
481M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
146M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
146M
                   ptr, *pattern));
677
146M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
51.8M
                RETURN_FAILURE;
679
94.9M
            pattern++;
680
94.9M
            ptr++;
681
94.9M
            DISPATCH;
682
683
94.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
216M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
216M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
216M
            if (ctx->toplevel &&
698
61.2M
                ((state->match_all && ptr != state->end) ||
699
61.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
216M
            state->ptr = ptr;
704
216M
            RETURN_SUCCESS;
705
706
14.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.1M
            if (!SRE(at)(state, ptr, *pattern))
711
3.37M
                RETURN_FAILURE;
712
10.7M
            pattern++;
713
10.7M
            DISPATCH;
714
715
10.7M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
261M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
261M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
261M
            if (ptr >= end ||
749
261M
                !SRE(charset)(state, pattern + 1, *ptr))
750
7.19M
                RETURN_FAILURE;
751
254M
            pattern += pattern[0];
752
254M
            ptr++;
753
254M
            DISPATCH;
754
755
254M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.56M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.56M
                   pattern, ptr, pattern[0]));
758
4.56M
            if (ptr >= end ||
759
4.56M
                sre_lower_ascii(*ptr) != *pattern)
760
130k
                RETURN_FAILURE;
761
4.43M
            pattern++;
762
4.43M
            ptr++;
763
4.43M
            DISPATCH;
764
765
4.43M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
94.4M
        TARGET(SRE_OP_JUMP):
845
94.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
94.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
94.4M
                   ptr, pattern[0]));
850
94.4M
            pattern += pattern[0];
851
94.4M
            DISPATCH;
852
853
154M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
154M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
154M
            LASTMARK_SAVE();
858
154M
            if (state->repeat)
859
111M
                MARK_PUSH(ctx->lastmark);
860
370M
            for (; pattern[0]; pattern += pattern[0]) {
861
308M
                if (pattern[1] == SRE_OP_LITERAL &&
862
147M
                    (ptr >= end ||
863
147M
                     (SRE_CODE) *ptr != pattern[2]))
864
76.2M
                    continue;
865
231M
                if (pattern[1] == SRE_OP_IN &&
866
105M
                    (ptr >= end ||
867
105M
                     !SRE(charset)(state, pattern + 3,
868
105M
                                   (SRE_CODE) *ptr)))
869
77.3M
                    continue;
870
154M
                state->ptr = ptr;
871
154M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
154M
                if (ret) {
873
92.0M
                    if (state->repeat)
874
73.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
92.0M
                    RETURN_ON_ERROR(ret);
876
92.0M
                    RETURN_SUCCESS;
877
92.0M
                }
878
62.4M
                if (state->repeat)
879
18.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
62.4M
                LASTMARK_RESTORE();
881
62.4M
            }
882
62.3M
            if (state->repeat)
883
37.9M
                MARK_POP_DISCARD(ctx->lastmark);
884
62.3M
            RETURN_FAILURE;
885
886
579M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
579M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
579M
                   pattern[1], pattern[2]));
898
899
579M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.15M
                RETURN_FAILURE; /* cannot match */
901
902
578M
            state->ptr = ptr;
903
904
578M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
578M
            RETURN_ON_ERROR(ret);
906
578M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
578M
            ctx->count = ret;
908
578M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
578M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
331M
                RETURN_FAILURE;
917
918
247M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.08M
                ptr == state->end &&
920
78.1k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
78.1k
            {
922
                /* tail is empty.  we're finished */
923
78.1k
                state->ptr = ptr;
924
78.1k
                RETURN_SUCCESS;
925
78.1k
            }
926
927
247M
            LASTMARK_SAVE();
928
247M
            if (state->repeat)
929
177M
                MARK_PUSH(ctx->lastmark);
930
931
247M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
84.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
84.8M
                for (;;) {
936
195M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
125M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
110M
                        ptr--;
939
110M
                        ctx->count--;
940
110M
                    }
941
84.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
69.6M
                        break;
943
15.2M
                    state->ptr = ptr;
944
15.2M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
15.2M
                            pattern+pattern[0]);
946
15.2M
                    if (ret) {
947
15.2M
                        if (state->repeat)
948
13.5M
                            MARK_POP_DISCARD(ctx->lastmark);
949
15.2M
                        RETURN_ON_ERROR(ret);
950
15.2M
                        RETURN_SUCCESS;
951
15.2M
                    }
952
500
                    if (state->repeat)
953
500
                        MARK_POP_KEEP(ctx->lastmark);
954
500
                    LASTMARK_RESTORE();
955
956
500
                    ptr--;
957
500
                    ctx->count--;
958
500
                }
959
69.6M
                if (state->repeat)
960
68.3M
                    MARK_POP_DISCARD(ctx->lastmark);
961
162M
            } else {
962
                /* general case */
963
165M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
164M
                    state->ptr = ptr;
965
164M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
164M
                            pattern+pattern[0]);
967
164M
                    if (ret) {
968
161M
                        if (state->repeat)
969
94.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
161M
                        RETURN_ON_ERROR(ret);
971
161M
                        RETURN_SUCCESS;
972
161M
                    }
973
3.22M
                    if (state->repeat)
974
1.71M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.22M
                    LASTMARK_RESTORE();
976
977
3.22M
                    ptr--;
978
3.22M
                    ctx->count--;
979
3.22M
                }
980
1.11M
                if (state->repeat)
981
911k
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.11M
            }
983
70.7M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
119M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
119M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
119M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
119M
            ctx->u.rep = repeat_pool_malloc(state);
1127
119M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
119M
            ctx->u.rep->count = -1;
1131
119M
            ctx->u.rep->pattern = pattern;
1132
119M
            ctx->u.rep->prev = state->repeat;
1133
119M
            ctx->u.rep->last_ptr = NULL;
1134
119M
            state->repeat = ctx->u.rep;
1135
1136
119M
            state->ptr = ptr;
1137
119M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
119M
            state->repeat = ctx->u.rep->prev;
1139
119M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
119M
            if (ret) {
1142
119M
                RETURN_ON_ERROR(ret);
1143
119M
                RETURN_SUCCESS;
1144
119M
            }
1145
111k
            RETURN_FAILURE;
1146
1147
236M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
236M
            ctx->u.rep = state->repeat;
1155
236M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
236M
            state->ptr = ptr;
1159
1160
236M
            ctx->count = ctx->u.rep->count+1;
1161
1162
236M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
236M
                   ptr, ctx->count));
1164
1165
236M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
236M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
11.5M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
224M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
224M
                ctx->u.rep->count = ctx->count;
1185
224M
                LASTMARK_SAVE();
1186
224M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
224M
                LAST_PTR_PUSH();
1189
224M
                ctx->u.rep->last_ptr = state->ptr;
1190
224M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
224M
                        ctx->u.rep->pattern+3);
1192
224M
                LAST_PTR_POP();
1193
224M
                if (ret) {
1194
116M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
116M
                    RETURN_ON_ERROR(ret);
1196
116M
                    RETURN_SUCCESS;
1197
116M
                }
1198
108M
                MARK_POP(ctx->lastmark);
1199
108M
                LASTMARK_RESTORE();
1200
108M
                ctx->u.rep->count = ctx->count-1;
1201
108M
                state->ptr = ptr;
1202
108M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
120M
            state->repeat = ctx->u.rep->prev;
1207
120M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
120M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
120M
            RETURN_ON_SUCCESS(ret);
1211
975k
            state->ptr = ptr;
1212
975k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
79.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
79.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
79.8M
                   ptr, pattern[1]));
1565
79.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
79.8M
            state->ptr = ptr - pattern[1];
1568
79.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
79.8M
            RETURN_ON_FAILURE(ret);
1570
76.1M
            pattern += pattern[0];
1571
76.1M
            DISPATCH;
1572
1573
76.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
39.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
39.3M
                   ptr, pattern[1]));
1578
39.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
39.3M
                state->ptr = ptr - pattern[1];
1580
39.3M
                LASTMARK_SAVE();
1581
39.3M
                if (state->repeat)
1582
39.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
78.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
78.6M
                if (ret) {
1586
13.3k
                    if (state->repeat)
1587
13.3k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
13.3k
                    RETURN_ON_ERROR(ret);
1589
13.3k
                    RETURN_FAILURE;
1590
13.3k
                }
1591
39.3M
                if (state->repeat)
1592
39.3M
                    MARK_POP(ctx->lastmark);
1593
39.3M
                LASTMARK_RESTORE();
1594
39.3M
            }
1595
39.3M
            pattern += pattern[0];
1596
39.3M
            DISPATCH;
1597
1598
39.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.37G
exit:
1620
1.37G
    ctx_pos = ctx->last_ctx_pos;
1621
1.37G
    jump = ctx->jump;
1622
1.37G
    DATA_POP_DISCARD(ctx);
1623
1.37G
    if (ctx_pos == -1) {
1624
460M
        state->sigcount = sigcount;
1625
460M
        return ret;
1626
460M
    }
1627
918M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
918M
    switch (jump) {
1630
224M
        case JUMP_MAX_UNTIL_2:
1631
224M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
224M
            goto jump_max_until_2;
1633
120M
        case JUMP_MAX_UNTIL_3:
1634
120M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
120M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
154M
        case JUMP_BRANCH:
1643
154M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
154M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
119M
        case JUMP_REPEAT:
1658
119M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
119M
            goto jump_repeat;
1660
15.2M
        case JUMP_REPEAT_ONE_1:
1661
15.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
15.2M
            goto jump_repeat_one_1;
1663
164M
        case JUMP_REPEAT_ONE_2:
1664
164M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
164M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
79.8M
        case JUMP_ASSERT:
1673
79.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
79.8M
            goto jump_assert;
1675
39.3M
        case JUMP_ASSERT_NOT:
1676
39.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
39.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
918M
    }
1683
1684
0
    return ret; /* should never get here */
1685
918M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
140M
{
601
140M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
140M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
140M
    Py_ssize_t ret = 0;
604
140M
    int jump;
605
140M
    unsigned int sigcount = state->sigcount;
606
607
140M
    SRE(match_context)* ctx;
608
140M
    SRE(match_context)* nextctx;
609
140M
    INIT_TRACE(state);
610
611
140M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
140M
    DATA_ALLOC(SRE(match_context), ctx);
614
140M
    ctx->last_ctx_pos = -1;
615
140M
    ctx->jump = JUMP_NONE;
616
140M
    ctx->toplevel = toplevel;
617
140M
    ctx_pos = alloc_pos;
618
619
140M
#if USE_COMPUTED_GOTOS
620
140M
#include "sre_targets.h"
621
140M
#endif
622
623
285M
entrance:
624
625
285M
    ;  // Fashion statement.
626
285M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
285M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.72M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.72M
                   end - ptr, (size_t) pattern[3]));
634
5.72M
            RETURN_FAILURE;
635
5.72M
        }
636
24.1M
        pattern += pattern[1] + 1;
637
24.1M
    }
638
639
279M
#if USE_COMPUTED_GOTOS
640
279M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
279M
    {
647
648
279M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
123M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
123M
                   ptr, pattern[0]));
653
123M
            {
654
123M
                int i = pattern[0];
655
123M
                if (i & 1)
656
20.4M
                    state->lastindex = i/2 + 1;
657
123M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
119M
                    int j = state->lastmark + 1;
663
124M
                    while (j < i)
664
4.01M
                        state->mark[j++] = NULL;
665
119M
                    state->lastmark = i;
666
119M
                }
667
123M
                state->mark[i] = ptr;
668
123M
            }
669
123M
            pattern++;
670
123M
            DISPATCH;
671
672
123M
        TARGET(SRE_OP_LITERAL):
673
            /* match a single literal character */
674
            /* <LITERAL> <code> */
675
53.9M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.9M
                   ptr, *pattern));
677
53.9M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.7M
                RETURN_FAILURE;
679
36.1M
            pattern++;
680
36.1M
            ptr++;
681
36.1M
            DISPATCH;
682
683
36.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
46.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
46.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
46.7M
            if (ctx->toplevel &&
698
17.6M
                ((state->match_all && ptr != state->end) ||
699
17.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
46.7M
            state->ptr = ptr;
704
46.7M
            RETURN_SUCCESS;
705
706
12.7M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.7M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.7M
            if (!SRE(at)(state, ptr, *pattern))
711
2.02M
                RETURN_FAILURE;
712
10.7M
            pattern++;
713
10.7M
            DISPATCH;
714
715
10.7M
        TARGET(SRE_OP_CATEGORY):
716
            /* match a character in the given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
32.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
32.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
32.6M
            if (ptr >= end ||
749
32.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
399k
                RETURN_FAILURE;
751
32.2M
            pattern += pattern[0];
752
32.2M
            ptr++;
753
32.2M
            DISPATCH;
754
755
32.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
600k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
600k
                   pattern, ptr, pattern[0]));
758
600k
            if (ptr >= end ||
759
600k
                sre_lower_ascii(*ptr) != *pattern)
760
90.1k
                RETURN_FAILURE;
761
510k
            pattern++;
762
510k
            ptr++;
763
510k
            DISPATCH;
764
765
510k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.6M
        TARGET(SRE_OP_JUMP):
845
25.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.6M
                   ptr, pattern[0]));
850
25.6M
            pattern += pattern[0];
851
25.6M
            DISPATCH;
852
853
46.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
46.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
46.4M
            LASTMARK_SAVE();
858
46.4M
            if (state->repeat)
859
10.6M
                MARK_PUSH(ctx->lastmark);
860
133M
            for (; pattern[0]; pattern += pattern[0]) {
861
111M
                if (pattern[1] == SRE_OP_LITERAL &&
862
53.1M
                    (ptr >= end ||
863
53.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
23.3M
                    continue;
865
88.0M
                if (pattern[1] == SRE_OP_IN &&
866
11.6M
                    (ptr >= end ||
867
11.6M
                     !SRE(charset)(state, pattern + 3,
868
11.6M
                                   (SRE_CODE) *ptr)))
869
6.48M
                    continue;
870
81.5M
                state->ptr = ptr;
871
81.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
81.5M
                if (ret) {
873
24.2M
                    if (state->repeat)
874
10.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.2M
                    RETURN_ON_ERROR(ret);
876
24.2M
                    RETURN_SUCCESS;
877
24.2M
                }
878
57.2M
                if (state->repeat)
879
5.87k
                    MARK_POP_KEEP(ctx->lastmark);
880
57.2M
                LASTMARK_RESTORE();
881
57.2M
            }
882
22.2M
            if (state->repeat)
883
326k
                MARK_POP_DISCARD(ctx->lastmark);
884
22.2M
            RETURN_FAILURE;
885
886
137M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
137M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
137M
                   pattern[1], pattern[2]));
898
899
137M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
936k
                RETURN_FAILURE; /* cannot match */
901
902
136M
            state->ptr = ptr;
903
904
136M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
136M
            RETURN_ON_ERROR(ret);
906
136M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
136M
            ctx->count = ret;
908
136M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
136M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
109M
                RETURN_FAILURE;
917
918
26.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
657k
                ptr == state->end &&
920
55.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
55.7k
            {
922
                /* tail is empty.  we're finished */
923
55.7k
                state->ptr = ptr;
924
55.7k
                RETURN_SUCCESS;
925
55.7k
            }
926
927
26.9M
            LASTMARK_SAVE();
928
26.9M
            if (state->repeat)
929
13.0M
                MARK_PUSH(ctx->lastmark);
930
931
26.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.10M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.10M
                for (;;) {
936
16.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.8M
                        ptr--;
939
11.8M
                        ctx->count--;
940
11.8M
                    }
941
5.10M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.92M
                        break;
943
2.18M
                    state->ptr = ptr;
944
2.18M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.18M
                            pattern+pattern[0]);
946
2.18M
                    if (ret) {
947
2.18M
                        if (state->repeat)
948
552k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.18M
                        RETURN_ON_ERROR(ret);
950
2.18M
                        RETURN_SUCCESS;
951
2.18M
                    }
952
99
                    if (state->repeat)
953
99
                        MARK_POP_KEEP(ctx->lastmark);
954
99
                    LASTMARK_RESTORE();
955
956
99
                    ptr--;
957
99
                    ctx->count--;
958
99
                }
959
2.92M
                if (state->repeat)
960
1.65M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.7M
            } else {
962
                /* general case */
963
23.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
22.8M
                    state->ptr = ptr;
965
22.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
22.8M
                            pattern+pattern[0]);
967
22.8M
                    if (ret) {
968
21.0M
                        if (state->repeat)
969
10.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
21.0M
                        RETURN_ON_ERROR(ret);
971
21.0M
                        RETURN_SUCCESS;
972
21.0M
                    }
973
1.83M
                    if (state->repeat)
974
964k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.83M
                    LASTMARK_RESTORE();
976
977
1.83M
                    ptr--;
978
1.83M
                    ctx->count--;
979
1.83M
                }
980
734k
                if (state->repeat)
981
535k
                    MARK_POP_DISCARD(ctx->lastmark);
982
734k
            }
983
3.65M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
6.28M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
6.28M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
6.28M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
6.28M
            ctx->u.rep = repeat_pool_malloc(state);
1127
6.28M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
6.28M
            ctx->u.rep->count = -1;
1131
6.28M
            ctx->u.rep->pattern = pattern;
1132
6.28M
            ctx->u.rep->prev = state->repeat;
1133
6.28M
            ctx->u.rep->last_ptr = NULL;
1134
6.28M
            state->repeat = ctx->u.rep;
1135
1136
6.28M
            state->ptr = ptr;
1137
6.28M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
6.28M
            state->repeat = ctx->u.rep->prev;
1139
6.28M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
6.28M
            if (ret) {
1142
6.17M
                RETURN_ON_ERROR(ret);
1143
6.17M
                RETURN_SUCCESS;
1144
6.17M
            }
1145
110k
            RETURN_FAILURE;
1146
1147
22.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
22.0M
            ctx->u.rep = state->repeat;
1155
22.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
22.0M
            state->ptr = ptr;
1159
1160
22.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
22.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
22.0M
                   ptr, ctx->count));
1164
1165
22.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
22.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.09M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
17.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
17.9M
                ctx->u.rep->count = ctx->count;
1185
17.9M
                LASTMARK_SAVE();
1186
17.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
17.9M
                LAST_PTR_PUSH();
1189
17.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
17.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
17.9M
                        ctx->u.rep->pattern+3);
1192
17.9M
                LAST_PTR_POP();
1193
17.9M
                if (ret) {
1194
15.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
15.2M
                    RETURN_ON_ERROR(ret);
1196
15.2M
                    RETURN_SUCCESS;
1197
15.2M
                }
1198
2.67M
                MARK_POP(ctx->lastmark);
1199
2.67M
                LASTMARK_RESTORE();
1200
2.67M
                ctx->u.rep->count = ctx->count-1;
1201
2.67M
                state->ptr = ptr;
1202
2.67M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.77M
            state->repeat = ctx->u.rep->prev;
1207
6.77M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.77M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.77M
            RETURN_ON_SUCCESS(ret);
1211
598k
            state->ptr = ptr;
1212
598k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.06M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.06M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.06M
                   ptr, pattern[1]));
1565
2.06M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.06M
            state->ptr = ptr - pattern[1];
1568
2.06M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.06M
            RETURN_ON_FAILURE(ret);
1570
1.98M
            pattern += pattern[0];
1571
1.98M
            DISPATCH;
1572
1573
5.06M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.06M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.06M
                   ptr, pattern[1]));
1578
5.06M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.06M
                state->ptr = ptr - pattern[1];
1580
5.06M
                LASTMARK_SAVE();
1581
5.06M
                if (state->repeat)
1582
5.06M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.1M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.1M
                if (ret) {
1586
1.27k
                    if (state->repeat)
1587
1.27k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.27k
                    RETURN_ON_ERROR(ret);
1589
1.27k
                    RETURN_FAILURE;
1590
1.27k
                }
1591
5.06M
                if (state->repeat)
1592
5.06M
                    MARK_POP(ctx->lastmark);
1593
5.06M
                LASTMARK_RESTORE();
1594
5.06M
            }
1595
5.06M
            pattern += pattern[0];
1596
5.06M
            DISPATCH;
1597
1598
5.06M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
285M
exit:
1620
285M
    ctx_pos = ctx->last_ctx_pos;
1621
285M
    jump = ctx->jump;
1622
285M
    DATA_POP_DISCARD(ctx);
1623
285M
    if (ctx_pos == -1) {
1624
140M
        state->sigcount = sigcount;
1625
140M
        return ret;
1626
140M
    }
1627
144M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
144M
    switch (jump) {
1630
17.9M
        case JUMP_MAX_UNTIL_2:
1631
17.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
17.9M
            goto jump_max_until_2;
1633
6.77M
        case JUMP_MAX_UNTIL_3:
1634
6.77M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.77M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
81.5M
        case JUMP_BRANCH:
1643
81.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
81.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
6.28M
        case JUMP_REPEAT:
1658
6.28M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
6.28M
            goto jump_repeat;
1660
2.18M
        case JUMP_REPEAT_ONE_1:
1661
2.18M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.18M
            goto jump_repeat_one_1;
1663
22.8M
        case JUMP_REPEAT_ONE_2:
1664
22.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
22.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.06M
        case JUMP_ASSERT:
1673
2.06M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.06M
            goto jump_assert;
1675
5.06M
        case JUMP_ASSERT_NOT:
1676
5.06M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.06M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
144M
    }
1683
1684
0
    return ret; /* should never get here */
1685
144M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
225M
{
601
225M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
225M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
225M
    Py_ssize_t ret = 0;
604
225M
    int jump;
605
225M
    unsigned int sigcount = state->sigcount;
606
607
225M
    SRE(match_context)* ctx;
608
225M
    SRE(match_context)* nextctx;
609
225M
    INIT_TRACE(state);
610
611
225M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
225M
    DATA_ALLOC(SRE(match_context), ctx);
614
225M
    ctx->last_ctx_pos = -1;
615
225M
    ctx->jump = JUMP_NONE;
616
225M
    ctx->toplevel = toplevel;
617
225M
    ctx_pos = alloc_pos;
618
619
225M
#if USE_COMPUTED_GOTOS
620
225M
#include "sre_targets.h"
621
225M
#endif
622
623
550M
entrance:
624
625
550M
    ;  // Fashion statement.
626
550M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
550M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
23.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
584
            TRACE(("reject (got %tu chars, need %zu)\n",
633
584
                   end - ptr, (size_t) pattern[3]));
634
584
            RETURN_FAILURE;
635
584
        }
636
23.6M
        pattern += pattern[1] + 1;
637
23.6M
    }
638
639
550M
#if USE_COMPUTED_GOTOS
640
550M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
550M
    {
647
648
550M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
213M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
213M
                   ptr, pattern[0]));
653
213M
            {
654
213M
                int i = pattern[0];
655
213M
                if (i & 1)
656
22.0M
                    state->lastindex = i/2 + 1;
657
213M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
212M
                    int j = state->lastmark + 1;
663
213M
                    while (j < i)
664
1.10M
                        state->mark[j++] = NULL;
665
212M
                    state->lastmark = i;
666
212M
                }
667
213M
                state->mark[i] = ptr;
668
213M
            }
669
213M
            pattern++;
670
213M
            DISPATCH;
671
672
213M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
46.5M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
46.5M
                   ptr, *pattern));
677
46.5M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.7M
                RETURN_FAILURE;
679
28.7M
            pattern++;
680
28.7M
            ptr++;
681
28.7M
            DISPATCH;
682
683
28.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
94.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
94.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
94.9M
            if (ctx->toplevel &&
698
20.7M
                ((state->match_all && ptr != state->end) ||
699
20.7M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
94.9M
            state->ptr = ptr;
704
94.9M
            RETURN_SUCCESS;
705
706
821k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
821k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
821k
            if (!SRE(at)(state, ptr, *pattern))
711
792k
                RETURN_FAILURE;
712
29.5k
            pattern++;
713
29.5k
            DISPATCH;
714
715
29.5k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
116M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
116M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
116M
            if (ptr >= end ||
749
116M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.82M
                RETURN_FAILURE;
751
111M
            pattern += pattern[0];
752
111M
            ptr++;
753
111M
            DISPATCH;
754
755
111M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.82M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.82M
                   pattern, ptr, pattern[0]));
758
2.82M
            if (ptr >= end ||
759
2.82M
                sre_lower_ascii(*ptr) != *pattern)
760
20.2k
                RETURN_FAILURE;
761
2.80M
            pattern++;
762
2.80M
            ptr++;
763
2.80M
            DISPATCH;
764
765
2.80M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
29.0M
        TARGET(SRE_OP_JUMP):
845
29.0M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
29.0M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
29.0M
                   ptr, pattern[0]));
850
29.0M
            pattern += pattern[0];
851
29.0M
            DISPATCH;
852
853
45.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
45.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
45.8M
            LASTMARK_SAVE();
858
45.8M
            if (state->repeat)
859
41.9M
                MARK_PUSH(ctx->lastmark);
860
103M
            for (; pattern[0]; pattern += pattern[0]) {
861
86.1M
                if (pattern[1] == SRE_OP_LITERAL &&
862
41.5M
                    (ptr >= end ||
863
41.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
20.6M
                    continue;
865
65.5M
                if (pattern[1] == SRE_OP_IN &&
866
39.6M
                    (ptr >= end ||
867
39.6M
                     !SRE(charset)(state, pattern + 3,
868
39.6M
                                   (SRE_CODE) *ptr)))
869
32.8M
                    continue;
870
32.7M
                state->ptr = ptr;
871
32.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
32.7M
                if (ret) {
873
28.5M
                    if (state->repeat)
874
26.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
28.5M
                    RETURN_ON_ERROR(ret);
876
28.5M
                    RETURN_SUCCESS;
877
28.5M
                }
878
4.19M
                if (state->repeat)
879
5.44k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.19M
                LASTMARK_RESTORE();
881
4.19M
            }
882
17.3M
            if (state->repeat)
883
15.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
17.3M
            RETURN_FAILURE;
885
886
249M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
249M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
249M
                   pattern[1], pattern[2]));
898
899
249M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
196k
                RETURN_FAILURE; /* cannot match */
901
902
249M
            state->ptr = ptr;
903
904
249M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
249M
            RETURN_ON_ERROR(ret);
906
249M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
249M
            ctx->count = ret;
908
249M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
249M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
157M
                RETURN_FAILURE;
917
918
91.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.65M
                ptr == state->end &&
920
18.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.7k
            {
922
                /* tail is empty.  we're finished */
923
18.7k
                state->ptr = ptr;
924
18.7k
                RETURN_SUCCESS;
925
18.7k
            }
926
927
91.5M
            LASTMARK_SAVE();
928
91.5M
            if (state->repeat)
929
64.0M
                MARK_PUSH(ctx->lastmark);
930
931
91.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
31.1M
                ctx->u.chr = pattern[pattern[0]+1];
935
31.1M
                for (;;) {
936
64.5M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
37.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
33.3M
                        ptr--;
939
33.3M
                        ctx->count--;
940
33.3M
                    }
941
31.1M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
26.7M
                        break;
943
4.38M
                    state->ptr = ptr;
944
4.38M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.38M
                            pattern+pattern[0]);
946
4.38M
                    if (ret) {
947
4.38M
                        if (state->repeat)
948
4.36M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.38M
                        RETURN_ON_ERROR(ret);
950
4.38M
                        RETURN_SUCCESS;
951
4.38M
                    }
952
181
                    if (state->repeat)
953
181
                        MARK_POP_KEEP(ctx->lastmark);
954
181
                    LASTMARK_RESTORE();
955
956
181
                    ptr--;
957
181
                    ctx->count--;
958
181
                }
959
26.7M
                if (state->repeat)
960
26.7M
                    MARK_POP_DISCARD(ctx->lastmark);
961
60.3M
            } else {
962
                /* general case */
963
61.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
60.8M
                    state->ptr = ptr;
965
60.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
60.8M
                            pattern+pattern[0]);
967
60.8M
                    if (ret) {
968
60.0M
                        if (state->repeat)
969
32.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
60.0M
                        RETURN_ON_ERROR(ret);
971
60.0M
                        RETURN_SUCCESS;
972
60.0M
                    }
973
789k
                    if (state->repeat)
974
589k
                        MARK_POP_KEEP(ctx->lastmark);
975
789k
                    LASTMARK_RESTORE();
976
977
789k
                    ptr--;
978
789k
                    ctx->count--;
979
789k
                }
980
297k
                if (state->repeat)
981
294k
                    MARK_POP_DISCARD(ctx->lastmark);
982
297k
            }
983
27.0M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
46.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
46.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
46.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
46.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
46.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
46.2M
            ctx->u.rep->count = -1;
1131
46.2M
            ctx->u.rep->pattern = pattern;
1132
46.2M
            ctx->u.rep->prev = state->repeat;
1133
46.2M
            ctx->u.rep->last_ptr = NULL;
1134
46.2M
            state->repeat = ctx->u.rep;
1135
1136
46.2M
            state->ptr = ptr;
1137
46.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
46.2M
            state->repeat = ctx->u.rep->prev;
1139
46.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
46.2M
            if (ret) {
1142
46.2M
                RETURN_ON_ERROR(ret);
1143
46.2M
                RETURN_SUCCESS;
1144
46.2M
            }
1145
1.04k
            RETURN_FAILURE;
1146
1147
87.8M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
87.8M
            ctx->u.rep = state->repeat;
1155
87.8M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
87.8M
            state->ptr = ptr;
1159
1160
87.8M
            ctx->count = ctx->u.rep->count+1;
1161
1162
87.8M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
87.8M
                   ptr, ctx->count));
1164
1165
87.8M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
87.8M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.61M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
85.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
85.2M
                ctx->u.rep->count = ctx->count;
1185
85.2M
                LASTMARK_SAVE();
1186
85.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
85.2M
                LAST_PTR_PUSH();
1189
85.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
85.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
85.2M
                        ctx->u.rep->pattern+3);
1192
85.2M
                LAST_PTR_POP();
1193
85.2M
                if (ret) {
1194
41.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
41.3M
                    RETURN_ON_ERROR(ret);
1196
41.3M
                    RETURN_SUCCESS;
1197
41.3M
                }
1198
43.9M
                MARK_POP(ctx->lastmark);
1199
43.9M
                LASTMARK_RESTORE();
1200
43.9M
                ctx->u.rep->count = ctx->count-1;
1201
43.9M
                state->ptr = ptr;
1202
43.9M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
46.5M
            state->repeat = ctx->u.rep->prev;
1207
46.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
46.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
46.5M
            RETURN_ON_SUCCESS(ret);
1211
295k
            state->ptr = ptr;
1212
295k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
32.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
32.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
32.3M
                   ptr, pattern[1]));
1565
32.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
32.3M
            state->ptr = ptr - pattern[1];
1568
32.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
32.3M
            RETURN_ON_FAILURE(ret);
1570
29.4M
            pattern += pattern[0];
1571
29.4M
            DISPATCH;
1572
1573
29.4M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
17.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
17.4M
                   ptr, pattern[1]));
1578
17.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
17.4M
                state->ptr = ptr - pattern[1];
1580
17.4M
                LASTMARK_SAVE();
1581
17.4M
                if (state->repeat)
1582
17.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
34.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
34.9M
                if (ret) {
1586
5.23k
                    if (state->repeat)
1587
5.23k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.23k
                    RETURN_ON_ERROR(ret);
1589
5.23k
                    RETURN_FAILURE;
1590
5.23k
                }
1591
17.4M
                if (state->repeat)
1592
17.4M
                    MARK_POP(ctx->lastmark);
1593
17.4M
                LASTMARK_RESTORE();
1594
17.4M
            }
1595
17.4M
            pattern += pattern[0];
1596
17.4M
            DISPATCH;
1597
1598
17.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
550M
exit:
1620
550M
    ctx_pos = ctx->last_ctx_pos;
1621
550M
    jump = ctx->jump;
1622
550M
    DATA_POP_DISCARD(ctx);
1623
550M
    if (ctx_pos == -1) {
1624
225M
        state->sigcount = sigcount;
1625
225M
        return ret;
1626
225M
    }
1627
325M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
325M
    switch (jump) {
1630
85.2M
        case JUMP_MAX_UNTIL_2:
1631
85.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
85.2M
            goto jump_max_until_2;
1633
46.5M
        case JUMP_MAX_UNTIL_3:
1634
46.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
46.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
32.7M
        case JUMP_BRANCH:
1643
32.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
32.7M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
46.2M
        case JUMP_REPEAT:
1658
46.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
46.2M
            goto jump_repeat;
1660
4.38M
        case JUMP_REPEAT_ONE_1:
1661
4.38M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.38M
            goto jump_repeat_one_1;
1663
60.8M
        case JUMP_REPEAT_ONE_2:
1664
60.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
60.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
32.3M
        case JUMP_ASSERT:
1673
32.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
32.3M
            goto jump_assert;
1675
17.4M
        case JUMP_ASSERT_NOT:
1676
17.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
17.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
325M
    }
1683
1684
0
    return ret; /* should never get here */
1685
325M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
94.7M
{
601
94.7M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
94.7M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
94.7M
    Py_ssize_t ret = 0;
604
94.7M
    int jump;
605
94.7M
    unsigned int sigcount = state->sigcount;
606
607
94.7M
    SRE(match_context)* ctx;
608
94.7M
    SRE(match_context)* nextctx;
609
94.7M
    INIT_TRACE(state);
610
611
94.7M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
94.7M
    DATA_ALLOC(SRE(match_context), ctx);
614
94.7M
    ctx->last_ctx_pos = -1;
615
94.7M
    ctx->jump = JUMP_NONE;
616
94.7M
    ctx->toplevel = toplevel;
617
94.7M
    ctx_pos = alloc_pos;
618
619
94.7M
#if USE_COMPUTED_GOTOS
620
94.7M
#include "sre_targets.h"
621
94.7M
#endif
622
623
542M
entrance:
624
625
542M
    ;  // Fashion statement.
626
542M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
542M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
23.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
366
            TRACE(("reject (got %tu chars, need %zu)\n",
633
366
                   end - ptr, (size_t) pattern[3]));
634
366
            RETURN_FAILURE;
635
366
        }
636
23.4M
        pattern += pattern[1] + 1;
637
23.4M
    }
638
639
542M
#if USE_COMPUTED_GOTOS
640
542M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
542M
    {
647
648
542M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
143M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
143M
                   ptr, pattern[0]));
653
143M
            {
654
143M
                int i = pattern[0];
655
143M
                if (i & 1)
656
30.5M
                    state->lastindex = i/2 + 1;
657
143M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
141M
                    int j = state->lastmark + 1;
663
143M
                    while (j < i)
664
2.28M
                        state->mark[j++] = NULL;
665
141M
                    state->lastmark = i;
666
141M
                }
667
143M
                state->mark[i] = ptr;
668
143M
            }
669
143M
            pattern++;
670
143M
            DISPATCH;
671
672
143M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
46.3M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
46.3M
                   ptr, *pattern));
677
46.3M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
16.3M
                RETURN_FAILURE;
679
29.9M
            pattern++;
680
29.9M
            ptr++;
681
29.9M
            DISPATCH;
682
683
29.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
75.0M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
75.0M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
75.0M
            if (ctx->toplevel &&
698
22.8M
                ((state->match_all && ptr != state->end) ||
699
22.8M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
75.0M
            state->ptr = ptr;
704
75.0M
            RETURN_SUCCESS;
705
706
567k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
567k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
567k
            if (!SRE(at)(state, ptr, *pattern))
711
562k
                RETURN_FAILURE;
712
4.30k
            pattern++;
713
4.30k
            DISPATCH;
714
715
4.30k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
112M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
112M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
112M
            if (ptr >= end ||
749
112M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.96M
                RETURN_FAILURE;
751
110M
            pattern += pattern[0];
752
110M
            ptr++;
753
110M
            DISPATCH;
754
755
110M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.13M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.13M
                   pattern, ptr, pattern[0]));
758
1.13M
            if (ptr >= end ||
759
1.13M
                sre_lower_ascii(*ptr) != *pattern)
760
20.2k
                RETURN_FAILURE;
761
1.11M
            pattern++;
762
1.11M
            ptr++;
763
1.11M
            DISPATCH;
764
765
1.11M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
39.7M
        TARGET(SRE_OP_JUMP):
845
39.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
39.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
39.7M
                   ptr, pattern[0]));
850
39.7M
            pattern += pattern[0];
851
39.7M
            DISPATCH;
852
853
62.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
62.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
62.0M
            LASTMARK_SAVE();
858
62.0M
            if (state->repeat)
859
58.5M
                MARK_PUSH(ctx->lastmark);
860
133M
            for (; pattern[0]; pattern += pattern[0]) {
861
110M
                if (pattern[1] == SRE_OP_LITERAL &&
862
52.9M
                    (ptr >= end ||
863
52.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
32.2M
                    continue;
865
78.1M
                if (pattern[1] == SRE_OP_IN &&
866
53.8M
                    (ptr >= end ||
867
53.8M
                     !SRE(charset)(state, pattern + 3,
868
53.8M
                                   (SRE_CODE) *ptr)))
869
37.9M
                    continue;
870
40.2M
                state->ptr = ptr;
871
40.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
40.2M
                if (ret) {
873
39.2M
                    if (state->repeat)
874
36.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
39.2M
                    RETURN_ON_ERROR(ret);
876
39.2M
                    RETURN_SUCCESS;
877
39.2M
                }
878
975k
                if (state->repeat)
879
7.06k
                    MARK_POP_KEEP(ctx->lastmark);
880
975k
                LASTMARK_RESTORE();
881
975k
            }
882
22.8M
            if (state->repeat)
883
22.0M
                MARK_POP_DISCARD(ctx->lastmark);
884
22.8M
            RETURN_FAILURE;
885
886
192M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
192M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
192M
                   pattern[1], pattern[2]));
898
899
192M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
25.6k
                RETURN_FAILURE; /* cannot match */
901
902
192M
            state->ptr = ptr;
903
904
192M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
192M
            RETURN_ON_ERROR(ret);
906
192M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
192M
            ctx->count = ret;
908
192M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
192M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
63.3M
                RETURN_FAILURE;
917
918
128M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
775k
                ptr == state->end &&
920
3.57k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.57k
            {
922
                /* tail is empty.  we're finished */
923
3.57k
                state->ptr = ptr;
924
3.57k
                RETURN_SUCCESS;
925
3.57k
            }
926
927
128M
            LASTMARK_SAVE();
928
128M
            if (state->repeat)
929
100M
                MARK_PUSH(ctx->lastmark);
930
931
128M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
48.5M
                ctx->u.chr = pattern[pattern[0]+1];
935
48.5M
                for (;;) {
936
113M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
73.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
65.2M
                        ptr--;
939
65.2M
                        ctx->count--;
940
65.2M
                    }
941
48.5M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
39.9M
                        break;
943
8.64M
                    state->ptr = ptr;
944
8.64M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
8.64M
                            pattern+pattern[0]);
946
8.64M
                    if (ret) {
947
8.64M
                        if (state->repeat)
948
8.63M
                            MARK_POP_DISCARD(ctx->lastmark);
949
8.64M
                        RETURN_ON_ERROR(ret);
950
8.64M
                        RETURN_SUCCESS;
951
8.64M
                    }
952
220
                    if (state->repeat)
953
220
                        MARK_POP_KEEP(ctx->lastmark);
954
220
                    LASTMARK_RESTORE();
955
956
220
                    ptr--;
957
220
                    ctx->count--;
958
220
                }
959
39.9M
                if (state->repeat)
960
39.9M
                    MARK_POP_DISCARD(ctx->lastmark);
961
80.3M
            } else {
962
                /* general case */
963
80.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
80.9M
                    state->ptr = ptr;
965
80.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
80.9M
                            pattern+pattern[0]);
967
80.9M
                    if (ret) {
968
80.3M
                        if (state->repeat)
969
51.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
80.3M
                        RETURN_ON_ERROR(ret);
971
80.3M
                        RETURN_SUCCESS;
972
80.3M
                    }
973
597k
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
597k
                    LASTMARK_RESTORE();
976
977
597k
                    ptr--;
978
597k
                    ctx->count--;
979
597k
                }
980
81.0k
                if (state->repeat)
981
80.3k
                    MARK_POP_DISCARD(ctx->lastmark);
982
81.0k
            }
983
40.0M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
67.0M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
67.0M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
67.0M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
67.0M
            ctx->u.rep = repeat_pool_malloc(state);
1127
67.0M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
67.0M
            ctx->u.rep->count = -1;
1131
67.0M
            ctx->u.rep->pattern = pattern;
1132
67.0M
            ctx->u.rep->prev = state->repeat;
1133
67.0M
            ctx->u.rep->last_ptr = NULL;
1134
67.0M
            state->repeat = ctx->u.rep;
1135
1136
67.0M
            state->ptr = ptr;
1137
67.0M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
67.0M
            state->repeat = ctx->u.rep->prev;
1139
67.0M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
67.0M
            if (ret) {
1142
67.0M
                RETURN_ON_ERROR(ret);
1143
67.0M
                RETURN_SUCCESS;
1144
67.0M
            }
1145
735
            RETURN_FAILURE;
1146
1147
126M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
126M
            ctx->u.rep = state->repeat;
1155
126M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
126M
            state->ptr = ptr;
1159
1160
126M
            ctx->count = ctx->u.rep->count+1;
1161
1162
126M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
126M
                   ptr, ctx->count));
1164
1165
126M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
126M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.88M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
121M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
121M
                ctx->u.rep->count = ctx->count;
1185
121M
                LASTMARK_SAVE();
1186
121M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
121M
                LAST_PTR_PUSH();
1189
121M
                ctx->u.rep->last_ptr = state->ptr;
1190
121M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
121M
                        ctx->u.rep->pattern+3);
1192
121M
                LAST_PTR_POP();
1193
121M
                if (ret) {
1194
59.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
59.4M
                    RETURN_ON_ERROR(ret);
1196
59.4M
                    RETURN_SUCCESS;
1197
59.4M
                }
1198
62.2M
                MARK_POP(ctx->lastmark);
1199
62.2M
                LASTMARK_RESTORE();
1200
62.2M
                ctx->u.rep->count = ctx->count-1;
1201
62.2M
                state->ptr = ptr;
1202
62.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
67.0M
            state->repeat = ctx->u.rep->prev;
1207
67.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
67.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
67.0M
            RETURN_ON_SUCCESS(ret);
1211
80.8k
            state->ptr = ptr;
1212
80.8k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, ignoring case via sre_lower_ascii() */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, ignoring case via sre_lower_unicode() */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, ignoring case via sre_lower_locale() */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
45.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
45.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
45.3M
                   ptr, pattern[1]));
1565
45.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
45.3M
            state->ptr = ptr - pattern[1];
1568
45.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
45.3M
            RETURN_ON_FAILURE(ret);
1570
44.7M
            pattern += pattern[0];
1571
44.7M
            DISPATCH;
1572
1573
44.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
16.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
16.8M
                   ptr, pattern[1]));
1578
16.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
16.8M
                state->ptr = ptr - pattern[1];
1580
16.8M
                LASTMARK_SAVE();
1581
16.8M
                if (state->repeat)
1582
16.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
33.6M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
33.6M
                if (ret) {
1586
6.81k
                    if (state->repeat)
1587
6.81k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.81k
                    RETURN_ON_ERROR(ret);
1589
6.81k
                    RETURN_FAILURE;
1590
6.81k
                }
1591
16.7M
                if (state->repeat)
1592
16.7M
                    MARK_POP(ctx->lastmark);
1593
16.7M
                LASTMARK_RESTORE();
1594
16.7M
            }
1595
16.7M
            pattern += pattern[0];
1596
16.7M
            DISPATCH;
1597
1598
16.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
542M
exit:
1620
542M
    ctx_pos = ctx->last_ctx_pos;
1621
542M
    jump = ctx->jump;
1622
542M
    DATA_POP_DISCARD(ctx);
1623
542M
    if (ctx_pos == -1) {
1624
94.7M
        state->sigcount = sigcount;
1625
94.7M
        return ret;
1626
94.7M
    }
1627
447M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
447M
    switch (jump) {
1630
121M
        case JUMP_MAX_UNTIL_2:
1631
121M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
121M
            goto jump_max_until_2;
1633
67.0M
        case JUMP_MAX_UNTIL_3:
1634
67.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
67.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
40.2M
        case JUMP_BRANCH:
1643
40.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
40.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
67.0M
        case JUMP_REPEAT:
1658
67.0M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
67.0M
            goto jump_repeat;
1660
8.64M
        case JUMP_REPEAT_ONE_1:
1661
8.64M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
8.64M
            goto jump_repeat_one_1;
1663
80.9M
        case JUMP_REPEAT_ONE_2:
1664
80.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
80.9M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
45.3M
        case JUMP_ASSERT:
1673
45.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
45.3M
            goto jump_assert;
1675
16.8M
        case JUMP_ASSERT_NOT:
1676
16.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
16.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
447M
    }
1683
1684
0
    return ret; /* should never get here */
1685
447M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
303M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
86.3M
{
1694
86.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
86.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
86.3M
    Py_ssize_t status = 0;
1697
86.3M
    Py_ssize_t prefix_len = 0;
1698
86.3M
    Py_ssize_t prefix_skip = 0;
1699
86.3M
    SRE_CODE* prefix = NULL;
1700
86.3M
    SRE_CODE* charset = NULL;
1701
86.3M
    SRE_CODE* overlap = NULL;
1702
86.3M
    int flags = 0;
1703
86.3M
    INIT_TRACE(state);
1704
1705
86.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
86.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
86.3M
        flags = pattern[2];
1713
1714
86.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.58M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.58M
                   end - ptr, (size_t) pattern[3]));
1717
1.58M
            return 0;
1718
1.58M
        }
1719
84.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.22M
            end -= pattern[3] - 1;
1723
7.22M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.22M
        }
1726
1727
84.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.22M
            prefix_len = pattern[5];
1731
7.22M
            prefix_skip = pattern[6];
1732
7.22M
            prefix = pattern + 7;
1733
7.22M
            overlap = prefix + prefix_len - 1;
1734
77.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
72.7M
            charset = pattern + 5;
1738
1739
84.7M
        pattern += 1 + pattern[1];
1740
84.7M
    }
1741
1742
84.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
84.7M
           prefix, prefix_len, prefix_skip));
1744
84.7M
    TRACE(("charset = %p\n", charset));
1745
1746
84.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.83M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.14M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.14M
#endif
1753
4.14M
        end = (SRE_CHAR *)state->end;
1754
4.14M
        state->must_advance = 0;
1755
7.42M
        while (ptr < end) {
1756
91.2M
            while (*ptr != c) {
1757
84.3M
                if (++ptr >= end)
1758
582k
                    return 0;
1759
84.3M
            }
1760
6.82M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.82M
            state->start = ptr;
1762
6.82M
            state->ptr = ptr + prefix_skip;
1763
6.82M
            if (flags & SRE_INFO_LITERAL)
1764
3.11k
                return 1; /* we got all of it */
1765
6.82M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.82M
            if (status != 0)
1767
6.24M
                return status;
1768
583k
            ++ptr;
1769
583k
            RESET_CAPTURE_GROUP();
1770
583k
        }
1771
10.5k
        return 0;
1772
4.14M
    }
1773
1774
77.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
384k
        Py_ssize_t i = 0;
1778
1779
384k
        end = (SRE_CHAR *)state->end;
1780
384k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.06M
        for (i = 0; i < prefix_len; i++)
1784
707k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
353k
#endif
1787
1.01M
        while (ptr < end) {
1788
1.01M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.68M
            while (*ptr++ != c) {
1790
5.67M
                if (ptr >= end)
1791
294
                    return 0;
1792
5.67M
            }
1793
1.01M
            if (ptr >= end)
1794
51
                return 0;
1795
1796
1.01M
            i = 1;
1797
1.01M
            state->must_advance = 0;
1798
1.01M
            do {
1799
1.01M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
950k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
950k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
950k
                    state->start = ptr - (prefix_len - 1);
1808
950k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
950k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
950k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
950k
                    if (status != 0)
1813
383k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
566k
                    if (++ptr >= end)
1816
27
                        return 0;
1817
566k
                    RESET_CAPTURE_GROUP();
1818
566k
                }
1819
631k
                i = overlap[i];
1820
631k
            } while (i != 0);
1821
1.01M
        }
1822
0
        return 0;
1823
384k
    }
1824
1825
77.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
72.7M
        end = (SRE_CHAR *)state->end;
1828
72.7M
        state->must_advance = 0;
1829
75.1M
        for (;;) {
1830
328M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
253M
                ptr++;
1832
75.1M
            if (ptr >= end)
1833
4.02M
                return 0;
1834
71.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
71.1M
            state->start = ptr;
1836
71.1M
            state->ptr = ptr;
1837
71.1M
            status = SRE(match)(state, pattern, 0);
1838
71.1M
            if (status != 0)
1839
68.7M
                break;
1840
2.40M
            ptr++;
1841
2.40M
            RESET_CAPTURE_GROUP();
1842
2.40M
        }
1843
72.7M
    } else {
1844
        /* general case */
1845
4.71M
        assert(ptr <= end);
1846
4.71M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.71M
        state->start = state->ptr = ptr;
1848
4.71M
        status = SRE(match)(state, pattern, 1);
1849
4.71M
        state->must_advance = 0;
1850
4.71M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
304M
        while (status == 0 && ptr < end) {
1858
300M
            ptr++;
1859
300M
            RESET_CAPTURE_GROUP();
1860
300M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
300M
            state->start = state->ptr = ptr;
1862
300M
            status = SRE(match)(state, pattern, 0);
1863
300M
        }
1864
4.71M
    }
1865
1866
73.4M
    return status;
1867
77.5M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
32.6M
{
1694
32.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
32.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
32.6M
    Py_ssize_t status = 0;
1697
32.6M
    Py_ssize_t prefix_len = 0;
1698
32.6M
    Py_ssize_t prefix_skip = 0;
1699
32.6M
    SRE_CODE* prefix = NULL;
1700
32.6M
    SRE_CODE* charset = NULL;
1701
32.6M
    SRE_CODE* overlap = NULL;
1702
32.6M
    int flags = 0;
1703
32.6M
    INIT_TRACE(state);
1704
1705
32.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
32.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
32.6M
        flags = pattern[2];
1713
1714
32.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.45M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.45M
                   end - ptr, (size_t) pattern[3]));
1717
1.45M
            return 0;
1718
1.45M
        }
1719
31.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.92M
            end -= pattern[3] - 1;
1723
1.92M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.92M
        }
1726
1727
31.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.92M
            prefix_len = pattern[5];
1731
1.92M
            prefix_skip = pattern[6];
1732
1.92M
            prefix = pattern + 7;
1733
1.92M
            overlap = prefix + prefix_len - 1;
1734
29.2M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
25.7M
            charset = pattern + 5;
1738
1739
31.1M
        pattern += 1 + pattern[1];
1740
31.1M
    }
1741
1742
31.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
31.1M
           prefix, prefix_len, prefix_skip));
1744
31.1M
    TRACE(("charset = %p\n", charset));
1745
1746
31.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.91M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.91M
#if SIZEOF_SRE_CHAR < 4
1750
1.91M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.91M
#endif
1753
1.91M
        end = (SRE_CHAR *)state->end;
1754
1.91M
        state->must_advance = 0;
1755
2.05M
        while (ptr < end) {
1756
25.9M
            while (*ptr != c) {
1757
24.4M
                if (++ptr >= end)
1758
514k
                    return 0;
1759
24.4M
            }
1760
1.52M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.52M
            state->start = ptr;
1762
1.52M
            state->ptr = ptr + prefix_skip;
1763
1.52M
            if (flags & SRE_INFO_LITERAL)
1764
252
                return 1; /* we got all of it */
1765
1.52M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.52M
            if (status != 0)
1767
1.38M
                return status;
1768
142k
            ++ptr;
1769
142k
            RESET_CAPTURE_GROUP();
1770
142k
        }
1771
8.30k
        return 0;
1772
1.91M
    }
1773
1774
29.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
16.1k
        Py_ssize_t i = 0;
1778
1779
16.1k
        end = (SRE_CHAR *)state->end;
1780
16.1k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
16.1k
#if SIZEOF_SRE_CHAR < 4
1783
48.5k
        for (i = 0; i < prefix_len; i++)
1784
32.3k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
16.1k
#endif
1787
166k
        while (ptr < end) {
1788
166k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.41M
            while (*ptr++ != c) {
1790
1.24M
                if (ptr >= end)
1791
55
                    return 0;
1792
1.24M
            }
1793
165k
            if (ptr >= end)
1794
26
                return 0;
1795
1796
165k
            i = 1;
1797
165k
            state->must_advance = 0;
1798
166k
            do {
1799
166k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
128k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
128k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
128k
                    state->start = ptr - (prefix_len - 1);
1808
128k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
128k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
128k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
128k
                    if (status != 0)
1813
16.0k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
112k
                    if (++ptr >= end)
1816
12
                        return 0;
1817
112k
                    RESET_CAPTURE_GROUP();
1818
112k
                }
1819
150k
                i = overlap[i];
1820
150k
            } while (i != 0);
1821
165k
        }
1822
0
        return 0;
1823
16.1k
    }
1824
1825
29.2M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
25.7M
        end = (SRE_CHAR *)state->end;
1828
25.7M
        state->must_advance = 0;
1829
27.1M
        for (;;) {
1830
74.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
47.1M
                ptr++;
1832
27.1M
            if (ptr >= end)
1833
2.85M
                return 0;
1834
24.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
24.3M
            state->start = ptr;
1836
24.3M
            state->ptr = ptr;
1837
24.3M
            status = SRE(match)(state, pattern, 0);
1838
24.3M
            if (status != 0)
1839
22.8M
                break;
1840
1.42M
            ptr++;
1841
1.42M
            RESET_CAPTURE_GROUP();
1842
1.42M
        }
1843
25.7M
    } else {
1844
        /* general case */
1845
3.47M
        assert(ptr <= end);
1846
3.47M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.47M
        state->start = state->ptr = ptr;
1848
3.47M
        status = SRE(match)(state, pattern, 1);
1849
3.47M
        state->must_advance = 0;
1850
3.47M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
84.8M
        while (status == 0 && ptr < end) {
1858
81.3M
            ptr++;
1859
81.3M
            RESET_CAPTURE_GROUP();
1860
81.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
81.3M
            state->start = state->ptr = ptr;
1862
81.3M
            status = SRE(match)(state, pattern, 0);
1863
81.3M
        }
1864
3.47M
    }
1865
1866
26.3M
    return status;
1867
29.2M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
46.1M
{
1694
46.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
46.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
46.1M
    Py_ssize_t status = 0;
1697
46.1M
    Py_ssize_t prefix_len = 0;
1698
46.1M
    Py_ssize_t prefix_skip = 0;
1699
46.1M
    SRE_CODE* prefix = NULL;
1700
46.1M
    SRE_CODE* charset = NULL;
1701
46.1M
    SRE_CODE* overlap = NULL;
1702
46.1M
    int flags = 0;
1703
46.1M
    INIT_TRACE(state);
1704
1705
46.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
46.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
46.1M
        flags = pattern[2];
1713
1714
46.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
116k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
116k
                   end - ptr, (size_t) pattern[3]));
1717
116k
            return 0;
1718
116k
        }
1719
46.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.56M
            end -= pattern[3] - 1;
1723
2.56M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.56M
        }
1726
1727
46.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.57M
            prefix_len = pattern[5];
1731
2.57M
            prefix_skip = pattern[6];
1732
2.57M
            prefix = pattern + 7;
1733
2.57M
            overlap = prefix + prefix_len - 1;
1734
43.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
42.4M
            charset = pattern + 5;
1738
1739
46.0M
        pattern += 1 + pattern[1];
1740
46.0M
    }
1741
1742
46.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
46.0M
           prefix, prefix_len, prefix_skip));
1744
46.0M
    TRACE(("charset = %p\n", charset));
1745
1746
46.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.23M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.23M
#if SIZEOF_SRE_CHAR < 4
1750
2.23M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.23M
#endif
1753
2.23M
        end = (SRE_CHAR *)state->end;
1754
2.23M
        state->must_advance = 0;
1755
2.40M
        while (ptr < end) {
1756
43.5M
            while (*ptr != c) {
1757
41.2M
                if (++ptr >= end)
1758
64.4k
                    return 0;
1759
41.2M
            }
1760
2.33M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.33M
            state->start = ptr;
1762
2.33M
            state->ptr = ptr + prefix_skip;
1763
2.33M
            if (flags & SRE_INFO_LITERAL)
1764
758
                return 1; /* we got all of it */
1765
2.33M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.33M
            if (status != 0)
1767
2.16M
                return status;
1768
171k
            ++ptr;
1769
171k
            RESET_CAPTURE_GROUP();
1770
171k
        }
1771
1.26k
        return 0;
1772
2.23M
    }
1773
1774
43.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
337k
        Py_ssize_t i = 0;
1778
1779
337k
        end = (SRE_CHAR *)state->end;
1780
337k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
337k
#if SIZEOF_SRE_CHAR < 4
1783
1.01M
        for (i = 0; i < prefix_len; i++)
1784
674k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
337k
#endif
1787
602k
        while (ptr < end) {
1788
602k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.55M
            while (*ptr++ != c) {
1790
1.94M
                if (ptr >= end)
1791
116
                    return 0;
1792
1.94M
            }
1793
602k
            if (ptr >= end)
1794
10
                return 0;
1795
1796
602k
            i = 1;
1797
602k
            state->must_advance = 0;
1798
602k
            do {
1799
602k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
584k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
584k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
584k
                    state->start = ptr - (prefix_len - 1);
1808
584k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
584k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
584k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
584k
                    if (status != 0)
1813
337k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
247k
                    if (++ptr >= end)
1816
10
                        return 0;
1817
247k
                    RESET_CAPTURE_GROUP();
1818
247k
                }
1819
265k
                i = overlap[i];
1820
265k
            } while (i != 0);
1821
602k
        }
1822
0
        return 0;
1823
337k
    }
1824
1825
43.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
42.4M
        end = (SRE_CHAR *)state->end;
1828
42.4M
        state->must_advance = 0;
1829
42.9M
        for (;;) {
1830
184M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
141M
                ptr++;
1832
42.9M
            if (ptr >= end)
1833
1.11M
                return 0;
1834
41.8M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
41.8M
            state->start = ptr;
1836
41.8M
            state->ptr = ptr;
1837
41.8M
            status = SRE(match)(state, pattern, 0);
1838
41.8M
            if (status != 0)
1839
41.3M
                break;
1840
497k
            ptr++;
1841
497k
            RESET_CAPTURE_GROUP();
1842
497k
        }
1843
42.4M
    } else {
1844
        /* general case */
1845
1.00M
        assert(ptr <= end);
1846
1.00M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.00M
        state->start = state->ptr = ptr;
1848
1.00M
        status = SRE(match)(state, pattern, 1);
1849
1.00M
        state->must_advance = 0;
1850
1.00M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
156M
        while (status == 0 && ptr < end) {
1858
155M
            ptr++;
1859
155M
            RESET_CAPTURE_GROUP();
1860
155M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
155M
            state->start = state->ptr = ptr;
1862
155M
            status = SRE(match)(state, pattern, 0);
1863
155M
        }
1864
1.00M
    }
1865
1866
42.3M
    return status;
1867
43.4M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.52M
{
1694
7.52M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.52M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.52M
    Py_ssize_t status = 0;
1697
7.52M
    Py_ssize_t prefix_len = 0;
1698
7.52M
    Py_ssize_t prefix_skip = 0;
1699
7.52M
    SRE_CODE* prefix = NULL;
1700
7.52M
    SRE_CODE* charset = NULL;
1701
7.52M
    SRE_CODE* overlap = NULL;
1702
7.52M
    int flags = 0;
1703
7.52M
    INIT_TRACE(state);
1704
1705
7.52M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.52M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.52M
        flags = pattern[2];
1713
1714
7.52M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.99k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.99k
                   end - ptr, (size_t) pattern[3]));
1717
6.99k
            return 0;
1718
6.99k
        }
1719
7.51M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.72M
            end -= pattern[3] - 1;
1723
2.72M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.72M
        }
1726
1727
7.51M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.72M
            prefix_len = pattern[5];
1731
2.72M
            prefix_skip = pattern[6];
1732
2.72M
            prefix = pattern + 7;
1733
2.72M
            overlap = prefix + prefix_len - 1;
1734
4.79M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.56M
            charset = pattern + 5;
1738
1739
7.51M
        pattern += 1 + pattern[1];
1740
7.51M
    }
1741
1742
7.51M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.51M
           prefix, prefix_len, prefix_skip));
1744
7.51M
    TRACE(("charset = %p\n", charset));
1745
1746
7.51M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.69M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.69M
        end = (SRE_CHAR *)state->end;
1754
2.69M
        state->must_advance = 0;
1755
2.96M
        while (ptr < end) {
1756
21.6M
            while (*ptr != c) {
1757
18.7M
                if (++ptr >= end)
1758
4.16k
                    return 0;
1759
18.7M
            }
1760
2.96M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.96M
            state->start = ptr;
1762
2.96M
            state->ptr = ptr + prefix_skip;
1763
2.96M
            if (flags & SRE_INFO_LITERAL)
1764
2.10k
                return 1; /* we got all of it */
1765
2.95M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.95M
            if (status != 0)
1767
2.68M
                return status;
1768
269k
            ++ptr;
1769
269k
            RESET_CAPTURE_GROUP();
1770
269k
        }
1771
946
        return 0;
1772
2.69M
    }
1773
1774
4.82M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
30.8k
        Py_ssize_t i = 0;
1778
1779
30.8k
        end = (SRE_CHAR *)state->end;
1780
30.8k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
245k
        while (ptr < end) {
1788
245k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.72M
            while (*ptr++ != c) {
1790
2.47M
                if (ptr >= end)
1791
123
                    return 0;
1792
2.47M
            }
1793
245k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
245k
            i = 1;
1797
245k
            state->must_advance = 0;
1798
246k
            do {
1799
246k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
237k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
237k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
237k
                    state->start = ptr - (prefix_len - 1);
1808
237k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
237k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
237k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
237k
                    if (status != 0)
1813
30.7k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
206k
                    if (++ptr >= end)
1816
5
                        return 0;
1817
206k
                    RESET_CAPTURE_GROUP();
1818
206k
                }
1819
215k
                i = overlap[i];
1820
215k
            } while (i != 0);
1821
245k
        }
1822
0
        return 0;
1823
30.8k
    }
1824
1825
4.79M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.56M
        end = (SRE_CHAR *)state->end;
1828
4.56M
        state->must_advance = 0;
1829
5.04M
        for (;;) {
1830
70.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
65.3M
                ptr++;
1832
5.04M
            if (ptr >= end)
1833
54.1k
                return 0;
1834
4.98M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.98M
            state->start = ptr;
1836
4.98M
            state->ptr = ptr;
1837
4.98M
            status = SRE(match)(state, pattern, 0);
1838
4.98M
            if (status != 0)
1839
4.50M
                break;
1840
482k
            ptr++;
1841
482k
            RESET_CAPTURE_GROUP();
1842
482k
        }
1843
4.56M
    } else {
1844
        /* general case */
1845
231k
        assert(ptr <= end);
1846
231k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
231k
        state->start = state->ptr = ptr;
1848
231k
        status = SRE(match)(state, pattern, 1);
1849
231k
        state->must_advance = 0;
1850
231k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
63.1M
        while (status == 0 && ptr < end) {
1858
62.9M
            ptr++;
1859
62.9M
            RESET_CAPTURE_GROUP();
1860
62.9M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
62.9M
            state->start = state->ptr = ptr;
1862
62.9M
            status = SRE(match)(state, pattern, 0);
1863
62.9M
        }
1864
231k
    }
1865
1866
4.73M
    return status;
1867
4.79M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/