Coverage Report

Created: 2025-10-12 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.2M
{
18
    /* check if pointer is at given position */
19
20
14.2M
    Py_ssize_t thisp, thatp;
21
22
14.2M
    switch (at) {
23
24
7.01M
    case SRE_AT_BEGINNING:
25
7.01M
    case SRE_AT_BEGINNING_STRING:
26
7.01M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.80M
    case SRE_AT_END:
33
4.80M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
29.9k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.80M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.41M
    case SRE_AT_END_STRING:
42
2.41M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.2M
    }
87
88
0
    return 0;
89
14.2M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.8M
{
18
    /* check if pointer is at given position */
19
20
12.8M
    Py_ssize_t thisp, thatp;
21
22
12.8M
    switch (at) {
23
24
6.98M
    case SRE_AT_BEGINNING:
25
6.98M
    case SRE_AT_BEGINNING_STRING:
26
6.98M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.42M
    case SRE_AT_END:
33
4.42M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
29.3k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.42M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.42M
    case SRE_AT_END_STRING:
42
1.42M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.8M
    }
87
88
0
    return 0;
89
12.8M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
830k
{
18
    /* check if pointer is at given position */
19
20
830k
    Py_ssize_t thisp, thatp;
21
22
830k
    switch (at) {
23
24
27.9k
    case SRE_AT_BEGINNING:
25
27.9k
    case SRE_AT_BEGINNING_STRING:
26
27.9k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
295k
    case SRE_AT_END:
33
295k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
60
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
295k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
506k
    case SRE_AT_END_STRING:
42
506k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
830k
    }
87
88
0
    return 0;
89
830k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
567k
{
18
    /* check if pointer is at given position */
19
20
567k
    Py_ssize_t thisp, thatp;
21
22
567k
    switch (at) {
23
24
3.92k
    case SRE_AT_BEGINNING:
25
3.92k
    case SRE_AT_BEGINNING_STRING:
26
3.92k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.8k
    case SRE_AT_END:
33
80.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
503
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
482k
    case SRE_AT_END_STRING:
42
482k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
567k
    }
87
88
0
    return 0;
89
567k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.77G
{
94
    /* check if character is a member of the given set */
95
96
1.77G
    int ok = 1;
97
98
3.84G
    for (;;) {
99
3.84G
        switch (*set++) {
100
101
1.11G
        case SRE_OP_FAILURE:
102
1.11G
            return !ok;
103
104
1.01G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.01G
            if (ch == set[0])
107
4.41M
                return ok;
108
1.01G
            set++;
109
1.01G
            break;
110
111
12.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.1M
            if (sre_category(set[0], (int) ch))
114
8.16M
                return ok;
115
4.03M
            set++;
116
4.03M
            break;
117
118
977M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
977M
            if (ch < 256 &&
121
857M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
441M
                return ok;
123
535M
            set += 256/SRE_CODE_BITS;
124
535M
            break;
125
126
337M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
337M
            if (set[0] <= ch && ch <= set[1])
129
209M
                return ok;
130
128M
            set += 2;
131
128M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
386M
        case SRE_OP_NEGATE:
148
386M
            ok = !ok;
149
386M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.84G
        }
175
3.84G
    }
176
1.77G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
321M
{
94
    /* check if character is a member of the given set */
95
96
321M
    int ok = 1;
97
98
644M
    for (;;) {
99
644M
        switch (*set++) {
100
101
168M
        case SRE_OP_FAILURE:
102
168M
            return !ok;
103
104
186M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
186M
            if (ch == set[0])
107
2.07M
                return ok;
108
184M
            set++;
109
184M
            break;
110
111
11.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.5M
            if (sre_category(set[0], (int) ch))
114
7.52M
                return ok;
115
4.02M
            set++;
116
4.02M
            break;
117
118
79.8M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
79.8M
            if (ch < 256 &&
121
79.8M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
40.9M
                return ok;
123
38.8M
            set += 256/SRE_CODE_BITS;
124
38.8M
            break;
125
126
164M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
164M
            if (set[0] <= ch && ch <= set[1])
129
102M
                return ok;
130
62.3M
            set += 2;
131
62.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
33.2M
        case SRE_OP_NEGATE:
148
33.2M
            ok = !ok;
149
33.2M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
644M
        }
175
644M
    }
176
321M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
751M
{
94
    /* check if character is a member of the given set */
95
96
751M
    int ok = 1;
97
98
1.74G
    for (;;) {
99
1.74G
        switch (*set++) {
100
101
526M
        case SRE_OP_FAILURE:
102
526M
            return !ok;
103
104
558M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
558M
            if (ch == set[0])
107
1.42M
                return ok;
108
556M
            set++;
109
556M
            break;
110
111
208k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
208k
            if (sre_category(set[0], (int) ch))
114
200k
                return ok;
115
7.77k
            set++;
116
7.77k
            break;
117
118
348M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
348M
            if (ch < 256 &&
121
280M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
130M
                return ok;
123
218M
            set += 256/SRE_CODE_BITS;
124
218M
            break;
125
126
152M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
152M
            if (set[0] <= ch && ch <= set[1])
129
94.0M
                return ok;
130
58.3M
            set += 2;
131
58.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
160M
        case SRE_OP_NEGATE:
148
160M
            ok = !ok;
149
160M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.74G
        }
175
1.74G
    }
176
751M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
703M
{
94
    /* check if character is a member of the given set */
95
96
703M
    int ok = 1;
97
98
1.45G
    for (;;) {
99
1.45G
        switch (*set++) {
100
101
418M
        case SRE_OP_FAILURE:
102
418M
            return !ok;
103
104
273M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
273M
            if (ch == set[0])
107
909k
                return ok;
108
272M
            set++;
109
272M
            break;
110
111
437k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
437k
            if (sre_category(set[0], (int) ch))
114
436k
                return ok;
115
825
            set++;
116
825
            break;
117
118
548M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
548M
            if (ch < 256 &&
121
496M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
270M
                return ok;
123
277M
            set += 256/SRE_CODE_BITS;
124
277M
            break;
125
126
20.8M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
20.8M
            if (set[0] <= ch && ch <= set[1])
129
12.8M
                return ok;
130
7.91M
            set += 2;
131
7.91M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
192M
        case SRE_OP_NEGATE:
148
192M
            ok = !ok;
149
192M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.45G
        }
175
1.45G
    }
176
703M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
667M
{
195
667M
    SRE_CODE chr;
196
667M
    SRE_CHAR c;
197
667M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
667M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
667M
    Py_ssize_t i;
200
667M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
667M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
13.5M
        end = ptr + maxcount;
205
206
667M
    switch (pattern[0]) {
207
208
599M
    case SRE_OP_IN:
209
        /* repeated set */
210
599M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
982M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
383M
            ptr++;
213
599M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
61.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
61.3M
        chr = pattern[1];
232
61.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
61.3M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
58.5M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
58.5M
        else
238
58.5M
#endif
239
65.6M
        while (ptr < end && *ptr == c)
240
4.35M
            ptr++;
241
61.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
6.16M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
6.16M
        chr = pattern[1];
270
6.16M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
6.16M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
2.44M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.44M
        else
276
2.44M
#endif
277
37.3M
        while (ptr < end && *ptr != c)
278
31.1M
            ptr++;
279
6.16M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
667M
    }
319
320
667M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
667M
           ptr - (SRE_CHAR*) state->ptr));
322
667M
    return ptr - (SRE_CHAR*) state->ptr;
323
667M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
143M
{
195
143M
    SRE_CODE chr;
196
143M
    SRE_CHAR c;
197
143M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
143M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
143M
    Py_ssize_t i;
200
143M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
143M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
2.61M
        end = ptr + maxcount;
205
206
143M
    switch (pattern[0]) {
207
208
92.3M
    case SRE_OP_IN:
209
        /* repeated set */
210
92.3M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
205M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
112M
            ptr++;
213
92.3M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
51.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
51.3M
        chr = pattern[1];
232
51.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
51.3M
        c = (SRE_CHAR) chr;
234
51.3M
#if SIZEOF_SRE_CHAR < 4
235
51.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
51.3M
        else
238
51.3M
#endif
239
53.2M
        while (ptr < end && *ptr == c)
240
1.87M
            ptr++;
241
51.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
218k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
218k
        chr = pattern[1];
270
218k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
218k
        c = (SRE_CHAR) chr;
272
218k
#if SIZEOF_SRE_CHAR < 4
273
218k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
218k
        else
276
218k
#endif
277
6.96M
        while (ptr < end && *ptr != c)
278
6.74M
            ptr++;
279
218k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
143M
    }
319
320
143M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
143M
           ptr - (SRE_CHAR*) state->ptr));
322
143M
    return ptr - (SRE_CHAR*) state->ptr;
323
143M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
270M
{
195
270M
    SRE_CODE chr;
196
270M
    SRE_CHAR c;
197
270M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
270M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
270M
    Py_ssize_t i;
200
270M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
270M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.23M
        end = ptr + maxcount;
205
206
270M
    switch (pattern[0]) {
207
208
260M
    case SRE_OP_IN:
209
        /* repeated set */
210
260M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
385M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
124M
            ptr++;
213
260M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
7.11M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
7.11M
        chr = pattern[1];
232
7.11M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
7.11M
        c = (SRE_CHAR) chr;
234
7.11M
#if SIZEOF_SRE_CHAR < 4
235
7.11M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
7.11M
        else
238
7.11M
#endif
239
9.36M
        while (ptr < end && *ptr == c)
240
2.24M
            ptr++;
241
7.11M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.22M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.22M
        chr = pattern[1];
270
2.22M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.22M
        c = (SRE_CHAR) chr;
272
2.22M
#if SIZEOF_SRE_CHAR < 4
273
2.22M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.22M
        else
276
2.22M
#endif
277
9.96M
        while (ptr < end && *ptr != c)
278
7.73M
            ptr++;
279
2.22M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
270M
    }
319
320
270M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
270M
           ptr - (SRE_CHAR*) state->ptr));
322
270M
    return ptr - (SRE_CHAR*) state->ptr;
323
270M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
253M
{
195
253M
    SRE_CODE chr;
196
253M
    SRE_CHAR c;
197
253M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
253M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
253M
    Py_ssize_t i;
200
253M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
253M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.66M
        end = ptr + maxcount;
205
206
253M
    switch (pattern[0]) {
207
208
246M
    case SRE_OP_IN:
209
        /* repeated set */
210
246M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
392M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
146M
            ptr++;
213
246M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.80M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.80M
        chr = pattern[1];
232
2.80M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.80M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.03M
        while (ptr < end && *ptr == c)
240
228k
            ptr++;
241
2.80M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.72M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.72M
        chr = pattern[1];
270
3.72M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.72M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
20.4M
        while (ptr < end && *ptr != c)
278
16.7M
            ptr++;
279
3.72M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
253M
    }
319
320
253M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
253M
           ptr - (SRE_CHAR*) state->ptr));
322
253M
    return ptr - (SRE_CHAR*) state->ptr;
323
253M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
838M
    do { \
355
838M
        ctx->lastmark = state->lastmark; \
356
838M
        ctx->lastindex = state->lastindex; \
357
838M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
264M
    do { \
360
264M
        state->lastmark = ctx->lastmark; \
361
264M
        state->lastindex = ctx->lastindex; \
362
264M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
293M
    do { \
366
293M
        TRACE(("push last_ptr: %zd", \
367
293M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
293M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
293M
    } while (0)
370
#define LAST_PTR_POP()  \
371
293M
    do { \
372
293M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
293M
        TRACE(("pop last_ptr: %zd", \
374
293M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
293M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
607M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
1.05G
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.56G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
162M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
108M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.65G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.65G
do { \
390
1.65G
    alloc_pos = state->data_stack_base; \
391
1.65G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.65G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.65G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
175M
        int j = data_stack_grow(state, sizeof(type)); \
395
175M
        if (j < 0) return j; \
396
175M
        if (ctx_pos != -1) \
397
175M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
175M
    } \
399
1.65G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.65G
    state->data_stack_base += sizeof(type); \
401
1.65G
} while (0)
402
403
1.83G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.83G
do { \
405
1.83G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.83G
    ptr = (type*)(state->data_stack+pos); \
407
1.83G
} while (0)
408
409
721M
#define DATA_STACK_PUSH(state, data, size) \
410
721M
do { \
411
721M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
721M
           data, state->data_stack_base, size)); \
413
721M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
64.1k
        int j = data_stack_grow(state, size); \
415
64.1k
        if (j < 0) return j; \
416
64.1k
        if (ctx_pos != -1) \
417
64.1k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
64.1k
    } \
419
721M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
721M
    state->data_stack_base += size; \
421
721M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
438M
#define DATA_STACK_POP(state, data, size, discard) \
427
438M
do { \
428
438M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
438M
           data, state->data_stack_base-size, size)); \
430
438M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
438M
    if (discard) \
432
438M
        state->data_stack_base -= size; \
433
438M
} while (0)
434
435
1.94G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.94G
do { \
437
1.94G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.94G
           state->data_stack_base-size, size)); \
439
1.94G
    state->data_stack_base -= size; \
440
1.94G
} while(0)
441
442
#define DATA_PUSH(x) \
443
293M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
293M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.65G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.65G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.83G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
709M
    do if (lastmark >= 0) { \
473
428M
        MARK_TRACE("push", (lastmark)); \
474
428M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
428M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
709M
    } while (0)
477
#define MARK_POP(lastmark) \
478
195M
    do if (lastmark >= 0) { \
479
142M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
142M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
142M
        MARK_TRACE("pop", (lastmark)); \
482
195M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.72M
    do if (lastmark >= 0) { \
485
1.72M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.72M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.72M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.72M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
514M
    do if (lastmark >= 0) { \
491
285M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
285M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
285M
        MARK_TRACE("pop discard", (lastmark)); \
494
514M
    } while (0)
495
496
494M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
293M
#define JUMP_MAX_UNTIL_2     2
499
162M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
161M
#define JUMP_REPEAT          7
504
15.2M
#define JUMP_REPEAT_ONE_1    8
505
207M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
171M
#define JUMP_BRANCH          11
508
108M
#define JUMP_ASSERT          12
509
44.4M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.16G
    ctx->pattern = pattern; \
516
1.16G
    ctx->ptr = ptr; \
517
1.16G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.16G
    nextctx->pattern = nextpattern; \
519
1.16G
    nextctx->toplevel = toplevel_; \
520
1.16G
    nextctx->jump = jumpvalue; \
521
1.16G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.16G
    pattern = nextpattern; \
523
1.16G
    ctx_pos = alloc_pos; \
524
1.16G
    ctx = nextctx; \
525
1.16G
    goto entrance; \
526
1.16G
    jumplabel: \
527
1.16G
    pattern = ctx->pattern; \
528
1.16G
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
1.01G
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
152M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.88G
    do {                                                           \
553
2.88G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.88G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.88G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.99G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.88G
        do {                               \
588
2.88G
            MAYBE_CHECK_SIGNALS;           \
589
2.88G
            goto *sre_targets[*pattern++]; \
590
2.88G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
494M
{
601
494M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
494M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
494M
    Py_ssize_t ret = 0;
604
494M
    int jump;
605
494M
    unsigned int sigcount = state->sigcount;
606
607
494M
    SRE(match_context)* ctx;
608
494M
    SRE(match_context)* nextctx;
609
494M
    INIT_TRACE(state);
610
611
494M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
494M
    DATA_ALLOC(SRE(match_context), ctx);
614
494M
    ctx->last_ctx_pos = -1;
615
494M
    ctx->jump = JUMP_NONE;
616
494M
    ctx->toplevel = toplevel;
617
494M
    ctx_pos = alloc_pos;
618
619
494M
#if USE_COMPUTED_GOTOS
620
494M
#include "sre_targets.h"
621
494M
#endif
622
623
1.65G
entrance:
624
625
1.65G
    ;  // Fashion statement.
626
1.65G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.65G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
92.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.72M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.72M
                   end - ptr, (size_t) pattern[3]));
634
5.72M
            RETURN_FAILURE;
635
5.72M
        }
636
86.9M
        pattern += pattern[1] + 1;
637
86.9M
    }
638
639
1.65G
#if USE_COMPUTED_GOTOS
640
1.65G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.65G
    {
647
648
1.65G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
538M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
538M
                   ptr, pattern[0]));
653
538M
            {
654
538M
                int i = pattern[0];
655
538M
                if (i & 1)
656
87.0M
                    state->lastindex = i/2 + 1;
657
538M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
531M
                    int j = state->lastmark + 1;
663
538M
                    while (j < i)
664
7.46M
                        state->mark[j++] = NULL;
665
531M
                    state->lastmark = i;
666
531M
                }
667
538M
                state->mark[i] = ptr;
668
538M
            }
669
538M
            pattern++;
670
538M
            DISPATCH;
671
672
538M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
157M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
157M
                   ptr, *pattern));
677
157M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
56.6M
                RETURN_FAILURE;
679
100M
            pattern++;
680
100M
            ptr++;
681
100M
            DISPATCH;
682
683
100M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
261M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
261M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
261M
            if (ctx->toplevel &&
698
76.1M
                ((state->match_all && ptr != state->end) ||
699
76.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
261M
            state->ptr = ptr;
704
261M
            RETURN_SUCCESS;
705
706
14.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.2M
            if (!SRE(at)(state, ptr, *pattern))
711
3.38M
                RETURN_FAILURE;
712
10.8M
            pattern++;
713
10.8M
            DISPATCH;
714
715
10.8M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
330M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
330M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
330M
            if (ptr >= end ||
749
330M
                !SRE(charset)(state, pattern + 1, *ptr))
750
8.13M
                RETURN_FAILURE;
751
321M
            pattern += pattern[0];
752
321M
            ptr++;
753
321M
            DISPATCH;
754
755
321M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.86M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.86M
                   pattern, ptr, pattern[0]));
758
5.86M
            if (ptr >= end ||
759
5.86M
                sre_lower_ascii(*ptr) != *pattern)
760
206k
                RETURN_FAILURE;
761
5.65M
            pattern++;
762
5.65M
            ptr++;
763
5.65M
            DISPATCH;
764
765
5.65M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
107M
        TARGET(SRE_OP_JUMP):
845
107M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
107M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
107M
                   ptr, pattern[0]));
850
107M
            pattern += pattern[0];
851
107M
            DISPATCH;
852
853
183M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
183M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
183M
            LASTMARK_SAVE();
858
183M
            if (state->repeat)
859
138M
                MARK_PUSH(ctx->lastmark);
860
436M
            for (; pattern[0]; pattern += pattern[0]) {
861
358M
                if (pattern[1] == SRE_OP_LITERAL &&
862
166M
                    (ptr >= end ||
863
166M
                     (SRE_CODE) *ptr != pattern[2]))
864
90.6M
                    continue;
865
267M
                if (pattern[1] == SRE_OP_IN &&
866
132M
                    (ptr >= end ||
867
132M
                     !SRE(charset)(state, pattern + 3,
868
132M
                                   (SRE_CODE) *ptr)))
869
96.6M
                    continue;
870
171M
                state->ptr = ptr;
871
171M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
171M
                if (ret) {
873
104M
                    if (state->repeat)
874
86.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
104M
                    RETURN_ON_ERROR(ret);
876
104M
                    RETURN_SUCCESS;
877
104M
                }
878
66.4M
                if (state->repeat)
879
17.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
66.4M
                LASTMARK_RESTORE();
881
66.4M
            }
882
78.3M
            if (state->repeat)
883
52.1M
                MARK_POP_DISCARD(ctx->lastmark);
884
78.3M
            RETURN_FAILURE;
885
886
668M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
668M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
668M
                   pattern[1], pattern[2]));
898
899
668M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.19M
                RETURN_FAILURE; /* cannot match */
901
902
667M
            state->ptr = ptr;
903
904
667M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
667M
            RETURN_ON_ERROR(ret);
906
667M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
667M
            ctx->count = ret;
908
667M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
667M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
349M
                RETURN_FAILURE;
917
918
317M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.65M
                ptr == state->end &&
920
76.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
76.7k
            {
922
                /* tail is empty.  we're finished */
923
76.7k
                state->ptr = ptr;
924
76.7k
                RETURN_SUCCESS;
925
76.7k
            }
926
927
317M
            LASTMARK_SAVE();
928
317M
            if (state->repeat)
929
232M
                MARK_PUSH(ctx->lastmark);
930
931
317M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
112M
                ctx->u.chr = pattern[pattern[0]+1];
935
112M
                for (;;) {
936
265M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
168M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
153M
                        ptr--;
939
153M
                        ctx->count--;
940
153M
                    }
941
112M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
97.0M
                        break;
943
15.2M
                    state->ptr = ptr;
944
15.2M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
15.2M
                            pattern+pattern[0]);
946
15.2M
                    if (ret) {
947
15.2M
                        if (state->repeat)
948
13.6M
                            MARK_POP_DISCARD(ctx->lastmark);
949
15.2M
                        RETURN_ON_ERROR(ret);
950
15.2M
                        RETURN_SUCCESS;
951
15.2M
                    }
952
500
                    if (state->repeat)
953
500
                        MARK_POP_KEEP(ctx->lastmark);
954
500
                    LASTMARK_RESTORE();
955
956
500
                    ptr--;
957
500
                    ctx->count--;
958
500
                }
959
97.0M
                if (state->repeat)
960
95.6M
                    MARK_POP_DISCARD(ctx->lastmark);
961
204M
            } else {
962
                /* general case */
963
208M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
207M
                    state->ptr = ptr;
965
207M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
207M
                            pattern+pattern[0]);
967
207M
                    if (ret) {
968
203M
                        if (state->repeat)
969
121M
                            MARK_POP_DISCARD(ctx->lastmark);
970
203M
                        RETURN_ON_ERROR(ret);
971
203M
                        RETURN_SUCCESS;
972
203M
                    }
973
3.19M
                    if (state->repeat)
974
1.70M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.19M
                    LASTMARK_RESTORE();
976
977
3.19M
                    ptr--;
978
3.19M
                    ctx->count--;
979
3.19M
                }
980
1.11M
                if (state->repeat)
981
909k
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.11M
            }
983
98.1M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
161M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
161M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
161M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
161M
            ctx->u.rep = repeat_pool_malloc(state);
1127
161M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
161M
            ctx->u.rep->count = -1;
1131
161M
            ctx->u.rep->pattern = pattern;
1132
161M
            ctx->u.rep->prev = state->repeat;
1133
161M
            ctx->u.rep->last_ptr = NULL;
1134
161M
            state->repeat = ctx->u.rep;
1135
1136
161M
            state->ptr = ptr;
1137
161M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
161M
            state->repeat = ctx->u.rep->prev;
1139
161M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
161M
            if (ret) {
1142
161M
                RETURN_ON_ERROR(ret);
1143
161M
                RETURN_SUCCESS;
1144
161M
            }
1145
112k
            RETURN_FAILURE;
1146
1147
305M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
305M
            ctx->u.rep = state->repeat;
1155
305M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
305M
            state->ptr = ptr;
1159
1160
305M
            ctx->count = ctx->u.rep->count+1;
1161
1162
305M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
305M
                   ptr, ctx->count));
1164
1165
305M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
305M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
11.6M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
293M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
293M
                ctx->u.rep->count = ctx->count;
1185
293M
                LASTMARK_SAVE();
1186
293M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
293M
                LAST_PTR_PUSH();
1189
293M
                ctx->u.rep->last_ptr = state->ptr;
1190
293M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
293M
                        ctx->u.rep->pattern+3);
1192
293M
                LAST_PTR_POP();
1193
293M
                if (ret) {
1194
143M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
143M
                    RETURN_ON_ERROR(ret);
1196
143M
                    RETURN_SUCCESS;
1197
143M
                }
1198
150M
                MARK_POP(ctx->lastmark);
1199
150M
                LASTMARK_RESTORE();
1200
150M
                ctx->u.rep->count = ctx->count-1;
1201
150M
                state->ptr = ptr;
1202
150M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
162M
            state->repeat = ctx->u.rep->prev;
1207
162M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
162M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
162M
            RETURN_ON_SUCCESS(ret);
1211
974k
            state->ptr = ptr;
1212
974k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
108M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
108M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
108M
                   ptr, pattern[1]));
1565
108M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
108M
            state->ptr = ptr - pattern[1];
1568
108M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
108M
            RETURN_ON_FAILURE(ret);
1570
103M
            pattern += pattern[0];
1571
103M
            DISPATCH;
1572
1573
103M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
44.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
44.4M
                   ptr, pattern[1]));
1578
44.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
44.4M
                state->ptr = ptr - pattern[1];
1580
44.4M
                LASTMARK_SAVE();
1581
44.4M
                if (state->repeat)
1582
44.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
88.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
88.8M
                if (ret) {
1586
11.8k
                    if (state->repeat)
1587
11.8k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
11.8k
                    RETURN_ON_ERROR(ret);
1589
11.8k
                    RETURN_FAILURE;
1590
11.8k
                }
1591
44.4M
                if (state->repeat)
1592
44.4M
                    MARK_POP(ctx->lastmark);
1593
44.4M
                LASTMARK_RESTORE();
1594
44.4M
            }
1595
44.4M
            pattern += pattern[0];
1596
44.4M
            DISPATCH;
1597
1598
44.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.65G
exit:
1620
1.65G
    ctx_pos = ctx->last_ctx_pos;
1621
1.65G
    jump = ctx->jump;
1622
1.65G
    DATA_POP_DISCARD(ctx);
1623
1.65G
    if (ctx_pos == -1) {
1624
494M
        state->sigcount = sigcount;
1625
494M
        return ret;
1626
494M
    }
1627
1.16G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.16G
    switch (jump) {
1630
293M
        case JUMP_MAX_UNTIL_2:
1631
293M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
293M
            goto jump_max_until_2;
1633
162M
        case JUMP_MAX_UNTIL_3:
1634
162M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
162M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
171M
        case JUMP_BRANCH:
1643
171M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
171M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
161M
        case JUMP_REPEAT:
1658
161M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
161M
            goto jump_repeat;
1660
15.2M
        case JUMP_REPEAT_ONE_1:
1661
15.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
15.2M
            goto jump_repeat_one_1;
1663
207M
        case JUMP_REPEAT_ONE_2:
1664
207M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
207M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
108M
        case JUMP_ASSERT:
1673
108M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
108M
            goto jump_assert;
1675
44.4M
        case JUMP_ASSERT_NOT:
1676
44.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
44.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.16G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.16G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
147M
{
601
147M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
147M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
147M
    Py_ssize_t ret = 0;
604
147M
    int jump;
605
147M
    unsigned int sigcount = state->sigcount;
606
607
147M
    SRE(match_context)* ctx;
608
147M
    SRE(match_context)* nextctx;
609
147M
    INIT_TRACE(state);
610
611
147M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
147M
    DATA_ALLOC(SRE(match_context), ctx);
614
147M
    ctx->last_ctx_pos = -1;
615
147M
    ctx->jump = JUMP_NONE;
616
147M
    ctx->toplevel = toplevel;
617
147M
    ctx_pos = alloc_pos;
618
619
147M
#if USE_COMPUTED_GOTOS
620
147M
#include "sre_targets.h"
621
147M
#endif
622
623
294M
entrance:
624
625
294M
    ;  // Fashion statement.
626
294M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
294M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.71M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.71M
                   end - ptr, (size_t) pattern[3]));
634
5.71M
            RETURN_FAILURE;
635
5.71M
        }
636
24.1M
        pattern += pattern[1] + 1;
637
24.1M
    }
638
639
289M
#if USE_COMPUTED_GOTOS
640
289M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
289M
    {
647
648
289M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
128M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
128M
                   ptr, pattern[0]));
653
128M
            {
654
128M
                int i = pattern[0];
655
128M
                if (i & 1)
656
20.3M
                    state->lastindex = i/2 + 1;
657
128M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
124M
                    int j = state->lastmark + 1;
663
128M
                    while (j < i)
664
4.02M
                        state->mark[j++] = NULL;
665
124M
                    state->lastmark = i;
666
124M
                }
667
128M
                state->mark[i] = ptr;
668
128M
            }
669
128M
            pattern++;
670
128M
            DISPATCH;
671
672
128M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
53.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.0M
                   ptr, *pattern));
677
53.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.4M
                RETURN_FAILURE;
679
35.5M
            pattern++;
680
35.5M
            ptr++;
681
35.5M
            DISPATCH;
682
683
35.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
48.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
48.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
48.4M
            if (ctx->toplevel &&
698
17.2M
                ((state->match_all && ptr != state->end) ||
699
17.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
48.4M
            state->ptr = ptr;
704
48.4M
            RETURN_SUCCESS;
705
706
12.8M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.8M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.8M
            if (!SRE(at)(state, ptr, *pattern))
711
2.02M
                RETURN_FAILURE;
712
10.8M
            pattern++;
713
10.8M
            DISPATCH;
714
715
10.8M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
34.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
34.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
34.6M
            if (ptr >= end ||
749
34.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
367k
                RETURN_FAILURE;
751
34.2M
            pattern += pattern[0];
752
34.2M
            ptr++;
753
34.2M
            DISPATCH;
754
755
34.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
948k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
948k
                   pattern, ptr, pattern[0]));
758
948k
            if (ptr >= end ||
759
948k
                sre_lower_ascii(*ptr) != *pattern)
760
163k
                RETURN_FAILURE;
761
784k
            pattern++;
762
784k
            ptr++;
763
784k
            DISPATCH;
764
765
784k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.3M
        TARGET(SRE_OP_JUMP):
845
25.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.3M
                   ptr, pattern[0]));
850
25.3M
            pattern += pattern[0];
851
25.3M
            DISPATCH;
852
853
47.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
47.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
47.8M
            LASTMARK_SAVE();
858
47.8M
            if (state->repeat)
859
10.5M
                MARK_PUSH(ctx->lastmark);
860
139M
            for (; pattern[0]; pattern += pattern[0]) {
861
115M
                if (pattern[1] == SRE_OP_LITERAL &&
862
53.4M
                    (ptr >= end ||
863
53.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.1M
                    continue;
865
91.6M
                if (pattern[1] == SRE_OP_IN &&
866
11.6M
                    (ptr >= end ||
867
11.6M
                     !SRE(charset)(state, pattern + 3,
868
11.6M
                                   (SRE_CODE) *ptr)))
869
6.50M
                    continue;
870
85.1M
                state->ptr = ptr;
871
85.1M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
85.1M
                if (ret) {
873
23.9M
                    if (state->repeat)
874
10.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
23.9M
                    RETURN_ON_ERROR(ret);
876
23.9M
                    RETURN_SUCCESS;
877
23.9M
                }
878
61.1M
                if (state->repeat)
879
6.21k
                    MARK_POP_KEEP(ctx->lastmark);
880
61.1M
                LASTMARK_RESTORE();
881
61.1M
            }
882
23.8M
            if (state->repeat)
883
282k
                MARK_POP_DISCARD(ctx->lastmark);
884
23.8M
            RETURN_FAILURE;
885
886
144M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
144M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
144M
                   pattern[1], pattern[2]));
898
899
144M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
967k
                RETURN_FAILURE; /* cannot match */
901
902
143M
            state->ptr = ptr;
903
904
143M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
143M
            RETURN_ON_ERROR(ret);
906
143M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
143M
            ctx->count = ret;
908
143M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
143M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
117M
                RETURN_FAILURE;
917
918
26.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
622k
                ptr == state->end &&
920
54.3k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
54.3k
            {
922
                /* tail is empty.  we're finished */
923
54.3k
                state->ptr = ptr;
924
54.3k
                RETURN_SUCCESS;
925
54.3k
            }
926
927
26.5M
            LASTMARK_SAVE();
928
26.5M
            if (state->repeat)
929
12.6M
                MARK_PUSH(ctx->lastmark);
930
931
26.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.00M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.00M
                for (;;) {
936
16.8M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.8M
                        ptr--;
939
11.8M
                        ctx->count--;
940
11.8M
                    }
941
5.00M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.92M
                        break;
943
2.07M
                    state->ptr = ptr;
944
2.07M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.07M
                            pattern+pattern[0]);
946
2.07M
                    if (ret) {
947
2.07M
                        if (state->repeat)
948
503k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.07M
                        RETURN_ON_ERROR(ret);
950
2.07M
                        RETURN_SUCCESS;
951
2.07M
                    }
952
101
                    if (state->repeat)
953
101
                        MARK_POP_KEEP(ctx->lastmark);
954
101
                    LASTMARK_RESTORE();
955
956
101
                    ptr--;
957
101
                    ctx->count--;
958
101
                }
959
2.92M
                if (state->repeat)
960
1.56M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.5M
            } else {
962
                /* general case */
963
23.3M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
22.6M
                    state->ptr = ptr;
965
22.6M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
22.6M
                            pattern+pattern[0]);
967
22.6M
                    if (ret) {
968
20.8M
                        if (state->repeat)
969
10.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
20.8M
                        RETURN_ON_ERROR(ret);
971
20.8M
                        RETURN_SUCCESS;
972
20.8M
                    }
973
1.80M
                    if (state->repeat)
974
959k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.80M
                    LASTMARK_RESTORE();
976
977
1.80M
                    ptr--;
978
1.80M
                    ctx->count--;
979
1.80M
                }
980
736k
                if (state->repeat)
981
534k
                    MARK_POP_DISCARD(ctx->lastmark);
982
736k
            }
983
3.66M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
6.12M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
6.12M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
6.12M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
6.12M
            ctx->u.rep = repeat_pool_malloc(state);
1127
6.12M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
6.12M
            ctx->u.rep->count = -1;
1131
6.12M
            ctx->u.rep->pattern = pattern;
1132
6.12M
            ctx->u.rep->prev = state->repeat;
1133
6.12M
            ctx->u.rep->last_ptr = NULL;
1134
6.12M
            state->repeat = ctx->u.rep;
1135
1136
6.12M
            state->ptr = ptr;
1137
6.12M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
6.12M
            state->repeat = ctx->u.rep->prev;
1139
6.12M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
6.12M
            if (ret) {
1142
6.01M
                RETURN_ON_ERROR(ret);
1143
6.01M
                RETURN_SUCCESS;
1144
6.01M
            }
1145
110k
            RETURN_FAILURE;
1146
1147
21.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
21.7M
            ctx->u.rep = state->repeat;
1155
21.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
21.7M
            state->ptr = ptr;
1159
1160
21.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
21.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
21.7M
                   ptr, ctx->count));
1164
1165
21.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
21.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.06M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
17.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
17.6M
                ctx->u.rep->count = ctx->count;
1185
17.6M
                LASTMARK_SAVE();
1186
17.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
17.6M
                LAST_PTR_PUSH();
1189
17.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
17.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
17.6M
                        ctx->u.rep->pattern+3);
1192
17.6M
                LAST_PTR_POP();
1193
17.6M
                if (ret) {
1194
15.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
15.1M
                    RETURN_ON_ERROR(ret);
1196
15.1M
                    RETURN_SUCCESS;
1197
15.1M
                }
1198
2.54M
                MARK_POP(ctx->lastmark);
1199
2.54M
                LASTMARK_RESTORE();
1200
2.54M
                ctx->u.rep->count = ctx->count-1;
1201
2.54M
                state->ptr = ptr;
1202
2.54M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.61M
            state->repeat = ctx->u.rep->prev;
1207
6.61M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.61M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.61M
            RETURN_ON_SUCCESS(ret);
1211
597k
            state->ptr = ptr;
1212
597k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.92M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.92M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.92M
                   ptr, pattern[1]));
1565
1.92M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.92M
            state->ptr = ptr - pattern[1];
1568
1.92M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.92M
            RETURN_ON_FAILURE(ret);
1570
1.86M
            pattern += pattern[0];
1571
1.86M
            DISPATCH;
1572
1573
5.03M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.03M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.03M
                   ptr, pattern[1]));
1578
5.03M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.03M
                state->ptr = ptr - pattern[1];
1580
5.03M
                LASTMARK_SAVE();
1581
5.03M
                if (state->repeat)
1582
5.03M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.0M
                if (ret) {
1586
1.20k
                    if (state->repeat)
1587
1.20k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.20k
                    RETURN_ON_ERROR(ret);
1589
1.20k
                    RETURN_FAILURE;
1590
1.20k
                }
1591
5.03M
                if (state->repeat)
1592
5.03M
                    MARK_POP(ctx->lastmark);
1593
5.03M
                LASTMARK_RESTORE();
1594
5.03M
            }
1595
5.03M
            pattern += pattern[0];
1596
5.03M
            DISPATCH;
1597
1598
5.03M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
294M
exit:
1620
294M
    ctx_pos = ctx->last_ctx_pos;
1621
294M
    jump = ctx->jump;
1622
294M
    DATA_POP_DISCARD(ctx);
1623
294M
    if (ctx_pos == -1) {
1624
147M
        state->sigcount = sigcount;
1625
147M
        return ret;
1626
147M
    }
1627
147M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
147M
    switch (jump) {
1630
17.6M
        case JUMP_MAX_UNTIL_2:
1631
17.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
17.6M
            goto jump_max_until_2;
1633
6.61M
        case JUMP_MAX_UNTIL_3:
1634
6.61M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.61M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
85.1M
        case JUMP_BRANCH:
1643
85.1M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
85.1M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
6.12M
        case JUMP_REPEAT:
1658
6.12M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
6.12M
            goto jump_repeat;
1660
2.07M
        case JUMP_REPEAT_ONE_1:
1661
2.07M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.07M
            goto jump_repeat_one_1;
1663
22.6M
        case JUMP_REPEAT_ONE_2:
1664
22.6M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
22.6M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.92M
        case JUMP_ASSERT:
1673
1.92M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.92M
            goto jump_assert;
1675
5.03M
        case JUMP_ASSERT_NOT:
1676
5.03M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.03M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
147M
    }
1683
1684
0
    return ret; /* should never get here */
1685
147M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
233M
{
601
233M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
233M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
233M
    Py_ssize_t ret = 0;
604
233M
    int jump;
605
233M
    unsigned int sigcount = state->sigcount;
606
607
233M
    SRE(match_context)* ctx;
608
233M
    SRE(match_context)* nextctx;
609
233M
    INIT_TRACE(state);
610
611
233M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
233M
    DATA_ALLOC(SRE(match_context), ctx);
614
233M
    ctx->last_ctx_pos = -1;
615
233M
    ctx->jump = JUMP_NONE;
616
233M
    ctx->toplevel = toplevel;
617
233M
    ctx_pos = alloc_pos;
618
619
233M
#if USE_COMPUTED_GOTOS
620
233M
#include "sre_targets.h"
621
233M
#endif
622
623
621M
entrance:
624
625
621M
    ;  // Fashion statement.
626
621M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
621M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
28.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
587
            TRACE(("reject (got %tu chars, need %zu)\n",
633
587
                   end - ptr, (size_t) pattern[3]));
634
587
            RETURN_FAILURE;
635
587
        }
636
28.8M
        pattern += pattern[1] + 1;
637
28.8M
    }
638
639
621M
#if USE_COMPUTED_GOTOS
640
621M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
621M
    {
647
648
621M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
226M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
226M
                   ptr, pattern[0]));
653
226M
            {
654
226M
                int i = pattern[0];
655
226M
                if (i & 1)
656
25.4M
                    state->lastindex = i/2 + 1;
657
226M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
225M
                    int j = state->lastmark + 1;
663
226M
                    while (j < i)
664
1.17M
                        state->mark[j++] = NULL;
665
225M
                    state->lastmark = i;
666
225M
                }
667
226M
                state->mark[i] = ptr;
668
226M
            }
669
226M
            pattern++;
670
226M
            DISPATCH;
671
672
226M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
53.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.8M
                   ptr, *pattern));
677
53.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
20.6M
                RETURN_FAILURE;
679
33.1M
            pattern++;
680
33.1M
            ptr++;
681
33.1M
            DISPATCH;
682
683
33.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
106M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
106M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
106M
            if (ctx->toplevel &&
698
25.4M
                ((state->match_all && ptr != state->end) ||
699
25.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
106M
            state->ptr = ptr;
704
106M
            RETURN_SUCCESS;
705
706
830k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
830k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
830k
            if (!SRE(at)(state, ptr, *pattern))
711
801k
                RETURN_FAILURE;
712
28.9k
            pattern++;
713
28.9k
            DISPATCH;
714
715
28.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
132M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
132M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
132M
            if (ptr >= end ||
749
132M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.84M
                RETURN_FAILURE;
751
126M
            pattern += pattern[0];
752
126M
            ptr++;
753
126M
            DISPATCH;
754
755
126M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.73M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.73M
                   pattern, ptr, pattern[0]));
758
3.73M
            if (ptr >= end ||
759
3.73M
                sre_lower_ascii(*ptr) != *pattern)
760
22.4k
                RETURN_FAILURE;
761
3.71M
            pattern++;
762
3.71M
            ptr++;
763
3.71M
            DISPATCH;
764
765
3.71M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
31.9M
        TARGET(SRE_OP_JUMP):
845
31.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
31.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
31.9M
                   ptr, pattern[0]));
850
31.9M
            pattern += pattern[0];
851
31.9M
            DISPATCH;
852
853
52.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
52.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
52.4M
            LASTMARK_SAVE();
858
52.4M
            if (state->repeat)
859
48.8M
                MARK_PUSH(ctx->lastmark);
860
119M
            for (; pattern[0]; pattern += pattern[0]) {
861
98.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
47.5M
                    (ptr >= end ||
863
47.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
23.5M
                    continue;
865
75.0M
                if (pattern[1] == SRE_OP_IN &&
866
46.3M
                    (ptr >= end ||
867
46.3M
                     !SRE(charset)(state, pattern + 3,
868
46.3M
                                   (SRE_CODE) *ptr)))
869
39.3M
                    continue;
870
35.6M
                state->ptr = ptr;
871
35.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
35.6M
                if (ret) {
873
31.4M
                    if (state->repeat)
874
29.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
31.4M
                    RETURN_ON_ERROR(ret);
876
31.4M
                    RETURN_SUCCESS;
877
31.4M
                }
878
4.22M
                if (state->repeat)
879
5.46k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.22M
                LASTMARK_RESTORE();
881
4.22M
            }
882
20.9M
            if (state->repeat)
883
19.2M
                MARK_POP_DISCARD(ctx->lastmark);
884
20.9M
            RETURN_FAILURE;
885
886
270M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
270M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
270M
                   pattern[1], pattern[2]));
898
899
270M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
198k
                RETURN_FAILURE; /* cannot match */
901
902
270M
            state->ptr = ptr;
903
904
270M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
270M
            RETURN_ON_ERROR(ret);
906
270M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
270M
            ctx->count = ret;
908
270M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
270M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
161M
                RETURN_FAILURE;
917
918
108M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.24M
                ptr == state->end &&
920
18.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.8k
            {
922
                /* tail is empty.  we're finished */
923
18.8k
                state->ptr = ptr;
924
18.8k
                RETURN_SUCCESS;
925
18.8k
            }
926
927
108M
            LASTMARK_SAVE();
928
108M
            if (state->repeat)
929
77.1M
                MARK_PUSH(ctx->lastmark);
930
931
108M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
37.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
37.6M
                for (;;) {
936
76.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
43.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
38.4M
                        ptr--;
939
38.4M
                        ctx->count--;
940
38.4M
                    }
941
37.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
32.8M
                        break;
943
4.78M
                    state->ptr = ptr;
944
4.78M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.78M
                            pattern+pattern[0]);
946
4.78M
                    if (ret) {
947
4.78M
                        if (state->repeat)
948
4.76M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.78M
                        RETURN_ON_ERROR(ret);
950
4.78M
                        RETURN_SUCCESS;
951
4.78M
                    }
952
178
                    if (state->repeat)
953
178
                        MARK_POP_KEEP(ctx->lastmark);
954
178
                    LASTMARK_RESTORE();
955
956
178
                    ptr--;
957
178
                    ctx->count--;
958
178
                }
959
32.8M
                if (state->repeat)
960
32.8M
                    MARK_POP_DISCARD(ctx->lastmark);
961
71.2M
            } else {
962
                /* general case */
963
72.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
71.7M
                    state->ptr = ptr;
965
71.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
71.7M
                            pattern+pattern[0]);
967
71.7M
                    if (ret) {
968
71.0M
                        if (state->repeat)
969
39.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
71.0M
                        RETURN_ON_ERROR(ret);
971
71.0M
                        RETURN_SUCCESS;
972
71.0M
                    }
973
790k
                    if (state->repeat)
974
589k
                        MARK_POP_KEEP(ctx->lastmark);
975
790k
                    LASTMARK_RESTORE();
976
977
790k
                    ptr--;
978
790k
                    ctx->count--;
979
790k
                }
980
297k
                if (state->repeat)
981
294k
                    MARK_POP_DISCARD(ctx->lastmark);
982
297k
            }
983
33.1M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
56.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
56.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
56.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
56.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
56.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
56.6M
            ctx->u.rep->count = -1;
1131
56.6M
            ctx->u.rep->pattern = pattern;
1132
56.6M
            ctx->u.rep->prev = state->repeat;
1133
56.6M
            ctx->u.rep->last_ptr = NULL;
1134
56.6M
            state->repeat = ctx->u.rep;
1135
1136
56.6M
            state->ptr = ptr;
1137
56.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
56.6M
            state->repeat = ctx->u.rep->prev;
1139
56.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
56.6M
            if (ret) {
1142
56.6M
                RETURN_ON_ERROR(ret);
1143
56.6M
                RETURN_SUCCESS;
1144
56.6M
            }
1145
1.06k
            RETURN_FAILURE;
1146
1147
104M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
104M
            ctx->u.rep = state->repeat;
1155
104M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
104M
            state->ptr = ptr;
1159
1160
104M
            ctx->count = ctx->u.rep->count+1;
1161
1162
104M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
104M
                   ptr, ctx->count));
1164
1165
104M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
104M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.83M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
101M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
101M
                ctx->u.rep->count = ctx->count;
1185
101M
                LASTMARK_SAVE();
1186
101M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
101M
                LAST_PTR_PUSH();
1189
101M
                ctx->u.rep->last_ptr = state->ptr;
1190
101M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
101M
                        ctx->u.rep->pattern+3);
1192
101M
                LAST_PTR_POP();
1193
101M
                if (ret) {
1194
47.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
47.8M
                    RETURN_ON_ERROR(ret);
1196
47.8M
                    RETURN_SUCCESS;
1197
47.8M
                }
1198
54.1M
                MARK_POP(ctx->lastmark);
1199
54.1M
                LASTMARK_RESTORE();
1200
54.1M
                ctx->u.rep->count = ctx->count-1;
1201
54.1M
                state->ptr = ptr;
1202
54.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
56.9M
            state->repeat = ctx->u.rep->prev;
1207
56.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
56.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
56.9M
            RETURN_ON_SUCCESS(ret);
1211
295k
            state->ptr = ptr;
1212
295k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
39.6M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
39.6M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
39.6M
                   ptr, pattern[1]));
1565
39.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
39.6M
            state->ptr = ptr - pattern[1];
1568
39.6M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
39.6M
            RETURN_ON_FAILURE(ret);
1570
35.9M
            pattern += pattern[0];
1571
35.9M
            DISPATCH;
1572
1573
35.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
20.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
20.3M
                   ptr, pattern[1]));
1578
20.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
20.3M
                state->ptr = ptr - pattern[1];
1580
20.3M
                LASTMARK_SAVE();
1581
20.3M
                if (state->repeat)
1582
20.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
40.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
40.7M
                if (ret) {
1586
5.25k
                    if (state->repeat)
1587
5.25k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.25k
                    RETURN_ON_ERROR(ret);
1589
5.25k
                    RETURN_FAILURE;
1590
5.25k
                }
1591
20.3M
                if (state->repeat)
1592
20.3M
                    MARK_POP(ctx->lastmark);
1593
20.3M
                LASTMARK_RESTORE();
1594
20.3M
            }
1595
20.3M
            pattern += pattern[0];
1596
20.3M
            DISPATCH;
1597
1598
20.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
621M
exit:
1620
621M
    ctx_pos = ctx->last_ctx_pos;
1621
621M
    jump = ctx->jump;
1622
621M
    DATA_POP_DISCARD(ctx);
1623
621M
    if (ctx_pos == -1) {
1624
233M
        state->sigcount = sigcount;
1625
233M
        return ret;
1626
233M
    }
1627
387M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
387M
    switch (jump) {
1630
101M
        case JUMP_MAX_UNTIL_2:
1631
101M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
101M
            goto jump_max_until_2;
1633
56.9M
        case JUMP_MAX_UNTIL_3:
1634
56.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
56.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
35.6M
        case JUMP_BRANCH:
1643
35.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
35.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
56.6M
        case JUMP_REPEAT:
1658
56.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
56.6M
            goto jump_repeat;
1660
4.78M
        case JUMP_REPEAT_ONE_1:
1661
4.78M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.78M
            goto jump_repeat_one_1;
1663
71.7M
        case JUMP_REPEAT_ONE_2:
1664
71.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
71.7M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
39.6M
        case JUMP_ASSERT:
1673
39.6M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
39.6M
            goto jump_assert;
1675
20.3M
        case JUMP_ASSERT_NOT:
1676
20.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
20.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
387M
    }
1683
1684
0
    return ret; /* should never get here */
1685
387M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
113M
{
601
113M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
113M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
113M
    Py_ssize_t ret = 0;
604
113M
    int jump;
605
113M
    unsigned int sigcount = state->sigcount;
606
607
113M
    SRE(match_context)* ctx;
608
113M
    SRE(match_context)* nextctx;
609
113M
    INIT_TRACE(state);
610
611
113M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
113M
    DATA_ALLOC(SRE(match_context), ctx);
614
113M
    ctx->last_ctx_pos = -1;
615
113M
    ctx->jump = JUMP_NONE;
616
113M
    ctx->toplevel = toplevel;
617
113M
    ctx_pos = alloc_pos;
618
619
113M
#if USE_COMPUTED_GOTOS
620
113M
#include "sre_targets.h"
621
113M
#endif
622
623
742M
entrance:
624
625
742M
    ;  // Fashion statement.
626
742M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
742M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
33.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
364
            TRACE(("reject (got %tu chars, need %zu)\n",
633
364
                   end - ptr, (size_t) pattern[3]));
634
364
            RETURN_FAILURE;
635
364
        }
636
33.9M
        pattern += pattern[1] + 1;
637
33.9M
    }
638
639
742M
#if USE_COMPUTED_GOTOS
640
742M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
742M
    {
647
648
742M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
183M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
183M
                   ptr, pattern[0]));
653
183M
            {
654
183M
                int i = pattern[0];
655
183M
                if (i & 1)
656
41.2M
                    state->lastindex = i/2 + 1;
657
183M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
181M
                    int j = state->lastmark + 1;
663
183M
                    while (j < i)
664
2.26M
                        state->mark[j++] = NULL;
665
181M
                    state->lastmark = i;
666
181M
                }
667
183M
                state->mark[i] = ptr;
668
183M
            }
669
183M
            pattern++;
670
183M
            DISPATCH;
671
672
183M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
50.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
50.4M
                   ptr, *pattern));
677
50.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
18.5M
                RETURN_FAILURE;
679
31.8M
            pattern++;
680
31.8M
            ptr++;
681
31.8M
            DISPATCH;
682
683
31.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
106M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
106M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
106M
            if (ctx->toplevel &&
698
33.4M
                ((state->match_all && ptr != state->end) ||
699
33.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
106M
            state->ptr = ptr;
704
106M
            RETURN_SUCCESS;
705
706
567k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
567k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
567k
            if (!SRE(at)(state, ptr, *pattern))
711
562k
                RETURN_FAILURE;
712
4.40k
            pattern++;
713
4.40k
            DISPATCH;
714
715
4.40k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
163M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
163M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
163M
            if (ptr >= end ||
749
163M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.92M
                RETURN_FAILURE;
751
161M
            pattern += pattern[0];
752
161M
            ptr++;
753
161M
            DISPATCH;
754
755
161M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.17M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.17M
                   pattern, ptr, pattern[0]));
758
1.17M
            if (ptr >= end ||
759
1.17M
                sre_lower_ascii(*ptr) != *pattern)
760
20.6k
                RETURN_FAILURE;
761
1.15M
            pattern++;
762
1.15M
            ptr++;
763
1.15M
            DISPATCH;
764
765
1.15M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
49.9M
        TARGET(SRE_OP_JUMP):
845
49.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
49.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
49.9M
                   ptr, pattern[0]));
850
49.9M
            pattern += pattern[0];
851
49.9M
            DISPATCH;
852
853
82.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
82.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
82.8M
            LASTMARK_SAVE();
858
82.8M
            if (state->repeat)
859
79.4M
                MARK_PUSH(ctx->lastmark);
860
177M
            for (; pattern[0]; pattern += pattern[0]) {
861
144M
                if (pattern[1] == SRE_OP_LITERAL &&
862
65.8M
                    (ptr >= end ||
863
65.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
43.0M
                    continue;
865
101M
                if (pattern[1] == SRE_OP_IN &&
866
74.8M
                    (ptr >= end ||
867
74.8M
                     !SRE(charset)(state, pattern + 3,
868
74.8M
                                   (SRE_CODE) *ptr)))
869
50.7M
                    continue;
870
50.4M
                state->ptr = ptr;
871
50.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
50.4M
                if (ret) {
873
49.4M
                    if (state->repeat)
874
46.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
49.4M
                    RETURN_ON_ERROR(ret);
876
49.4M
                    RETURN_SUCCESS;
877
49.4M
                }
878
1.00M
                if (state->repeat)
879
5.62k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.00M
                LASTMARK_RESTORE();
881
1.00M
            }
882
33.4M
            if (state->repeat)
883
32.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
33.4M
            RETURN_FAILURE;
885
886
253M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
253M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
253M
                   pattern[1], pattern[2]));
898
899
253M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
32.8k
                RETURN_FAILURE; /* cannot match */
901
902
253M
            state->ptr = ptr;
903
904
253M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
253M
            RETURN_ON_ERROR(ret);
906
253M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
253M
            ctx->count = ret;
908
253M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
253M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
71.3M
                RETURN_FAILURE;
917
918
181M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
788k
                ptr == state->end &&
920
3.51k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.51k
            {
922
                /* tail is empty.  we're finished */
923
3.51k
                state->ptr = ptr;
924
3.51k
                RETURN_SUCCESS;
925
3.51k
            }
926
927
181M
            LASTMARK_SAVE();
928
181M
            if (state->repeat)
929
142M
                MARK_PUSH(ctx->lastmark);
930
931
181M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
69.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
69.6M
                for (;;) {
936
172M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
111M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
103M
                        ptr--;
939
103M
                        ctx->count--;
940
103M
                    }
941
69.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
61.2M
                        break;
943
8.42M
                    state->ptr = ptr;
944
8.42M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
8.42M
                            pattern+pattern[0]);
946
8.42M
                    if (ret) {
947
8.42M
                        if (state->repeat)
948
8.41M
                            MARK_POP_DISCARD(ctx->lastmark);
949
8.42M
                        RETURN_ON_ERROR(ret);
950
8.42M
                        RETURN_SUCCESS;
951
8.42M
                    }
952
221
                    if (state->repeat)
953
221
                        MARK_POP_KEEP(ctx->lastmark);
954
221
                    LASTMARK_RESTORE();
955
956
221
                    ptr--;
957
221
                    ctx->count--;
958
221
                }
959
61.2M
                if (state->repeat)
960
61.2M
                    MARK_POP_DISCARD(ctx->lastmark);
961
112M
            } else {
962
                /* general case */
963
112M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
112M
                    state->ptr = ptr;
965
112M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
112M
                            pattern+pattern[0]);
967
112M
                    if (ret) {
968
111M
                        if (state->repeat)
969
72.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
111M
                        RETURN_ON_ERROR(ret);
971
111M
                        RETURN_SUCCESS;
972
111M
                    }
973
597k
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
597k
                    LASTMARK_RESTORE();
976
977
597k
                    ptr--;
978
597k
                    ctx->count--;
979
597k
                }
980
81.1k
                if (state->repeat)
981
80.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
81.1k
            }
983
61.3M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
98.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
98.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
98.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
98.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
98.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
98.8M
            ctx->u.rep->count = -1;
1131
98.8M
            ctx->u.rep->pattern = pattern;
1132
98.8M
            ctx->u.rep->prev = state->repeat;
1133
98.8M
            ctx->u.rep->last_ptr = NULL;
1134
98.8M
            state->repeat = ctx->u.rep;
1135
1136
98.8M
            state->ptr = ptr;
1137
98.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
98.8M
            state->repeat = ctx->u.rep->prev;
1139
98.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
98.8M
            if (ret) {
1142
98.8M
                RETURN_ON_ERROR(ret);
1143
98.8M
                RETURN_SUCCESS;
1144
98.8M
            }
1145
738
            RETURN_FAILURE;
1146
1147
179M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
179M
            ctx->u.rep = state->repeat;
1155
179M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
179M
            state->ptr = ptr;
1159
1160
179M
            ctx->count = ctx->u.rep->count+1;
1161
1162
179M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
179M
                   ptr, ctx->count));
1164
1165
179M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
179M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.77M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
174M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
174M
                ctx->u.rep->count = ctx->count;
1185
174M
                LASTMARK_SAVE();
1186
174M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
174M
                LAST_PTR_PUSH();
1189
174M
                ctx->u.rep->last_ptr = state->ptr;
1190
174M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
174M
                        ctx->u.rep->pattern+3);
1192
174M
                LAST_PTR_POP();
1193
174M
                if (ret) {
1194
80.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
80.2M
                    RETURN_ON_ERROR(ret);
1196
80.2M
                    RETURN_SUCCESS;
1197
80.2M
                }
1198
94.1M
                MARK_POP(ctx->lastmark);
1199
94.1M
                LASTMARK_RESTORE();
1200
94.1M
                ctx->u.rep->count = ctx->count-1;
1201
94.1M
                state->ptr = ptr;
1202
94.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
98.9M
            state->repeat = ctx->u.rep->prev;
1207
98.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
98.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
98.9M
            RETURN_ON_SUCCESS(ret);
1211
80.9k
            state->ptr = ptr;
1212
80.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, comparing characters via sre_lower_ascii() */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, comparing characters via sre_lower_unicode() */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, comparing characters via sre_lower_locale() */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
66.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
66.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
66.5M
                   ptr, pattern[1]));
1565
66.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
66.5M
            state->ptr = ptr - pattern[1];
1568
66.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
66.5M
            RETURN_ON_FAILURE(ret);
1570
65.9M
            pattern += pattern[0];
1571
65.9M
            DISPATCH;
1572
1573
65.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
19.0M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
19.0M
                   ptr, pattern[1]));
1578
19.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
19.0M
                state->ptr = ptr - pattern[1];
1580
19.0M
                LASTMARK_SAVE();
1581
19.0M
                if (state->repeat)
1582
19.0M
                    MARK_PUSH(ctx->lastmark);
1583
1584
38.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
38.0M
                if (ret) {
1586
5.37k
                    if (state->repeat)
1587
5.37k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.37k
                    RETURN_ON_ERROR(ret);
1589
5.37k
                    RETURN_FAILURE;
1590
5.37k
                }
1591
18.9M
                if (state->repeat)
1592
18.9M
                    MARK_POP(ctx->lastmark);
1593
18.9M
                LASTMARK_RESTORE();
1594
18.9M
            }
1595
18.9M
            pattern += pattern[0];
1596
18.9M
            DISPATCH;
1597
1598
18.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
742M
exit:
1620
742M
    ctx_pos = ctx->last_ctx_pos;
1621
742M
    jump = ctx->jump;
1622
742M
    DATA_POP_DISCARD(ctx);
1623
742M
    if (ctx_pos == -1) {
1624
113M
        state->sigcount = sigcount;
1625
113M
        return ret;
1626
113M
    }
1627
629M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
629M
    switch (jump) {
1630
174M
        case JUMP_MAX_UNTIL_2:
1631
174M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
174M
            goto jump_max_until_2;
1633
98.9M
        case JUMP_MAX_UNTIL_3:
1634
98.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
98.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
50.4M
        case JUMP_BRANCH:
1643
50.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
50.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
98.8M
        case JUMP_REPEAT:
1658
98.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
98.8M
            goto jump_repeat;
1660
8.42M
        case JUMP_REPEAT_ONE_1:
1661
8.42M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
8.42M
            goto jump_repeat_one_1;
1663
112M
        case JUMP_REPEAT_ONE_2:
1664
112M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
112M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
66.5M
        case JUMP_ASSERT:
1673
66.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
66.5M
            goto jump_assert;
1675
19.0M
        case JUMP_ASSERT_NOT:
1676
19.0M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
19.0M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
629M
    }
1683
1684
0
    return ret; /* should never get here */
1685
629M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
319M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
89.1M
{
1694
89.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
89.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
89.1M
    Py_ssize_t status = 0;
1697
89.1M
    Py_ssize_t prefix_len = 0;
1698
89.1M
    Py_ssize_t prefix_skip = 0;
1699
89.1M
    SRE_CODE* prefix = NULL;
1700
89.1M
    SRE_CODE* charset = NULL;
1701
89.1M
    SRE_CODE* overlap = NULL;
1702
89.1M
    int flags = 0;
1703
89.1M
    INIT_TRACE(state);
1704
1705
89.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
89.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
89.1M
        flags = pattern[2];
1713
1714
89.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.65M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.65M
                   end - ptr, (size_t) pattern[3]));
1717
1.65M
            return 0;
1718
1.65M
        }
1719
87.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.01M
            end -= pattern[3] - 1;
1723
7.01M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.01M
        }
1726
1727
87.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.02M
            prefix_len = pattern[5];
1731
7.02M
            prefix_skip = pattern[6];
1732
7.02M
            prefix = pattern + 7;
1733
7.02M
            overlap = prefix + prefix_len - 1;
1734
80.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
75.6M
            charset = pattern + 5;
1738
1739
87.4M
        pattern += 1 + pattern[1];
1740
87.4M
    }
1741
1742
87.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
87.4M
           prefix, prefix_len, prefix_skip));
1744
87.4M
    TRACE(("charset = %p\n", charset));
1745
1746
87.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.45M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
3.73M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.73M
#endif
1753
3.73M
        end = (SRE_CHAR *)state->end;
1754
3.73M
        state->must_advance = 0;
1755
7.04M
        while (ptr < end) {
1756
88.9M
            while (*ptr != c) {
1757
82.4M
                if (++ptr >= end)
1758
579k
                    return 0;
1759
82.4M
            }
1760
6.45M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.45M
            state->start = ptr;
1762
6.45M
            state->ptr = ptr + prefix_skip;
1763
6.45M
            if (flags & SRE_INFO_LITERAL)
1764
3.14k
                return 1; /* we got all of it */
1765
6.45M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.45M
            if (status != 0)
1767
5.86M
                return status;
1768
592k
            ++ptr;
1769
592k
            RESET_CAPTURE_GROUP();
1770
592k
        }
1771
11.0k
        return 0;
1772
3.73M
    }
1773
1774
81.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
563k
        Py_ssize_t i = 0;
1778
1779
563k
        end = (SRE_CHAR *)state->end;
1780
563k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.59M
        for (i = 0; i < prefix_len; i++)
1784
1.06M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
532k
#endif
1787
1.31M
        while (ptr < end) {
1788
1.31M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.94M
            while (*ptr++ != c) {
1790
6.63M
                if (ptr >= end)
1791
316
                    return 0;
1792
6.63M
            }
1793
1.31M
            if (ptr >= end)
1794
53
                return 0;
1795
1796
1.31M
            i = 1;
1797
1.31M
            state->must_advance = 0;
1798
1.31M
            do {
1799
1.31M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.21M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.21M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.21M
                    state->start = ptr - (prefix_len - 1);
1808
1.21M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.21M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.21M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.21M
                    if (status != 0)
1813
563k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
647k
                    if (++ptr >= end)
1816
27
                        return 0;
1817
647k
                    RESET_CAPTURE_GROUP();
1818
647k
                }
1819
752k
                i = overlap[i];
1820
752k
            } while (i != 0);
1821
1.31M
        }
1822
0
        return 0;
1823
563k
    }
1824
1825
80.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
75.6M
        end = (SRE_CHAR *)state->end;
1828
75.6M
        state->must_advance = 0;
1829
78.0M
        for (;;) {
1830
338M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
260M
                ptr++;
1832
78.0M
            if (ptr >= end)
1833
4.12M
                return 0;
1834
73.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
73.9M
            state->start = ptr;
1836
73.9M
            state->ptr = ptr;
1837
73.9M
            status = SRE(match)(state, pattern, 0);
1838
73.9M
            if (status != 0)
1839
71.5M
                break;
1840
2.41M
            ptr++;
1841
2.41M
            RESET_CAPTURE_GROUP();
1842
2.41M
        }
1843
75.6M
    } else {
1844
        /* general case */
1845
4.77M
        assert(ptr <= end);
1846
4.77M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.77M
        state->start = state->ptr = ptr;
1848
4.77M
        status = SRE(match)(state, pattern, 1);
1849
4.77M
        state->must_advance = 0;
1850
4.77M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
320M
        while (status == 0 && ptr < end) {
1858
315M
            ptr++;
1859
315M
            RESET_CAPTURE_GROUP();
1860
315M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
315M
            state->start = state->ptr = ptr;
1862
315M
            status = SRE(match)(state, pattern, 0);
1863
315M
        }
1864
4.77M
    }
1865
1866
76.3M
    return status;
1867
80.4M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
35.0M
{
1694
35.0M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
35.0M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
35.0M
    Py_ssize_t status = 0;
1697
35.0M
    Py_ssize_t prefix_len = 0;
1698
35.0M
    Py_ssize_t prefix_skip = 0;
1699
35.0M
    SRE_CODE* prefix = NULL;
1700
35.0M
    SRE_CODE* charset = NULL;
1701
35.0M
    SRE_CODE* overlap = NULL;
1702
35.0M
    int flags = 0;
1703
35.0M
    INIT_TRACE(state);
1704
1705
35.0M
    if (ptr > end)
1706
0
        return 0;
1707
1708
35.0M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
35.0M
        flags = pattern[2];
1713
1714
35.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.52M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.52M
                   end - ptr, (size_t) pattern[3]));
1717
1.52M
            return 0;
1718
1.52M
        }
1719
33.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.94M
            end -= pattern[3] - 1;
1723
1.94M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.94M
        }
1726
1727
33.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.94M
            prefix_len = pattern[5];
1731
1.94M
            prefix_skip = pattern[6];
1732
1.94M
            prefix = pattern + 7;
1733
1.94M
            overlap = prefix + prefix_len - 1;
1734
31.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
28.0M
            charset = pattern + 5;
1738
1739
33.4M
        pattern += 1 + pattern[1];
1740
33.4M
    }
1741
1742
33.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.4M
           prefix, prefix_len, prefix_skip));
1744
33.4M
    TRACE(("charset = %p\n", charset));
1745
1746
33.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.93M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.93M
#if SIZEOF_SRE_CHAR < 4
1750
1.93M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.93M
#endif
1753
1.93M
        end = (SRE_CHAR *)state->end;
1754
1.93M
        state->must_advance = 0;
1755
2.06M
        while (ptr < end) {
1756
27.7M
            while (*ptr != c) {
1757
26.2M
                if (++ptr >= end)
1758
518k
                    return 0;
1759
26.2M
            }
1760
1.54M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.54M
            state->start = ptr;
1762
1.54M
            state->ptr = ptr + prefix_skip;
1763
1.54M
            if (flags & SRE_INFO_LITERAL)
1764
258
                return 1; /* we got all of it */
1765
1.54M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.54M
            if (status != 0)
1767
1.40M
                return status;
1768
131k
            ++ptr;
1769
131k
            RESET_CAPTURE_GROUP();
1770
131k
        }
1771
8.75k
        return 0;
1772
1.93M
    }
1773
1774
31.5M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
12.5k
        Py_ssize_t i = 0;
1778
1779
12.5k
        end = (SRE_CHAR *)state->end;
1780
12.5k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
12.5k
#if SIZEOF_SRE_CHAR < 4
1783
37.7k
        for (i = 0; i < prefix_len; i++)
1784
25.1k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
12.5k
#endif
1787
266k
        while (ptr < end) {
1788
266k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.04M
            while (*ptr++ != c) {
1790
1.77M
                if (ptr >= end)
1791
62
                    return 0;
1792
1.77M
            }
1793
266k
            if (ptr >= end)
1794
28
                return 0;
1795
1796
266k
            i = 1;
1797
266k
            state->must_advance = 0;
1798
266k
            do {
1799
266k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
198k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
198k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
198k
                    state->start = ptr - (prefix_len - 1);
1808
198k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
198k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
198k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
198k
                    if (status != 0)
1813
12.4k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
185k
                    if (++ptr >= end)
1816
12
                        return 0;
1817
185k
                    RESET_CAPTURE_GROUP();
1818
185k
                }
1819
254k
                i = overlap[i];
1820
254k
            } while (i != 0);
1821
266k
        }
1822
0
        return 0;
1823
12.5k
    }
1824
1825
31.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
28.0M
        end = (SRE_CHAR *)state->end;
1828
28.0M
        state->must_advance = 0;
1829
29.4M
        for (;;) {
1830
76.6M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
47.1M
                ptr++;
1832
29.4M
            if (ptr >= end)
1833
2.93M
                return 0;
1834
26.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
26.5M
            state->start = ptr;
1836
26.5M
            state->ptr = ptr;
1837
26.5M
            status = SRE(match)(state, pattern, 0);
1838
26.5M
            if (status != 0)
1839
25.1M
                break;
1840
1.42M
            ptr++;
1841
1.42M
            RESET_CAPTURE_GROUP();
1842
1.42M
        }
1843
28.0M
    } else {
1844
        /* general case */
1845
3.50M
        assert(ptr <= end);
1846
3.50M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.50M
        state->start = state->ptr = ptr;
1848
3.50M
        status = SRE(match)(state, pattern, 1);
1849
3.50M
        state->must_advance = 0;
1850
3.50M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
89.5M
        while (status == 0 && ptr < end) {
1858
86.0M
            ptr++;
1859
86.0M
            RESET_CAPTURE_GROUP();
1860
86.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
86.0M
            state->start = state->ptr = ptr;
1862
86.0M
            status = SRE(match)(state, pattern, 0);
1863
86.0M
        }
1864
3.50M
    }
1865
1866
28.6M
    return status;
1867
31.5M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
46.5M
{
1694
46.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
46.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
46.5M
    Py_ssize_t status = 0;
1697
46.5M
    Py_ssize_t prefix_len = 0;
1698
46.5M
    Py_ssize_t prefix_skip = 0;
1699
46.5M
    SRE_CODE* prefix = NULL;
1700
46.5M
    SRE_CODE* charset = NULL;
1701
46.5M
    SRE_CODE* overlap = NULL;
1702
46.5M
    int flags = 0;
1703
46.5M
    INIT_TRACE(state);
1704
1705
46.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
46.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
46.5M
        flags = pattern[2];
1713
1714
46.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
115k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
115k
                   end - ptr, (size_t) pattern[3]));
1717
115k
            return 0;
1718
115k
        }
1719
46.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.31M
            end -= pattern[3] - 1;
1723
2.31M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.31M
        }
1726
1727
46.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.31M
            prefix_len = pattern[5];
1731
2.31M
            prefix_skip = pattern[6];
1732
2.31M
            prefix = pattern + 7;
1733
2.31M
            overlap = prefix + prefix_len - 1;
1734
44.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
43.0M
            charset = pattern + 5;
1738
1739
46.4M
        pattern += 1 + pattern[1];
1740
46.4M
    }
1741
1742
46.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
46.4M
           prefix, prefix_len, prefix_skip));
1744
46.4M
    TRACE(("charset = %p\n", charset));
1745
1746
46.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.79M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.79M
#if SIZEOF_SRE_CHAR < 4
1750
1.79M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.79M
#endif
1753
1.79M
        end = (SRE_CHAR *)state->end;
1754
1.79M
        state->must_advance = 0;
1755
1.98M
        while (ptr < end) {
1756
39.8M
            while (*ptr != c) {
1757
37.9M
                if (++ptr >= end)
1758
57.2k
                    return 0;
1759
37.9M
            }
1760
1.92M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.92M
            state->start = ptr;
1762
1.92M
            state->ptr = ptr + prefix_skip;
1763
1.92M
            if (flags & SRE_INFO_LITERAL)
1764
844
                return 1; /* we got all of it */
1765
1.92M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.92M
            if (status != 0)
1767
1.73M
                return status;
1768
184k
            ++ptr;
1769
184k
            RESET_CAPTURE_GROUP();
1770
184k
        }
1771
1.36k
        return 0;
1772
1.79M
    }
1773
1774
44.6M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
519k
        Py_ssize_t i = 0;
1778
1779
519k
        end = (SRE_CHAR *)state->end;
1780
519k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
519k
#if SIZEOF_SRE_CHAR < 4
1783
1.55M
        for (i = 0; i < prefix_len; i++)
1784
1.03M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
519k
#endif
1787
792k
        while (ptr < end) {
1788
792k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.78M
            while (*ptr++ != c) {
1790
1.99M
                if (ptr >= end)
1791
122
                    return 0;
1792
1.99M
            }
1793
792k
            if (ptr >= end)
1794
10
                return 0;
1795
1796
792k
            i = 1;
1797
792k
            state->must_advance = 0;
1798
792k
            do {
1799
792k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
767k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
767k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
767k
                    state->start = ptr - (prefix_len - 1);
1808
767k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
767k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
767k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
767k
                    if (status != 0)
1813
519k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
248k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
248k
                    RESET_CAPTURE_GROUP();
1818
248k
                }
1819
272k
                i = overlap[i];
1820
272k
            } while (i != 0);
1821
792k
        }
1822
0
        return 0;
1823
519k
    }
1824
1825
44.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
43.0M
        end = (SRE_CHAR *)state->end;
1828
43.0M
        state->must_advance = 0;
1829
43.6M
        for (;;) {
1830
189M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
145M
                ptr++;
1832
43.6M
            if (ptr >= end)
1833
1.13M
                return 0;
1834
42.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
42.4M
            state->start = ptr;
1836
42.4M
            state->ptr = ptr;
1837
42.4M
            status = SRE(match)(state, pattern, 0);
1838
42.4M
            if (status != 0)
1839
41.9M
                break;
1840
506k
            ptr++;
1841
506k
            RESET_CAPTURE_GROUP();
1842
506k
        }
1843
43.0M
    } else {
1844
        /* general case */
1845
1.02M
        assert(ptr <= end);
1846
1.02M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.02M
        state->start = state->ptr = ptr;
1848
1.02M
        status = SRE(match)(state, pattern, 1);
1849
1.02M
        state->must_advance = 0;
1850
1.02M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
159M
        while (status == 0 && ptr < end) {
1858
158M
            ptr++;
1859
158M
            RESET_CAPTURE_GROUP();
1860
158M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
158M
            state->start = state->ptr = ptr;
1862
158M
            status = SRE(match)(state, pattern, 0);
1863
158M
        }
1864
1.02M
    }
1865
1866
42.9M
    return status;
1867
44.1M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.55M
{
1694
7.55M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.55M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.55M
    Py_ssize_t status = 0;
1697
7.55M
    Py_ssize_t prefix_len = 0;
1698
7.55M
    Py_ssize_t prefix_skip = 0;
1699
7.55M
    SRE_CODE* prefix = NULL;
1700
7.55M
    SRE_CODE* charset = NULL;
1701
7.55M
    SRE_CODE* overlap = NULL;
1702
7.55M
    int flags = 0;
1703
7.55M
    INIT_TRACE(state);
1704
1705
7.55M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.55M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.55M
        flags = pattern[2];
1713
1714
7.55M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
7.06k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
7.06k
                   end - ptr, (size_t) pattern[3]));
1717
7.06k
            return 0;
1718
7.06k
        }
1719
7.54M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.75M
            end -= pattern[3] - 1;
1723
2.75M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.75M
        }
1726
1727
7.54M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.75M
            prefix_len = pattern[5];
1731
2.75M
            prefix_skip = pattern[6];
1732
2.75M
            prefix = pattern + 7;
1733
2.75M
            overlap = prefix + prefix_len - 1;
1734
4.79M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.54M
            charset = pattern + 5;
1738
1739
7.54M
        pattern += 1 + pattern[1];
1740
7.54M
    }
1741
1742
7.54M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.54M
           prefix, prefix_len, prefix_skip));
1744
7.54M
    TRACE(("charset = %p\n", charset));
1745
1746
7.54M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.72M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.72M
        end = (SRE_CHAR *)state->end;
1754
2.72M
        state->must_advance = 0;
1755
3.00M
        while (ptr < end) {
1756
21.2M
            while (*ptr != c) {
1757
18.2M
                if (++ptr >= end)
1758
3.65k
                    return 0;
1759
18.2M
            }
1760
2.99M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.99M
            state->start = ptr;
1762
2.99M
            state->ptr = ptr + prefix_skip;
1763
2.99M
            if (flags & SRE_INFO_LITERAL)
1764
2.04k
                return 1; /* we got all of it */
1765
2.99M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.99M
            if (status != 0)
1767
2.71M
                return status;
1768
276k
            ++ptr;
1769
276k
            RESET_CAPTURE_GROUP();
1770
276k
        }
1771
931
        return 0;
1772
2.72M
    }
1773
1774
4.82M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
31.4k
        Py_ssize_t i = 0;
1778
1779
31.4k
        end = (SRE_CHAR *)state->end;
1780
31.4k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
255k
        while (ptr < end) {
1788
255k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.12M
            while (*ptr++ != c) {
1790
2.86M
                if (ptr >= end)
1791
132
                    return 0;
1792
2.86M
            }
1793
255k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
255k
            i = 1;
1797
255k
            state->must_advance = 0;
1798
256k
            do {
1799
256k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
245k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
245k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
245k
                    state->start = ptr - (prefix_len - 1);
1808
245k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
245k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
245k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
245k
                    if (status != 0)
1813
31.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
213k
                    if (++ptr >= end)
1816
4
                        return 0;
1817
213k
                    RESET_CAPTURE_GROUP();
1818
213k
                }
1819
225k
                i = overlap[i];
1820
225k
            } while (i != 0);
1821
255k
        }
1822
0
        return 0;
1823
31.4k
    }
1824
1825
4.79M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.54M
        end = (SRE_CHAR *)state->end;
1828
4.54M
        state->must_advance = 0;
1829
5.03M
        for (;;) {
1830
72.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
67.3M
                ptr++;
1832
5.03M
            if (ptr >= end)
1833
55.5k
                return 0;
1834
4.97M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.97M
            state->start = ptr;
1836
4.97M
            state->ptr = ptr;
1837
4.97M
            status = SRE(match)(state, pattern, 0);
1838
4.97M
            if (status != 0)
1839
4.49M
                break;
1840
482k
            ptr++;
1841
482k
            RESET_CAPTURE_GROUP();
1842
482k
        }
1843
4.54M
    } else {
1844
        /* general case */
1845
246k
        assert(ptr <= end);
1846
246k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
246k
        state->start = state->ptr = ptr;
1848
246k
        status = SRE(match)(state, pattern, 1);
1849
246k
        state->must_advance = 0;
1850
246k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
0
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
71.1M
        while (status == 0 && ptr < end) {
1858
70.9M
            ptr++;
1859
70.9M
            RESET_CAPTURE_GROUP();
1860
70.9M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
70.9M
            state->start = state->ptr = ptr;
1862
70.9M
            status = SRE(match)(state, pattern, 0);
1863
70.9M
        }
1864
246k
    }
1865
1866
4.73M
    return status;
1867
4.79M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/