Coverage Report

Created: 2025-08-24 07:03

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.9M
{
18
    /* check if pointer is at given position */
19
20
14.9M
    Py_ssize_t thisp, thatp;
21
22
14.9M
    switch (at) {
23
24
7.25M
    case SRE_AT_BEGINNING:
25
7.25M
    case SRE_AT_BEGINNING_STRING:
26
7.25M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.65M
    case SRE_AT_END:
33
4.65M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.65M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.65M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.02M
    case SRE_AT_END_STRING:
42
3.02M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.9M
    }
87
88
0
    return 0;
89
14.9M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
13.5M
{
18
    /* check if pointer is at given position */
19
20
13.5M
    Py_ssize_t thisp, thatp;
21
22
13.5M
    switch (at) {
23
24
7.20M
    case SRE_AT_BEGINNING:
25
7.20M
    case SRE_AT_BEGINNING_STRING:
26
7.20M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.25M
    case SRE_AT_END:
33
4.25M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.25M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.25M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.11M
    case SRE_AT_END_STRING:
42
2.11M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.5M
    }
87
88
0
    return 0;
89
13.5M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
791k
{
18
    /* check if pointer is at given position */
19
20
791k
    Py_ssize_t thisp, thatp;
21
22
791k
    switch (at) {
23
24
46.7k
    case SRE_AT_BEGINNING:
25
46.7k
    case SRE_AT_BEGINNING_STRING:
26
46.7k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
320k
    case SRE_AT_END:
33
320k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
320k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
320k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
424k
    case SRE_AT_END_STRING:
42
424k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
791k
    }
87
88
0
    return 0;
89
791k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
586k
{
18
    /* check if pointer is at given position */
19
20
586k
    Py_ssize_t thisp, thatp;
21
22
586k
    switch (at) {
23
24
8.41k
    case SRE_AT_BEGINNING:
25
8.41k
    case SRE_AT_BEGINNING_STRING:
26
8.41k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
85.8k
    case SRE_AT_END:
33
85.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
85.8k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
85.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
492k
    case SRE_AT_END_STRING:
42
492k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
586k
    }
87
88
0
    return 0;
89
586k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.76G
{
94
    /* check if character is a member of the given set */
95
96
1.76G
    int ok = 1;
97
98
3.93G
    for (;;) {
99
3.93G
        switch (*set++) {
100
101
1.15G
        case SRE_OP_FAILURE:
102
1.15G
            return !ok;
103
104
1.14G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.14G
            if (ch == set[0])
107
5.40M
                return ok;
108
1.13G
            set++;
109
1.13G
            break;
110
111
11.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.5M
            if (sre_category(set[0], (int) ch))
114
7.82M
                return ok;
115
3.73M
            set++;
116
3.73M
            break;
117
118
913M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
913M
            if (ch < 256 &&
121
913M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
396M
                return ok;
123
517M
            set += 256/SRE_CODE_BITS;
124
517M
            break;
125
126
331M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
331M
            if (set[0] <= ch && ch <= set[1])
129
196M
                return ok;
130
134M
            set += 2;
131
134M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
378M
        case SRE_OP_NEGATE:
148
378M
            ok = !ok;
149
378M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.93G
        }
175
3.93G
    }
176
1.76G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
338M
{
94
    /* check if character is a member of the given set */
95
96
338M
    int ok = 1;
97
98
706M
    for (;;) {
99
706M
        switch (*set++) {
100
101
188M
        case SRE_OP_FAILURE:
102
188M
            return !ok;
103
104
230M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
230M
            if (ch == set[0])
107
2.96M
                return ok;
108
227M
            set++;
109
227M
            break;
110
111
10.7M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.7M
            if (sre_category(set[0], (int) ch))
114
7.00M
                return ok;
115
3.72M
            set++;
116
3.72M
            break;
117
118
84.4M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
84.4M
            if (ch < 256 &&
121
84.4M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
44.9M
                return ok;
123
39.5M
            set += 256/SRE_CODE_BITS;
124
39.5M
            break;
125
126
157M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
157M
            if (set[0] <= ch && ch <= set[1])
129
95.5M
                return ok;
130
62.2M
            set += 2;
131
62.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
35.1M
        case SRE_OP_NEGATE:
148
35.1M
            ok = !ok;
149
35.1M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
706M
        }
175
706M
    }
176
338M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
793M
{
94
    /* check if character is a member of the given set */
95
96
793M
    int ok = 1;
97
98
1.86G
    for (;;) {
99
1.86G
        switch (*set++) {
100
101
567M
        case SRE_OP_FAILURE:
102
567M
            return !ok;
103
104
628M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
628M
            if (ch == set[0])
107
1.42M
                return ok;
108
627M
            set++;
109
627M
            break;
110
111
176k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
176k
            if (sre_category(set[0], (int) ch))
114
169k
                return ok;
115
6.73k
            set++;
116
6.73k
            break;
117
118
357M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
357M
            if (ch < 256 &&
121
357M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
133M
                return ok;
123
224M
            set += 256/SRE_CODE_BITS;
124
224M
            break;
125
126
150M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
150M
            if (set[0] <= ch && ch <= set[1])
129
91.6M
                return ok;
130
59.2M
            set += 2;
131
59.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
164M
        case SRE_OP_NEGATE:
148
164M
            ok = !ok;
149
164M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.86G
        }
175
1.86G
    }
176
793M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
629M
{
94
    /* check if character is a member of the given set */
95
96
629M
    int ok = 1;
97
98
1.35G
    for (;;) {
99
1.35G
        switch (*set++) {
100
101
400M
        case SRE_OP_FAILURE:
102
400M
            return !ok;
103
104
281M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
281M
            if (ch == set[0])
107
1.00M
                return ok;
108
280M
            set++;
109
280M
            break;
110
111
651k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
651k
            if (sre_category(set[0], (int) ch))
114
650k
                return ok;
115
855
            set++;
116
855
            break;
117
118
471M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
471M
            if (ch < 256 &&
121
471M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
218M
                return ok;
123
253M
            set += 256/SRE_CODE_BITS;
124
253M
            break;
125
126
23.2M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
23.2M
            if (set[0] <= ch && ch <= set[1])
129
9.82M
                return ok;
130
13.3M
            set += 2;
131
13.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
178M
        case SRE_OP_NEGATE:
148
178M
            ok = !ok;
149
178M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.35G
        }
175
1.35G
    }
176
629M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
706M
{
195
706M
    SRE_CODE chr;
196
706M
    SRE_CHAR c;
197
706M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
706M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
706M
    Py_ssize_t i;
200
706M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
706M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.5M
        end = ptr + maxcount;
205
206
706M
    switch (pattern[0]) {
207
208
624M
    case SRE_OP_IN:
209
        /* repeated set */
210
624M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
998M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
373M
            ptr++;
213
624M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
74.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
74.2M
        chr = pattern[1];
232
74.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
74.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
71.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
71.3M
        else
238
71.3M
#endif
239
78.9M
        while (ptr < end && *ptr == c)
240
4.68M
            ptr++;
241
74.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
7.48M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
7.48M
        chr = pattern[1];
270
7.48M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
7.48M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
3.84M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.84M
        else
276
3.84M
#endif
277
42.1M
        while (ptr < end && *ptr != c)
278
34.6M
            ptr++;
279
7.48M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
706M
    }
319
320
706M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
706M
           ptr - (SRE_CHAR*) state->ptr));
322
706M
    return ptr - (SRE_CHAR*) state->ptr;
323
706M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
172M
{
195
172M
    SRE_CODE chr;
196
172M
    SRE_CHAR c;
197
172M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
172M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
172M
    Py_ssize_t i;
200
172M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
172M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
3.49M
        end = ptr + maxcount;
205
206
172M
    switch (pattern[0]) {
207
208
107M
    case SRE_OP_IN:
209
        /* repeated set */
210
107M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
218M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
110M
            ptr++;
213
107M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
65.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
65.2M
        chr = pattern[1];
232
65.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
65.2M
        c = (SRE_CHAR) chr;
234
65.2M
#if SIZEOF_SRE_CHAR < 4
235
65.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
65.2M
        else
238
65.2M
#endif
239
67.5M
        while (ptr < end && *ptr == c)
240
2.34M
            ptr++;
241
65.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
169k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
169k
        chr = pattern[1];
270
169k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
169k
        c = (SRE_CHAR) chr;
272
169k
#if SIZEOF_SRE_CHAR < 4
273
169k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
169k
        else
276
169k
#endif
277
5.89M
        while (ptr < end && *ptr != c)
278
5.72M
            ptr++;
279
169k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
172M
    }
319
320
172M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
172M
           ptr - (SRE_CHAR*) state->ptr));
322
172M
    return ptr - (SRE_CHAR*) state->ptr;
323
172M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
308M
{
195
308M
    SRE_CODE chr;
196
308M
    SRE_CHAR c;
197
308M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
308M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
308M
    Py_ssize_t i;
200
308M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
308M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.08M
        end = ptr + maxcount;
205
206
308M
    switch (pattern[0]) {
207
208
298M
    case SRE_OP_IN:
209
        /* repeated set */
210
298M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
424M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
126M
            ptr++;
213
298M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.17M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.17M
        chr = pattern[1];
232
6.17M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.17M
        c = (SRE_CHAR) chr;
234
6.17M
#if SIZEOF_SRE_CHAR < 4
235
6.17M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.17M
        else
238
6.17M
#endif
239
8.27M
        while (ptr < end && *ptr == c)
240
2.10M
            ptr++;
241
6.17M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.67M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.67M
        chr = pattern[1];
270
3.67M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.67M
        c = (SRE_CHAR) chr;
272
3.67M
#if SIZEOF_SRE_CHAR < 4
273
3.67M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.67M
        else
276
3.67M
#endif
277
11.5M
        while (ptr < end && *ptr != c)
278
7.91M
            ptr++;
279
3.67M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
308M
    }
319
320
308M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
308M
           ptr - (SRE_CHAR*) state->ptr));
322
308M
    return ptr - (SRE_CHAR*) state->ptr;
323
308M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
225M
{
195
225M
    SRE_CODE chr;
196
225M
    SRE_CHAR c;
197
225M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
225M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
225M
    Py_ssize_t i;
200
225M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
225M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.92M
        end = ptr + maxcount;
205
206
225M
    switch (pattern[0]) {
207
208
219M
    case SRE_OP_IN:
209
        /* repeated set */
210
219M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
355M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
136M
            ptr++;
213
219M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.87M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.87M
        chr = pattern[1];
232
2.87M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.87M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.10M
        while (ptr < end && *ptr == c)
240
235k
            ptr++;
241
2.87M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.63M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.63M
        chr = pattern[1];
270
3.63M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.63M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.6M
        while (ptr < end && *ptr != c)
278
21.0M
            ptr++;
279
3.63M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
225M
    }
319
320
225M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
225M
           ptr - (SRE_CHAR*) state->ptr));
322
225M
    return ptr - (SRE_CHAR*) state->ptr;
323
225M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Copy state->lastmark and state->lastindex into the current match context.
   Requires `ctx` and `state` to be in scope at the expansion site. */
#define LASTMARK_SAVE()     \
354
782M
    do { \
355
782M
        ctx->lastmark = state->lastmark; \
356
782M
        ctx->lastindex = state->lastindex; \
357
782M
    } while (0)
358
/* Copy ctx->lastmark and ctx->lastindex back into `state` (the inverse of
   LASTMARK_SAVE()). */
#define LASTMARK_RESTORE()  \
359
262M
    do { \
360
262M
        state->lastmark = ctx->lastmark; \
361
262M
        state->lastindex = ctx->lastindex; \
362
262M
    } while (0)
363
364
/* Push the value of ctx->u.rep->last_ptr onto the data stack (tracing its
   index first).  Expands to DATA_PUSH, which can `return` from the enclosing
   function if the data stack cannot be grown. */
#define LAST_PTR_PUSH()     \
365
266M
    do { \
366
266M
        TRACE(("push last_ptr: %zd", \
367
266M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
266M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
266M
    } while (0)
370
/* Pop the top of the data stack back into ctx->u.rep->last_ptr and trace the
   restored index. */
#define LAST_PTR_POP()  \
371
266M
    do { \
372
266M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
266M
        TRACE(("pop last_ptr: %zd", \
374
266M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
266M
    } while (0)
376
377
0
/* Control-flow helpers for the matcher.  All of them leave the current
   position in the enclosing function:
     RETURN_ERROR(i)  - `return i` immediately.
     RETURN_FAILURE   - set `ret` to 0 and `goto exit`.
     RETURN_SUCCESS   - set `ret` to 1 and `goto exit`.
   They therefore require a local `ret` and an `exit` label at the
   expansion site. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
662M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
974M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Conditional variants keyed on the sign of `i`: negative is an error,
   zero a failure, positive a success.
   NOTE(review): `i` is not parenthesized and is evaluated more than once,
   so pass a plain variable, not an expression with side effects. */
#define RETURN_ON_ERROR(i) \
382
1.53G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
145M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
97.4M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.63G
/* Reserve sizeof(type) bytes at state->data_stack_base and point `ptr` at
   the reserved slot, then advance data_stack_base past it.
   Relies on `alloc_pos`, `ctx_pos` and `ctx` existing at the expansion site;
   `alloc_pos` receives the byte offset of the new slot.
   If the remaining capacity is too small, data_stack_grow() is called and a
   negative result is returned from the *enclosing function*.  After a
   successful grow, `ctx` is recomputed from `ctx_pos` (skipped when ctx_pos
   is -1) -- presumably because growing may relocate state->data_stack;
   data_stack_grow() is not visible here, confirm. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.63G
do { \
390
1.63G
    alloc_pos = state->data_stack_base; \
391
1.63G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.63G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.63G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
171M
        int j = data_stack_grow(state, sizeof(type)); \
395
171M
        if (j < 0) return j; \
396
171M
        if (ctx_pos != -1) \
397
171M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
171M
    } \
399
1.63G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.63G
    state->data_stack_base += sizeof(type); \
401
1.63G
} while (0)
402
403
1.79G
/* Set `ptr` to the object of type `type` located at byte offset `pos` from
   the start of state->data_stack.  No allocation or bounds check is done. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.79G
do { \
405
1.79G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.79G
    ptr = (type*)(state->data_stack+pos); \
407
1.79G
} while (0)
408
409
658M
/* Copy `size` bytes from `data` to the top of the data stack and advance
   state->data_stack_base by `size`.
   Relies on `ctx_pos` and `ctx` existing at the expansion site.  If the
   remaining capacity is too small, data_stack_grow() is called and a
   negative result is returned from the *enclosing function*; after a
   successful grow, `ctx` is recomputed from `ctx_pos` unless ctx_pos is -1
   (presumably because the stack buffer may have moved -- confirm against
   data_stack_grow()). */
#define DATA_STACK_PUSH(state, data, size) \
410
658M
do { \
411
658M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
658M
           data, state->data_stack_base, size)); \
413
658M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
91.0k
        int j = data_stack_grow(state, size); \
415
91.0k
        if (j < 0) return j; \
416
91.0k
        if (ctx_pos != -1) \
417
91.0k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
91.0k
    } \
419
658M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
658M
    state->data_stack_base += size; \
421
658M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
398M
/* Copy the topmost `size` bytes of the data stack into `data`.  When
   `discard` is non-zero, state->data_stack_base is also lowered by `size`;
   otherwise the bytes stay on the stack.  No underflow check is made here. */
#define DATA_STACK_POP(state, data, size, discard) \
427
398M
do { \
428
398M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
398M
           data, state->data_stack_base-size, size)); \
430
398M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
398M
    if (discard) \
432
398M
        state->data_stack_base -= size; \
433
398M
} while (0)
434
435
1.89G
/* Drop the topmost `size` bytes of the data stack without copying them
   anywhere: only state->data_stack_base is lowered. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.89G
do { \
437
1.89G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.89G
           state->data_stack_base-size, size)); \
439
1.89G
    state->data_stack_base -= size; \
440
1.89G
} while(0)
441
442
/* Convenience wrappers around the DATA_STACK_* macros that use the local
   variable `state` implicitly.  For the PUSH/POP/POP_DISCARD forms, `x` is a
   pointer and the size is taken from the pointed-to object (sizeof(*(x))). */
#define DATA_PUSH(x) \
443
266M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
266M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.63G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.63G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.79G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer to an index: the byte distance from state->beginning
   divided by state->charsize.  A NULL pointer yields -1. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): tracing aid for the mark array.
   With VERBOSE enabled and DO_TRACE true, it prints `label`, the number of
   marks (lastmark+1) and the index of each state->mark[j] for j in
   0..lastmark, inserting an extra space before every even j > 0.
   Without VERBOSE it expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save / restore the first (lastmark+1) pointer-sized entries of state->mark
   on the data stack.  Every macro in this group is a no-op when
   lastmark < 0.
     MARK_PUSH         - copy the marks onto the data stack (may `return`
                         from the enclosing function via DATA_STACK_PUSH).
     MARK_POP          - copy them back into state->mark and remove them
                         from the stack.
     MARK_POP_KEEP     - copy them back but leave them on the stack.
     MARK_POP_DISCARD  - remove them from the stack without copying.
   NOTE(review): `lastmark` is used unparenthesized in `lastmark >= 0` and
   `(lastmark+1)`; pass a simple expression (visible callers pass
   ctx->lastmark). */
#define MARK_PUSH(lastmark) \
472
651M
    do if (lastmark >= 0) { \
473
392M
        MARK_TRACE("push", (lastmark)); \
474
392M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
392M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
651M
    } while (0)
477
#define MARK_POP(lastmark) \
478
176M
    do if (lastmark >= 0) { \
479
130M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
130M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
130M
        MARK_TRACE("pop", (lastmark)); \
482
176M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.03M
    do if (lastmark >= 0) { \
485
2.03M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.03M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.03M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.03M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
475M
    do if (lastmark >= 0) { \
491
262M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
262M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
262M
        MARK_TRACE("pop discard", (lastmark)); \
494
475M
    } while (0)
495
496
546M
/* Identifiers stored in match_context.jump.  DO_JUMPX() records one of these
   in each new context frame; the initial frame created by SRE(match) uses
   JUMP_NONE. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
266M
#define JUMP_MAX_UNTIL_2     2
499
145M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
144M
#define JUMP_REPEAT          7
504
18.0M
#define JUMP_REPEAT_ONE_1    8
505
191M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
182M
#define JUMP_BRANCH          11
508
97.4M
#define JUMP_ASSERT          12
509
43.6M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a new context frame without a C-level
   recursive call:
     1. save the current `pattern` and `ptr` into `ctx`;
     2. allocate a new match_context on the data stack (DATA_ALLOC may
        `return` from the enclosing function on allocation failure);
     3. record `nextpattern`, `toplevel_`, `jumpvalue` and the position of the
        current frame (`ctx_pos`) in the new frame;
     4. make the new frame current and `goto entrance`.
   `jumplabel:` is defined inside the expansion; when control reaches it,
   `pattern` and `ptr` are reloaded from `ctx`.  The code that jumps back to
   the label is not part of this macro -- presumably it selects the label
   from ctx->jump; confirm at the `exit` handling.
   The expansion is several statements plus a label (no do/while wrapper), so
   it must be used inside a braced block. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.09G
    ctx->pattern = pattern; \
516
1.09G
    ctx->ptr = ptr; \
517
1.09G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.09G
    nextctx->pattern = nextpattern; \
519
1.09G
    nextctx->toplevel = toplevel_; \
520
1.09G
    nextctx->jump = jumpvalue; \
521
1.09G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.09G
    pattern = nextpattern; \
523
1.09G
    ctx_pos = alloc_pos; \
524
1.09G
    ctx = nextctx; \
525
1.09G
    goto entrance; \
526
1.09G
    jumplabel: \
527
1.09G
    pattern = ctx->pattern; \
528
1.09G
    ptr = ctx->ptr;
529
530
/* DO_JUMPX with the new frame inheriting the current frame's `toplevel`. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
949M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX with the new frame's `toplevel` forced to 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
141M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* One frame of the matcher's explicit context stack.  Frames live on
   state->data_stack and are created by DATA_ALLOC in SRE(match) and
   DO_JUMPX(). */
typedef struct {
537
    Py_ssize_t count;   /* per-opcode counter; usage is outside this view */
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;    /* its last_ptr is saved by LAST_PTR_PUSH/POP */
541
    } u;
542
    int lastmark;       /* copy of state->lastmark (see LASTMARK_SAVE) */
543
    int lastindex;      /* copy of state->lastindex (see LASTMARK_SAVE) */
544
    const SRE_CODE* pattern;    /* pattern position saved/restored by DO_JUMPX */
545
    const SRE_CHAR* ptr;        /* subject position saved/restored by DO_JUMPX */
546
    int toplevel;       /* checked by SRE_OP_SUCCESS before accepting a match */
547
    int jump;           /* one of the JUMP_* values */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the creating frame; -1 for the first */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount` and, whenever its low 12 bits become zero
   (mask 0xfff), call PyErr_CheckSignals().  A non-zero result makes the
   enclosing function return SRE_ERROR_INTERRUPTED.
   NOTE(review): the name starts with an underscore followed by an uppercase
   letter, which is a reserved identifier form in C. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.86G
    do {                                                           \
553
2.86G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.86G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS is what DISPATCH invokes.  In Py_DEBUG builds it
   additionally counts down state->fail_after_count (when it is >= 0) and,
   on reaching zero, sets the exception state->fail_after_exc and returns
   SRE_ERROR_INTERRUPTED -- presumably a fault-injection hook for tests;
   confirm where fail_after_count is set.  In other builds it is just
   _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.86G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalise USE_COMPUTED_GOTOS to a defined 0/1 value:
     - HAVE_COMPUTED_GOTOS defined: default to 1 unless already set;
     - not available but explicitly requested (non-zero): compile error;
     - otherwise: force 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch for SRE(match).
   With computed gotos, TARGET(OP) names the label TARGET_<OP> and DISPATCH
   runs MAYBE_CHECK_SIGNALS, then jumps through sre_targets[] indexed by the
   next opcode (advancing `pattern`).
   Otherwise TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label, where the signal check and `switch` are performed. */
#if USE_COMPUTED_GOTOS
585
2.96G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.86G
        do {                               \
588
2.86G
            MAYBE_CHECK_SIGNALS;           \
589
2.86G
            goto *sre_targets[*pattern++]; \
590
2.86G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
546M
{
601
546M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
546M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
546M
    Py_ssize_t ret = 0;
604
546M
    int jump;
605
546M
    unsigned int sigcount = state->sigcount;
606
607
546M
    SRE(match_context)* ctx;
608
546M
    SRE(match_context)* nextctx;
609
546M
    INIT_TRACE(state);
610
611
546M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
546M
    DATA_ALLOC(SRE(match_context), ctx);
614
546M
    ctx->last_ctx_pos = -1;
615
546M
    ctx->jump = JUMP_NONE;
616
546M
    ctx->toplevel = toplevel;
617
546M
    ctx_pos = alloc_pos;
618
619
546M
#if USE_COMPUTED_GOTOS
620
546M
#include "sre_targets.h"
621
546M
#endif
622
623
1.63G
entrance:
624
625
1.63G
    ;  // Fashion statement.
626
1.63G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.63G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
89.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
7.74M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
7.74M
                   end - ptr, (size_t) pattern[3]));
634
7.74M
            RETURN_FAILURE;
635
7.74M
        }
636
82.2M
        pattern += pattern[1] + 1;
637
82.2M
    }
638
639
1.62G
#if USE_COMPUTED_GOTOS
640
1.62G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.62G
    {
647
648
1.62G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
577M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
577M
                   ptr, pattern[0]));
653
577M
            {
654
577M
                int i = pattern[0];
655
577M
                if (i & 1)
656
82.4M
                    state->lastindex = i/2 + 1;
657
577M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
569M
                    int j = state->lastmark + 1;
663
577M
                    while (j < i)
664
7.67M
                        state->mark[j++] = NULL;
665
569M
                    state->lastmark = i;
666
569M
                }
667
577M
                state->mark[i] = ptr;
668
577M
            }
669
577M
            pattern++;
670
577M
            DISPATCH;
671
672
577M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
163M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
163M
                   ptr, *pattern));
677
163M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
57.5M
                RETURN_FAILURE;
679
105M
            pattern++;
680
105M
            ptr++;
681
105M
            DISPATCH;
682
683
105M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
244M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
244M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
244M
            if (ctx->toplevel &&
698
244M
                ((state->match_all && ptr != state->end) ||
699
70.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
244M
            state->ptr = ptr;
704
244M
            RETURN_SUCCESS;
705
706
14.9M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.9M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.9M
            if (!SRE(at)(state, ptr, *pattern))
711
4.12M
                RETURN_FAILURE;
712
10.8M
            pattern++;
713
10.8M
            DISPATCH;
714
715
10.8M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
301M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
301M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
301M
            if (ptr >= end ||
749
301M
                !SRE(charset)(state, pattern + 1, *ptr))
750
7.57M
                RETURN_FAILURE;
751
294M
            pattern += pattern[0];
752
294M
            ptr++;
753
294M
            DISPATCH;
754
755
294M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.13M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.13M
                   pattern, ptr, pattern[0]));
758
6.13M
            if (ptr >= end ||
759
6.13M
                sre_lower_ascii(*ptr) != *pattern)
760
349k
                RETURN_FAILURE;
761
5.78M
            pattern++;
762
5.78M
            ptr++;
763
5.78M
            DISPATCH;
764
765
5.78M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
104M
        TARGET(SRE_OP_JUMP):
845
104M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
104M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
104M
                   ptr, pattern[0]));
850
104M
            pattern += pattern[0];
851
104M
            DISPATCH;
852
853
179M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
179M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
179M
            LASTMARK_SAVE();
858
179M
            if (state->repeat)
859
127M
                MARK_PUSH(ctx->lastmark);
860
442M
            for (; pattern[0]; pattern += pattern[0]) {
861
363M
                if (pattern[1] == SRE_OP_LITERAL &&
862
363M
                    (ptr >= end ||
863
169M
                     (SRE_CODE) *ptr != pattern[2]))
864
90.2M
                    continue;
865
273M
                if (pattern[1] == SRE_OP_IN &&
866
273M
                    (ptr >= end ||
867
120M
                     !SRE(charset)(state, pattern + 3,
868
120M
                                   (SRE_CODE) *ptr)))
869
90.3M
                    continue;
870
182M
                state->ptr = ptr;
871
182M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
182M
                if (ret) {
873
101M
                    if (state->repeat)
874
81.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
101M
                    RETURN_ON_ERROR(ret);
876
101M
                    RETURN_SUCCESS;
877
101M
                }
878
81.8M
                if (state->repeat)
879
31.7k
                    MARK_POP_KEEP(ctx->lastmark);
880
81.8M
                LASTMARK_RESTORE();
881
81.8M
            }
882
78.5M
            if (state->repeat)
883
46.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
78.5M
            RETURN_FAILURE;
885
886
707M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
707M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
707M
                   pattern[1], pattern[2]));
898
899
707M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.13M
                RETURN_FAILURE; /* cannot match */
901
902
706M
            state->ptr = ptr;
903
904
706M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
706M
            RETURN_ON_ERROR(ret);
906
706M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
706M
            ctx->count = ret;
908
706M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
706M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
413M
                RETURN_FAILURE;
917
918
293M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
293M
                ptr == state->end &&
920
293M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
76.1k
            {
922
                /* tail is empty.  we're finished */
923
76.1k
                state->ptr = ptr;
924
76.1k
                RETURN_SUCCESS;
925
76.1k
            }
926
927
293M
            LASTMARK_SAVE();
928
293M
            if (state->repeat)
929
214M
                MARK_PUSH(ctx->lastmark);
930
931
293M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
103M
                ctx->u.chr = pattern[pattern[0]+1];
935
103M
                for (;;) {
936
235M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
235M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
131M
                        ptr--;
939
131M
                        ctx->count--;
940
131M
                    }
941
103M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
85.7M
                        break;
943
18.0M
                    state->ptr = ptr;
944
18.0M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
18.0M
                            pattern+pattern[0]);
946
18.0M
                    if (ret) {
947
18.0M
                        if (state->repeat)
948
16.1M
                            MARK_POP_DISCARD(ctx->lastmark);
949
18.0M
                        RETURN_ON_ERROR(ret);
950
18.0M
                        RETURN_SUCCESS;
951
18.0M
                    }
952
551
                    if (state->repeat)
953
551
                        MARK_POP_KEEP(ctx->lastmark);
954
551
                    LASTMARK_RESTORE();
955
956
551
                    ptr--;
957
551
                    ctx->count--;
958
551
                }
959
85.7M
                if (state->repeat)
960
84.0M
                    MARK_POP_DISCARD(ctx->lastmark);
961
189M
            } else {
962
                /* general case */
963
193M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
191M
                    state->ptr = ptr;
965
191M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
191M
                            pattern+pattern[0]);
967
191M
                    if (ret) {
968
188M
                        if (state->repeat)
969
113M
                            MARK_POP_DISCARD(ctx->lastmark);
970
188M
                        RETURN_ON_ERROR(ret);
971
188M
                        RETURN_SUCCESS;
972
188M
                    }
973
3.58M
                    if (state->repeat)
974
2.00M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.58M
                    LASTMARK_RESTORE();
976
977
3.58M
                    ptr--;
978
3.58M
                    ctx->count--;
979
3.58M
                }
980
1.24M
                if (state->repeat)
981
1.04M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.24M
            }
983
86.9M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
144M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
144M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
144M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
144M
            ctx->u.rep = repeat_pool_malloc(state);
1127
144M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
144M
            ctx->u.rep->count = -1;
1131
144M
            ctx->u.rep->pattern = pattern;
1132
144M
            ctx->u.rep->prev = state->repeat;
1133
144M
            ctx->u.rep->last_ptr = NULL;
1134
144M
            state->repeat = ctx->u.rep;
1135
1136
144M
            state->ptr = ptr;
1137
144M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
144M
            state->repeat = ctx->u.rep->prev;
1139
144M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
144M
            if (ret) {
1142
144M
                RETURN_ON_ERROR(ret);
1143
144M
                RETURN_SUCCESS;
1144
144M
            }
1145
101k
            RETURN_FAILURE;
1146
1147
278M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
278M
            ctx->u.rep = state->repeat;
1155
278M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
278M
            state->ptr = ptr;
1159
1160
278M
            ctx->count = ctx->u.rep->count+1;
1161
1162
278M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
278M
                   ptr, ctx->count));
1164
1165
278M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
278M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
278M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
278M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
266M
                ctx->u.rep->count = ctx->count;
1185
266M
                LASTMARK_SAVE();
1186
266M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
266M
                LAST_PTR_PUSH();
1189
266M
                ctx->u.rep->last_ptr = state->ptr;
1190
266M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
266M
                        ctx->u.rep->pattern+3);
1192
266M
                LAST_PTR_POP();
1193
266M
                if (ret) {
1194
132M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
132M
                    RETURN_ON_ERROR(ret);
1196
132M
                    RETURN_SUCCESS;
1197
132M
                }
1198
133M
                MARK_POP(ctx->lastmark);
1199
133M
                LASTMARK_RESTORE();
1200
133M
                ctx->u.rep->count = ctx->count-1;
1201
133M
                state->ptr = ptr;
1202
133M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
145M
            state->repeat = ctx->u.rep->prev;
1207
145M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
145M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
145M
            RETURN_ON_SUCCESS(ret);
1211
1.10M
            state->ptr = ptr;
1212
1.10M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
97.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
97.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
97.4M
                   ptr, pattern[1]));
1565
97.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
97.4M
            state->ptr = ptr - pattern[1];
1568
97.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
97.4M
            RETURN_ON_FAILURE(ret);
1570
93.2M
            pattern += pattern[0];
1571
93.2M
            DISPATCH;
1572
1573
93.2M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
43.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
43.6M
                   ptr, pattern[1]));
1578
43.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
43.6M
                state->ptr = ptr - pattern[1];
1580
43.6M
                LASTMARK_SAVE();
1581
43.6M
                if (state->repeat)
1582
43.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
87.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
87.2M
                if (ret) {
1586
25.5k
                    if (state->repeat)
1587
25.5k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
25.5k
                    RETURN_ON_ERROR(ret);
1589
25.5k
                    RETURN_FAILURE;
1590
25.5k
                }
1591
43.5M
                if (state->repeat)
1592
43.5M
                    MARK_POP(ctx->lastmark);
1593
43.5M
                LASTMARK_RESTORE();
1594
43.5M
            }
1595
43.5M
            pattern += pattern[0];
1596
43.5M
            DISPATCH;
1597
1598
43.5M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.63G
exit:
1620
1.63G
    ctx_pos = ctx->last_ctx_pos;
1621
1.63G
    jump = ctx->jump;
1622
1.63G
    DATA_POP_DISCARD(ctx);
1623
1.63G
    if (ctx_pos == -1) {
1624
546M
        state->sigcount = sigcount;
1625
546M
        return ret;
1626
546M
    }
1627
1.09G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.09G
    switch (jump) {
1630
266M
        case JUMP_MAX_UNTIL_2:
1631
266M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
266M
            goto jump_max_until_2;
1633
145M
        case JUMP_MAX_UNTIL_3:
1634
145M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
145M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
182M
        case JUMP_BRANCH:
1643
182M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
182M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
144M
        case JUMP_REPEAT:
1658
144M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
144M
            goto jump_repeat;
1660
18.0M
        case JUMP_REPEAT_ONE_1:
1661
18.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
18.0M
            goto jump_repeat_one_1;
1663
191M
        case JUMP_REPEAT_ONE_2:
1664
191M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
191M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
97.4M
        case JUMP_ASSERT:
1673
97.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
97.4M
            goto jump_assert;
1675
43.6M
        case JUMP_ASSERT_NOT:
1676
43.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
43.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.09G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.09G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
172M
{
601
172M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
172M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
172M
    Py_ssize_t ret = 0;
604
172M
    int jump;
605
172M
    unsigned int sigcount = state->sigcount;
606
607
172M
    SRE(match_context)* ctx;
608
172M
    SRE(match_context)* nextctx;
609
172M
    INIT_TRACE(state);
610
611
172M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
172M
    DATA_ALLOC(SRE(match_context), ctx);
614
172M
    ctx->last_ctx_pos = -1;
615
172M
    ctx->jump = JUMP_NONE;
616
172M
    ctx->toplevel = toplevel;
617
172M
    ctx_pos = alloc_pos;
618
619
172M
#if USE_COMPUTED_GOTOS
620
172M
#include "sre_targets.h"
621
172M
#endif
622
623
333M
entrance:
624
625
333M
    ;  // Fashion statement.
626
333M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
333M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
33.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
7.74M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
7.74M
                   end - ptr, (size_t) pattern[3]));
634
7.74M
            RETURN_FAILURE;
635
7.74M
        }
636
25.9M
        pattern += pattern[1] + 1;
637
25.9M
    }
638
639
325M
#if USE_COMPUTED_GOTOS
640
325M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
325M
    {
647
648
325M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
148M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
148M
                   ptr, pattern[0]));
653
148M
            {
654
148M
                int i = pattern[0];
655
148M
                if (i & 1)
656
20.5M
                    state->lastindex = i/2 + 1;
657
148M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
145M
                    int j = state->lastmark + 1;
663
148M
                    while (j < i)
664
3.71M
                        state->mark[j++] = NULL;
665
145M
                    state->lastmark = i;
666
145M
                }
667
148M
                state->mark[i] = ptr;
668
148M
            }
669
148M
            pattern++;
670
148M
            DISPATCH;
671
672
148M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
56.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
56.2M
                   ptr, *pattern));
677
56.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
18.2M
                RETURN_FAILURE;
679
37.9M
            pattern++;
680
37.9M
            ptr++;
681
37.9M
            DISPATCH;
682
683
37.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
49.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
49.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
49.5M
            if (ctx->toplevel &&
698
49.5M
                ((state->match_all && ptr != state->end) ||
699
18.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
49.5M
            state->ptr = ptr;
704
49.5M
            RETURN_SUCCESS;
705
706
13.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.5M
            if (!SRE(at)(state, ptr, *pattern))
711
2.80M
                RETURN_FAILURE;
712
10.7M
            pattern++;
713
10.7M
            DISPATCH;
714
715
10.7M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
34.7M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
34.7M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
34.7M
            if (ptr >= end ||
749
34.7M
                !SRE(charset)(state, pattern + 1, *ptr))
750
366k
                RETURN_FAILURE;
751
34.3M
            pattern += pattern[0];
752
34.3M
            ptr++;
753
34.3M
            DISPATCH;
754
755
34.3M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
989k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
989k
                   pattern, ptr, pattern[0]));
758
989k
            if (ptr >= end ||
759
989k
                sre_lower_ascii(*ptr) != *pattern)
760
169k
                RETURN_FAILURE;
761
819k
            pattern++;
762
819k
            ptr++;
763
819k
            DISPATCH;
764
765
819k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
26.8M
        TARGET(SRE_OP_JUMP):
845
26.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
26.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
26.8M
                   ptr, pattern[0]));
850
26.8M
            pattern += pattern[0];
851
26.8M
            DISPATCH;
852
853
54.7M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
54.7M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
54.7M
            LASTMARK_SAVE();
858
54.7M
            if (state->repeat)
859
9.61M
                MARK_PUSH(ctx->lastmark);
860
166M
            for (; pattern[0]; pattern += pattern[0]) {
861
136M
                if (pattern[1] == SRE_OP_LITERAL &&
862
136M
                    (ptr >= end ||
863
60.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
28.6M
                    continue;
865
108M
                if (pattern[1] == SRE_OP_IN &&
866
108M
                    (ptr >= end ||
867
11.0M
                     !SRE(charset)(state, pattern + 3,
868
11.0M
                                   (SRE_CODE) *ptr)))
869
6.05M
                    continue;
870
101M
                state->ptr = ptr;
871
101M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
101M
                if (ret) {
873
24.6M
                    if (state->repeat)
874
9.40M
                        MARK_POP_DISCARD(ctx->lastmark);
875
24.6M
                    RETURN_ON_ERROR(ret);
876
24.6M
                    RETURN_SUCCESS;
877
24.6M
                }
878
77.2M
                if (state->repeat)
879
6.72k
                    MARK_POP_KEEP(ctx->lastmark);
880
77.2M
                LASTMARK_RESTORE();
881
77.2M
            }
882
30.0M
            if (state->repeat)
883
210k
                MARK_POP_DISCARD(ctx->lastmark);
884
30.0M
            RETURN_FAILURE;
885
886
173M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
173M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
173M
                   pattern[1], pattern[2]));
898
899
173M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
938k
                RETURN_FAILURE; /* cannot match */
901
902
172M
            state->ptr = ptr;
903
904
172M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
172M
            RETURN_ON_ERROR(ret);
906
172M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
172M
            ctx->count = ret;
908
172M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
172M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
146M
                RETURN_FAILURE;
917
918
26.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
26.4M
                ptr == state->end &&
920
26.4M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
56.2k
            {
922
                /* tail is empty.  we're finished */
923
56.2k
                state->ptr = ptr;
924
56.2k
                RETURN_SUCCESS;
925
56.2k
            }
926
927
26.4M
            LASTMARK_SAVE();
928
26.4M
            if (state->repeat)
929
12.0M
                MARK_PUSH(ctx->lastmark);
930
931
26.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.36M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.36M
                for (;;) {
936
17.3M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
17.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.9M
                        ptr--;
939
11.9M
                        ctx->count--;
940
11.9M
                    }
941
5.36M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.14M
                        break;
943
2.22M
                    state->ptr = ptr;
944
2.22M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.22M
                            pattern+pattern[0]);
946
2.22M
                    if (ret) {
947
2.22M
                        if (state->repeat)
948
403k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.22M
                        RETURN_ON_ERROR(ret);
950
2.22M
                        RETURN_SUCCESS;
951
2.22M
                    }
952
111
                    if (state->repeat)
953
111
                        MARK_POP_KEEP(ctx->lastmark);
954
111
                    LASTMARK_RESTORE();
955
956
111
                    ptr--;
957
111
                    ctx->count--;
958
111
                }
959
3.14M
                if (state->repeat)
960
1.51M
                    MARK_POP_DISCARD(ctx->lastmark);
961
21.0M
            } else {
962
                /* general case */
963
23.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
22.1M
                    state->ptr = ptr;
965
22.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
22.1M
                            pattern+pattern[0]);
967
22.1M
                    if (ret) {
968
20.2M
                        if (state->repeat)
969
9.46M
                            MARK_POP_DISCARD(ctx->lastmark);
970
20.2M
                        RETURN_ON_ERROR(ret);
971
20.2M
                        RETURN_SUCCESS;
972
20.2M
                    }
973
1.95M
                    if (state->repeat)
974
1.19M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.95M
                    LASTMARK_RESTORE();
976
977
1.95M
                    ptr--;
978
1.95M
                    ctx->count--;
979
1.95M
                }
980
839k
                if (state->repeat)
981
644k
                    MARK_POP_DISCARD(ctx->lastmark);
982
839k
            }
983
3.98M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
5.65M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
5.65M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
5.65M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
5.65M
            ctx->u.rep = repeat_pool_malloc(state);
1127
5.65M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
5.65M
            ctx->u.rep->count = -1;
1131
5.65M
            ctx->u.rep->pattern = pattern;
1132
5.65M
            ctx->u.rep->prev = state->repeat;
1133
5.65M
            ctx->u.rep->last_ptr = NULL;
1134
5.65M
            state->repeat = ctx->u.rep;
1135
1136
5.65M
            state->ptr = ptr;
1137
5.65M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
5.65M
            state->repeat = ctx->u.rep->prev;
1139
5.65M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
5.65M
            if (ret) {
1142
5.55M
                RETURN_ON_ERROR(ret);
1143
5.55M
                RETURN_SUCCESS;
1144
5.55M
            }
1145
100k
            RETURN_FAILURE;
1146
1147
20.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
20.2M
            ctx->u.rep = state->repeat;
1155
20.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
20.2M
            state->ptr = ptr;
1159
1160
20.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
20.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
20.2M
                   ptr, ctx->count));
1164
1165
20.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
20.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
20.2M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
20.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
16.3M
                ctx->u.rep->count = ctx->count;
1185
16.3M
                LASTMARK_SAVE();
1186
16.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
16.3M
                LAST_PTR_PUSH();
1189
16.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
16.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
16.3M
                        ctx->u.rep->pattern+3);
1192
16.3M
                LAST_PTR_POP();
1193
16.3M
                if (ret) {
1194
13.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
13.9M
                    RETURN_ON_ERROR(ret);
1196
13.9M
                    RETURN_SUCCESS;
1197
13.9M
                }
1198
2.37M
                MARK_POP(ctx->lastmark);
1199
2.37M
                LASTMARK_RESTORE();
1200
2.37M
                ctx->u.rep->count = ctx->count-1;
1201
2.37M
                state->ptr = ptr;
1202
2.37M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.26M
            state->repeat = ctx->u.rep->prev;
1207
6.26M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.26M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.26M
            RETURN_ON_SUCCESS(ret);
1211
702k
            state->ptr = ptr;
1212
702k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.81M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.81M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.81M
                   ptr, pattern[1]));
1565
1.81M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.81M
            state->ptr = ptr - pattern[1];
1568
1.81M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.81M
            RETURN_ON_FAILURE(ret);
1570
1.76M
            pattern += pattern[0];
1571
1.76M
            DISPATCH;
1572
1573
4.44M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
4.44M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
4.44M
                   ptr, pattern[1]));
1578
4.44M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
4.44M
                state->ptr = ptr - pattern[1];
1580
4.44M
                LASTMARK_SAVE();
1581
4.44M
                if (state->repeat)
1582
4.44M
                    MARK_PUSH(ctx->lastmark);
1583
1584
8.88M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
8.88M
                if (ret) {
1586
1.07k
                    if (state->repeat)
1587
1.07k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.07k
                    RETURN_ON_ERROR(ret);
1589
1.07k
                    RETURN_FAILURE;
1590
1.07k
                }
1591
4.44M
                if (state->repeat)
1592
4.44M
                    MARK_POP(ctx->lastmark);
1593
4.44M
                LASTMARK_RESTORE();
1594
4.44M
            }
1595
4.44M
            pattern += pattern[0];
1596
4.44M
            DISPATCH;
1597
1598
4.44M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
333M
exit:
1620
333M
    ctx_pos = ctx->last_ctx_pos;
1621
333M
    jump = ctx->jump;
1622
333M
    DATA_POP_DISCARD(ctx);
1623
333M
    if (ctx_pos == -1) {
1624
172M
        state->sigcount = sigcount;
1625
172M
        return ret;
1626
172M
    }
1627
160M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
160M
    switch (jump) {
1630
16.3M
        case JUMP_MAX_UNTIL_2:
1631
16.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
16.3M
            goto jump_max_until_2;
1633
6.26M
        case JUMP_MAX_UNTIL_3:
1634
6.26M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.26M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
101M
        case JUMP_BRANCH:
1643
101M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
101M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
5.65M
        case JUMP_REPEAT:
1658
5.65M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
5.65M
            goto jump_repeat;
1660
2.22M
        case JUMP_REPEAT_ONE_1:
1661
2.22M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.22M
            goto jump_repeat_one_1;
1663
22.1M
        case JUMP_REPEAT_ONE_2:
1664
22.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
22.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.81M
        case JUMP_ASSERT:
1673
1.81M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.81M
            goto jump_assert;
1675
4.44M
        case JUMP_ASSERT_NOT:
1676
4.44M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
4.44M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
160M
    }
1683
1684
0
    return ret; /* should never get here */
1685
160M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
265M
{
601
265M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
265M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
265M
    Py_ssize_t ret = 0;
604
265M
    int jump;
605
265M
    unsigned int sigcount = state->sigcount;
606
607
265M
    SRE(match_context)* ctx;
608
265M
    SRE(match_context)* nextctx;
609
265M
    INIT_TRACE(state);
610
611
265M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
265M
    DATA_ALLOC(SRE(match_context), ctx);
614
265M
    ctx->last_ctx_pos = -1;
615
265M
    ctx->jump = JUMP_NONE;
616
265M
    ctx->toplevel = toplevel;
617
265M
    ctx_pos = alloc_pos;
618
619
265M
#if USE_COMPUTED_GOTOS
620
265M
#include "sre_targets.h"
621
265M
#endif
622
623
664M
entrance:
624
625
664M
    ;  // Fashion statement.
626
664M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
664M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
28.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
709
            TRACE(("reject (got %tu chars, need %zu)\n",
633
709
                   end - ptr, (size_t) pattern[3]));
634
709
            RETURN_FAILURE;
635
709
        }
636
28.1M
        pattern += pattern[1] + 1;
637
28.1M
    }
638
639
664M
#if USE_COMPUTED_GOTOS
640
664M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
664M
    {
647
648
664M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
262M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
262M
                   ptr, pattern[0]));
653
262M
            {
654
262M
                int i = pattern[0];
655
262M
                if (i & 1)
656
26.6M
                    state->lastindex = i/2 + 1;
657
262M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
259M
                    int j = state->lastmark + 1;
663
261M
                    while (j < i)
664
1.95M
                        state->mark[j++] = NULL;
665
259M
                    state->lastmark = i;
666
259M
                }
667
262M
                state->mark[i] = ptr;
668
262M
            }
669
262M
            pattern++;
670
262M
            DISPATCH;
671
672
262M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
56.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
56.4M
                   ptr, *pattern));
677
56.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
20.0M
                RETURN_FAILURE;
679
36.3M
            pattern++;
680
36.3M
            ptr++;
681
36.3M
            DISPATCH;
682
683
36.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
105M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
105M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
105M
            if (ctx->toplevel &&
698
105M
                ((state->match_all && ptr != state->end) ||
699
24.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
105M
            state->ptr = ptr;
704
105M
            RETURN_SUCCESS;
705
706
791k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
791k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
791k
            if (!SRE(at)(state, ptr, *pattern))
711
743k
                RETURN_FAILURE;
712
47.8k
            pattern++;
713
47.8k
            DISPATCH;
714
715
47.8k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
133M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
133M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
133M
            if (ptr >= end ||
749
133M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.83M
                RETURN_FAILURE;
751
127M
            pattern += pattern[0];
752
127M
            ptr++;
753
127M
            DISPATCH;
754
755
127M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.22M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.22M
                   pattern, ptr, pattern[0]));
758
4.22M
            if (ptr >= end ||
759
4.22M
                sre_lower_ascii(*ptr) != *pattern)
760
161k
                RETURN_FAILURE;
761
4.06M
            pattern++;
762
4.06M
            ptr++;
763
4.06M
            DISPATCH;
764
765
4.06M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
33.8M
        TARGET(SRE_OP_JUMP):
845
33.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
33.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
33.8M
                   ptr, pattern[0]));
850
33.8M
            pattern += pattern[0];
851
33.8M
            DISPATCH;
852
853
54.1M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
54.1M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
54.1M
            LASTMARK_SAVE();
858
54.1M
            if (state->repeat)
859
51.0M
                MARK_PUSH(ctx->lastmark);
860
121M
            for (; pattern[0]; pattern += pattern[0]) {
861
100M
                if (pattern[1] == SRE_OP_LITERAL &&
862
100M
                    (ptr >= end ||
863
49.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.7M
                    continue;
865
75.7M
                if (pattern[1] == SRE_OP_IN &&
866
75.7M
                    (ptr >= end ||
867
46.9M
                     !SRE(charset)(state, pattern + 3,
868
46.9M
                                   (SRE_CODE) *ptr)))
869
38.8M
                    continue;
870
36.8M
                state->ptr = ptr;
871
36.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
36.8M
                if (ret) {
873
33.4M
                    if (state->repeat)
874
31.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
33.4M
                    RETURN_ON_ERROR(ret);
876
33.4M
                    RETURN_SUCCESS;
877
33.4M
                }
878
3.44M
                if (state->repeat)
879
8.55k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.44M
                LASTMARK_RESTORE();
881
3.44M
            }
882
20.7M
            if (state->repeat)
883
19.2M
                MARK_POP_DISCARD(ctx->lastmark);
884
20.7M
            RETURN_FAILURE;
885
886
308M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
308M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
308M
                   pattern[1], pattern[2]));
898
899
308M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
167k
                RETURN_FAILURE; /* cannot match */
901
902
308M
            state->ptr = ptr;
903
904
308M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
308M
            RETURN_ON_ERROR(ret);
906
308M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
308M
            ctx->count = ret;
908
308M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
308M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
194M
                RETURN_FAILURE;
917
918
113M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
113M
                ptr == state->end &&
920
113M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
16.2k
            {
922
                /* tail is empty.  we're finished */
923
16.2k
                state->ptr = ptr;
924
16.2k
                RETURN_SUCCESS;
925
16.2k
            }
926
927
113M
            LASTMARK_SAVE();
928
113M
            if (state->repeat)
929
82.9M
                MARK_PUSH(ctx->lastmark);
930
931
113M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
39.9M
                ctx->u.chr = pattern[pattern[0]+1];
935
39.9M
                for (;;) {
936
78.2M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
78.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
38.2M
                        ptr--;
939
38.2M
                        ctx->count--;
940
38.2M
                    }
941
39.9M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
32.0M
                        break;
943
7.95M
                    state->ptr = ptr;
944
7.95M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.95M
                            pattern+pattern[0]);
946
7.95M
                    if (ret) {
947
7.95M
                        if (state->repeat)
948
7.93M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.95M
                        RETURN_ON_ERROR(ret);
950
7.95M
                        RETURN_SUCCESS;
951
7.95M
                    }
952
216
                    if (state->repeat)
953
216
                        MARK_POP_KEEP(ctx->lastmark);
954
216
                    LASTMARK_RESTORE();
955
956
216
                    ptr--;
957
216
                    ctx->count--;
958
216
                }
959
32.0M
                if (state->repeat)
960
31.9M
                    MARK_POP_DISCARD(ctx->lastmark);
961
73.6M
            } else {
962
                /* general case */
963
74.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
74.1M
                    state->ptr = ptr;
965
74.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
74.1M
                            pattern+pattern[0]);
967
74.1M
                    if (ret) {
968
73.3M
                        if (state->repeat)
969
42.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
73.3M
                        RETURN_ON_ERROR(ret);
971
73.3M
                        RETURN_SUCCESS;
972
73.3M
                    }
973
809k
                    if (state->repeat)
974
639k
                        MARK_POP_KEEP(ctx->lastmark);
975
809k
                    LASTMARK_RESTORE();
976
977
809k
                    ptr--;
978
809k
                    ctx->count--;
979
809k
                }
980
321k
                if (state->repeat)
981
319k
                    MARK_POP_DISCARD(ctx->lastmark);
982
321k
            }
983
32.3M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
57.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
57.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
57.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
57.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
57.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
57.3M
            ctx->u.rep->count = -1;
1131
57.3M
            ctx->u.rep->pattern = pattern;
1132
57.3M
            ctx->u.rep->prev = state->repeat;
1133
57.3M
            ctx->u.rep->last_ptr = NULL;
1134
57.3M
            state->repeat = ctx->u.rep;
1135
1136
57.3M
            state->ptr = ptr;
1137
57.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
57.3M
            state->repeat = ctx->u.rep->prev;
1139
57.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
57.3M
            if (ret) {
1142
57.3M
                RETURN_ON_ERROR(ret);
1143
57.3M
                RETURN_SUCCESS;
1144
57.3M
            }
1145
896
            RETURN_FAILURE;
1146
1147
108M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
108M
            ctx->u.rep = state->repeat;
1155
108M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
108M
            state->ptr = ptr;
1159
1160
108M
            ctx->count = ctx->u.rep->count+1;
1161
1162
108M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
108M
                   ptr, ctx->count));
1164
1165
108M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
108M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
108M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
108M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
103M
                ctx->u.rep->count = ctx->count;
1185
103M
                LASTMARK_SAVE();
1186
103M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
103M
                LAST_PTR_PUSH();
1189
103M
                ctx->u.rep->last_ptr = state->ptr;
1190
103M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
103M
                        ctx->u.rep->pattern+3);
1192
103M
                LAST_PTR_POP();
1193
103M
                if (ret) {
1194
50.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
50.8M
                    RETURN_ON_ERROR(ret);
1196
50.8M
                    RETURN_SUCCESS;
1197
50.8M
                }
1198
53.0M
                MARK_POP(ctx->lastmark);
1199
53.0M
                LASTMARK_RESTORE();
1200
53.0M
                ctx->u.rep->count = ctx->count-1;
1201
53.0M
                state->ptr = ptr;
1202
53.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
57.6M
            state->repeat = ctx->u.rep->prev;
1207
57.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
57.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
57.6M
            RETURN_ON_SUCCESS(ret);
1211
320k
            state->ptr = ptr;
1212
320k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
40.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
40.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
40.2M
                   ptr, pattern[1]));
1565
40.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
40.2M
            state->ptr = ptr - pattern[1];
1568
40.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
40.2M
            RETURN_ON_FAILURE(ret);
1570
36.7M
            pattern += pattern[0];
1571
36.7M
            DISPATCH;
1572
1573
36.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
20.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
20.1M
                   ptr, pattern[1]));
1578
20.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
20.1M
                state->ptr = ptr - pattern[1];
1580
20.1M
                LASTMARK_SAVE();
1581
20.1M
                if (state->repeat)
1582
20.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
40.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
40.3M
                if (ret) {
1586
8.31k
                    if (state->repeat)
1587
8.31k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
8.31k
                    RETURN_ON_ERROR(ret);
1589
8.31k
                    RETURN_FAILURE;
1590
8.31k
                }
1591
20.1M
                if (state->repeat)
1592
20.1M
                    MARK_POP(ctx->lastmark);
1593
20.1M
                LASTMARK_RESTORE();
1594
20.1M
            }
1595
20.1M
            pattern += pattern[0];
1596
20.1M
            DISPATCH;
1597
1598
20.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
664M
exit:
1620
664M
    ctx_pos = ctx->last_ctx_pos;
1621
664M
    jump = ctx->jump;
1622
664M
    DATA_POP_DISCARD(ctx);
1623
664M
    if (ctx_pos == -1) {
1624
265M
        state->sigcount = sigcount;
1625
265M
        return ret;
1626
265M
    }
1627
398M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
398M
    switch (jump) {
1630
103M
        case JUMP_MAX_UNTIL_2:
1631
103M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
103M
            goto jump_max_until_2;
1633
57.6M
        case JUMP_MAX_UNTIL_3:
1634
57.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
57.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
36.8M
        case JUMP_BRANCH:
1643
36.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
36.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
57.3M
        case JUMP_REPEAT:
1658
57.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
57.3M
            goto jump_repeat;
1660
7.95M
        case JUMP_REPEAT_ONE_1:
1661
7.95M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.95M
            goto jump_repeat_one_1;
1663
74.1M
        case JUMP_REPEAT_ONE_2:
1664
74.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
74.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
40.2M
        case JUMP_ASSERT:
1673
40.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
40.2M
            goto jump_assert;
1675
20.1M
        case JUMP_ASSERT_NOT:
1676
20.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
20.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
398M
    }
1683
1684
0
    return ret; /* should never get here */
1685
398M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
108M
{
601
108M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
108M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
108M
    Py_ssize_t ret = 0;
604
108M
    int jump;
605
108M
    unsigned int sigcount = state->sigcount;
606
607
108M
    SRE(match_context)* ctx;
608
108M
    SRE(match_context)* nextctx;
609
108M
    INIT_TRACE(state);
610
611
108M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
108M
    DATA_ALLOC(SRE(match_context), ctx);
614
108M
    ctx->last_ctx_pos = -1;
615
108M
    ctx->jump = JUMP_NONE;
616
108M
    ctx->toplevel = toplevel;
617
108M
    ctx_pos = alloc_pos;
618
619
108M
#if USE_COMPUTED_GOTOS
620
108M
#include "sre_targets.h"
621
108M
#endif
622
623
639M
entrance:
624
625
639M
    ;  // Fashion statement.
626
639M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
639M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
28.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
346
            TRACE(("reject (got %tu chars, need %zu)\n",
633
346
                   end - ptr, (size_t) pattern[3]));
634
346
            RETURN_FAILURE;
635
346
        }
636
28.0M
        pattern += pattern[1] + 1;
637
28.0M
    }
638
639
639M
#if USE_COMPUTED_GOTOS
640
639M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
639M
    {
647
648
639M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
166M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
166M
                   ptr, pattern[0]));
653
166M
            {
654
166M
                int i = pattern[0];
655
166M
                if (i & 1)
656
35.2M
                    state->lastindex = i/2 + 1;
657
166M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
164M
                    int j = state->lastmark + 1;
663
166M
                    while (j < i)
664
2.00M
                        state->mark[j++] = NULL;
665
164M
                    state->lastmark = i;
666
164M
                }
667
166M
                state->mark[i] = ptr;
668
166M
            }
669
166M
            pattern++;
670
166M
            DISPATCH;
671
672
166M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
50.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
50.7M
                   ptr, *pattern));
677
50.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
19.1M
                RETURN_FAILURE;
679
31.5M
            pattern++;
680
31.5M
            ptr++;
681
31.5M
            DISPATCH;
682
683
31.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
89.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
89.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
89.4M
            if (ctx->toplevel &&
698
89.4M
                ((state->match_all && ptr != state->end) ||
699
27.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
89.4M
            state->ptr = ptr;
704
89.4M
            RETURN_SUCCESS;
705
706
586k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
586k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
586k
            if (!SRE(at)(state, ptr, *pattern))
711
577k
                RETURN_FAILURE;
712
8.78k
            pattern++;
713
8.78k
            DISPATCH;
714
715
8.78k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
133M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
133M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
133M
            if (ptr >= end ||
749
133M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.37M
                RETURN_FAILURE;
751
131M
            pattern += pattern[0];
752
131M
            ptr++;
753
131M
            DISPATCH;
754
755
131M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
923k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
923k
                   pattern, ptr, pattern[0]));
758
923k
            if (ptr >= end ||
759
923k
                sre_lower_ascii(*ptr) != *pattern)
760
18.4k
                RETURN_FAILURE;
761
904k
            pattern++;
762
904k
            ptr++;
763
904k
            DISPATCH;
764
765
904k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
43.4M
        TARGET(SRE_OP_JUMP):
845
43.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
43.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
43.4M
                   ptr, pattern[0]));
850
43.4M
            pattern += pattern[0];
851
43.4M
            DISPATCH;
852
853
70.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
70.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
70.6M
            LASTMARK_SAVE();
858
70.6M
            if (state->repeat)
859
67.0M
                MARK_PUSH(ctx->lastmark);
860
154M
            for (; pattern[0]; pattern += pattern[0]) {
861
126M
                if (pattern[1] == SRE_OP_LITERAL &&
862
126M
                    (ptr >= end ||
863
60.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
36.9M
                    continue;
865
89.5M
                if (pattern[1] == SRE_OP_IN &&
866
89.5M
                    (ptr >= end ||
867
62.9M
                     !SRE(charset)(state, pattern + 3,
868
62.9M
                                   (SRE_CODE) *ptr)))
869
45.4M
                    continue;
870
44.1M
                state->ptr = ptr;
871
44.1M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
44.1M
                if (ret) {
873
42.9M
                    if (state->repeat)
874
40.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
42.9M
                    RETURN_ON_ERROR(ret);
876
42.9M
                    RETURN_SUCCESS;
877
42.9M
                }
878
1.14M
                if (state->repeat)
879
16.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.14M
                LASTMARK_RESTORE();
881
1.14M
            }
882
27.6M
            if (state->repeat)
883
26.9M
                MARK_POP_DISCARD(ctx->lastmark);
884
27.6M
            RETURN_FAILURE;
885
886
225M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
225M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
225M
                   pattern[1], pattern[2]));
898
899
225M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
25.2k
                RETURN_FAILURE; /* cannot match */
901
902
225M
            state->ptr = ptr;
903
904
225M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
225M
            RETURN_ON_ERROR(ret);
906
225M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
225M
            ctx->count = ret;
908
225M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
225M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
72.5M
                RETURN_FAILURE;
917
918
153M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
153M
                ptr == state->end &&
920
153M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.66k
            {
922
                /* tail is empty.  we're finished */
923
3.66k
                state->ptr = ptr;
924
3.66k
                RETURN_SUCCESS;
925
3.66k
            }
926
927
153M
            LASTMARK_SAVE();
928
153M
            if (state->repeat)
929
119M
                MARK_PUSH(ctx->lastmark);
930
931
153M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
58.3M
                ctx->u.chr = pattern[pattern[0]+1];
935
58.3M
                for (;;) {
936
139M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
139M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
81.0M
                        ptr--;
939
81.0M
                        ctx->count--;
940
81.0M
                    }
941
58.3M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
50.5M
                        break;
943
7.82M
                    state->ptr = ptr;
944
7.82M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.82M
                            pattern+pattern[0]);
946
7.82M
                    if (ret) {
947
7.82M
                        if (state->repeat)
948
7.82M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.82M
                        RETURN_ON_ERROR(ret);
950
7.82M
                        RETURN_SUCCESS;
951
7.82M
                    }
952
224
                    if (state->repeat)
953
224
                        MARK_POP_KEEP(ctx->lastmark);
954
224
                    LASTMARK_RESTORE();
955
956
224
                    ptr--;
957
224
                    ctx->count--;
958
224
                }
959
50.5M
                if (state->repeat)
960
50.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
94.7M
            } else {
962
                /* general case */
963
95.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
95.4M
                    state->ptr = ptr;
965
95.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
95.4M
                            pattern+pattern[0]);
967
95.4M
                    if (ret) {
968
94.6M
                        if (state->repeat)
969
60.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
94.6M
                        RETURN_ON_ERROR(ret);
971
94.6M
                        RETURN_SUCCESS;
972
94.6M
                    }
973
820k
                    if (state->repeat)
974
170k
                        MARK_POP_KEEP(ctx->lastmark);
975
820k
                    LASTMARK_RESTORE();
976
977
820k
                    ptr--;
978
820k
                    ctx->count--;
979
820k
                }
980
86.1k
                if (state->repeat)
981
85.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
86.1k
            }
983
50.6M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
81.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
81.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
81.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
81.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
81.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
81.8M
            ctx->u.rep->count = -1;
1131
81.8M
            ctx->u.rep->pattern = pattern;
1132
81.8M
            ctx->u.rep->prev = state->repeat;
1133
81.8M
            ctx->u.rep->last_ptr = NULL;
1134
81.8M
            state->repeat = ctx->u.rep;
1135
1136
81.8M
            state->ptr = ptr;
1137
81.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
81.8M
            state->repeat = ctx->u.rep->prev;
1139
81.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
81.8M
            if (ret) {
1142
81.8M
                RETURN_ON_ERROR(ret);
1143
81.8M
                RETURN_SUCCESS;
1144
81.8M
            }
1145
751
            RETURN_FAILURE;
1146
1147
150M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
150M
            ctx->u.rep = state->repeat;
1155
150M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
150M
            state->ptr = ptr;
1159
1160
150M
            ctx->count = ctx->u.rep->count+1;
1161
1162
150M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
150M
                   ptr, ctx->count));
1164
1165
150M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
150M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
150M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
150M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
145M
                ctx->u.rep->count = ctx->count;
1185
145M
                LASTMARK_SAVE();
1186
145M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
145M
                LAST_PTR_PUSH();
1189
145M
                ctx->u.rep->last_ptr = state->ptr;
1190
145M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
145M
                        ctx->u.rep->pattern+3);
1192
145M
                LAST_PTR_POP();
1193
145M
                if (ret) {
1194
68.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
68.1M
                    RETURN_ON_ERROR(ret);
1196
68.1M
                    RETURN_SUCCESS;
1197
68.1M
                }
1198
77.7M
                MARK_POP(ctx->lastmark);
1199
77.7M
                LASTMARK_RESTORE();
1200
77.7M
                ctx->u.rep->count = ctx->count-1;
1201
77.7M
                state->ptr = ptr;
1202
77.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
81.9M
            state->repeat = ctx->u.rep->prev;
1207
81.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
81.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
81.9M
            RETURN_ON_SUCCESS(ret);
1211
85.9k
            state->ptr = ptr;
1212
85.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, comparing via sre_lower_ascii() */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, comparing via sre_lower_unicode() */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, comparing via sre_lower_locale() */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
55.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
55.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
55.3M
                   ptr, pattern[1]));
1565
55.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
55.3M
            state->ptr = ptr - pattern[1];
1568
55.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
55.3M
            RETURN_ON_FAILURE(ret);
1570
54.7M
            pattern += pattern[0];
1571
54.7M
            DISPATCH;
1572
1573
54.7M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
18.9M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
18.9M
                   ptr, pattern[1]));
1578
18.9M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
18.9M
                state->ptr = ptr - pattern[1];
1580
18.9M
                LASTMARK_SAVE();
1581
18.9M
                if (state->repeat)
1582
18.9M
                    MARK_PUSH(ctx->lastmark);
1583
1584
37.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
37.9M
                if (ret) {
1586
16.1k
                    if (state->repeat)
1587
16.1k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
16.1k
                    RETURN_ON_ERROR(ret);
1589
16.1k
                    RETURN_FAILURE;
1590
16.1k
                }
1591
18.9M
                if (state->repeat)
1592
18.9M
                    MARK_POP(ctx->lastmark);
1593
18.9M
                LASTMARK_RESTORE();
1594
18.9M
            }
1595
18.9M
            pattern += pattern[0];
1596
18.9M
            DISPATCH;
1597
1598
18.9M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
639M
exit:
1620
639M
    ctx_pos = ctx->last_ctx_pos;
1621
639M
    jump = ctx->jump;
1622
639M
    DATA_POP_DISCARD(ctx);
1623
639M
    if (ctx_pos == -1) {
1624
108M
        state->sigcount = sigcount;
1625
108M
        return ret;
1626
108M
    }
1627
531M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
531M
    switch (jump) {
1630
145M
        case JUMP_MAX_UNTIL_2:
1631
145M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
145M
            goto jump_max_until_2;
1633
81.9M
        case JUMP_MAX_UNTIL_3:
1634
81.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
81.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
44.1M
        case JUMP_BRANCH:
1643
44.1M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
44.1M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
81.8M
        case JUMP_REPEAT:
1658
81.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
81.8M
            goto jump_repeat;
1660
7.82M
        case JUMP_REPEAT_ONE_1:
1661
7.82M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.82M
            goto jump_repeat_one_1;
1663
95.4M
        case JUMP_REPEAT_ONE_2:
1664
95.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
95.4M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
55.3M
        case JUMP_ASSERT:
1673
55.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
55.3M
            goto jump_assert;
1675
18.9M
        case JUMP_ASSERT_NOT:
1676
18.9M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
18.9M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
531M
    }
1683
1684
0
    return ret; /* should never get here */
1685
531M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
375M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
87.1M
{
1694
87.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
87.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
87.1M
    Py_ssize_t status = 0;
1697
87.1M
    Py_ssize_t prefix_len = 0;
1698
87.1M
    Py_ssize_t prefix_skip = 0;
1699
87.1M
    SRE_CODE* prefix = NULL;
1700
87.1M
    SRE_CODE* charset = NULL;
1701
87.1M
    SRE_CODE* overlap = NULL;
1702
87.1M
    int flags = 0;
1703
87.1M
    INIT_TRACE(state);
1704
1705
87.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
87.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
87.1M
        flags = pattern[2];
1713
1714
87.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.67M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.67M
                   end - ptr, (size_t) pattern[3]));
1717
1.67M
            return 0;
1718
1.67M
        }
1719
85.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.21M
            end -= pattern[3] - 1;
1723
7.21M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.21M
        }
1726
1727
85.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.21M
            prefix_len = pattern[5];
1731
7.21M
            prefix_skip = pattern[6];
1732
7.21M
            prefix = pattern + 7;
1733
7.21M
            overlap = prefix + prefix_len - 1;
1734
78.2M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
74.1M
            charset = pattern + 5;
1738
1739
85.4M
        pattern += 1 + pattern[1];
1740
85.4M
    }
1741
1742
85.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
85.4M
           prefix, prefix_len, prefix_skip));
1744
85.4M
    TRACE(("charset = %p\n", charset));
1745
1746
85.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.72M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
3.86M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.86M
#endif
1753
3.86M
        end = (SRE_CHAR *)state->end;
1754
3.86M
        state->must_advance = 0;
1755
7.39M
        while (ptr < end) {
1756
101M
            while (*ptr != c) {
1757
94.8M
                if (++ptr >= end)
1758
511k
                    return 0;
1759
94.8M
            }
1760
6.87M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.87M
            state->start = ptr;
1762
6.87M
            state->ptr = ptr + prefix_skip;
1763
6.87M
            if (flags & SRE_INFO_LITERAL)
1764
4.39k
                return 1; /* we got all of it */
1765
6.86M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.86M
            if (status != 0)
1767
6.20M
                return status;
1768
664k
            ++ptr;
1769
664k
            RESET_CAPTURE_GROUP();
1770
664k
        }
1771
10.0k
        return 0;
1772
3.86M
    }
1773
1774
78.7M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
487k
        Py_ssize_t i = 0;
1778
1779
487k
        end = (SRE_CHAR *)state->end;
1780
487k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.44M
        for (i = 0; i < prefix_len; i++)
1784
966k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
483k
#endif
1787
1.42M
        while (ptr < end) {
1788
1.42M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
9.72M
            while (*ptr++ != c) {
1790
8.30M
                if (ptr >= end)
1791
296
                    return 0;
1792
8.30M
            }
1793
1.42M
            if (ptr >= end)
1794
46
                return 0;
1795
1796
1.42M
            i = 1;
1797
1.42M
            state->must_advance = 0;
1798
1.42M
            do {
1799
1.42M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.26M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.26M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.26M
                    state->start = ptr - (prefix_len - 1);
1808
1.26M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.26M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.26M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.26M
                    if (status != 0)
1813
487k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
776k
                    if (++ptr >= end)
1816
26
                        return 0;
1817
776k
                    RESET_CAPTURE_GROUP();
1818
776k
                }
1819
936k
                i = overlap[i];
1820
936k
            } while (i != 0);
1821
1.42M
        }
1822
0
        return 0;
1823
487k
    }
1824
1825
78.2M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
74.1M
        end = (SRE_CHAR *)state->end;
1828
74.1M
        state->must_advance = 0;
1829
77.1M
        for (;;) {
1830
349M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
271M
                ptr++;
1832
77.1M
            if (ptr >= end)
1833
3.73M
                return 0;
1834
73.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
73.4M
            state->start = ptr;
1836
73.4M
            state->ptr = ptr;
1837
73.4M
            status = SRE(match)(state, pattern, 0);
1838
73.4M
            if (status != 0)
1839
70.4M
                break;
1840
3.01M
            ptr++;
1841
3.01M
            RESET_CAPTURE_GROUP();
1842
3.01M
        }
1843
74.1M
    } else {
1844
        /* general case */
1845
4.06M
        assert(ptr <= end);
1846
4.06M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.06M
        state->start = state->ptr = ptr;
1848
4.06M
        status = SRE(match)(state, pattern, 1);
1849
4.06M
        state->must_advance = 0;
1850
4.06M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.06M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
374M
        while (status == 0 && ptr < end) {
1858
370M
            ptr++;
1859
370M
            RESET_CAPTURE_GROUP();
1860
370M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
370M
            state->start = state->ptr = ptr;
1862
370M
            status = SRE(match)(state, pattern, 0);
1863
370M
        }
1864
4.06M
    }
1865
1866
74.4M
    return status;
1867
78.2M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
34.9M
{
1694
34.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
34.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
34.9M
    Py_ssize_t status = 0;
1697
34.9M
    Py_ssize_t prefix_len = 0;
1698
34.9M
    Py_ssize_t prefix_skip = 0;
1699
34.9M
    SRE_CODE* prefix = NULL;
1700
34.9M
    SRE_CODE* charset = NULL;
1701
34.9M
    SRE_CODE* overlap = NULL;
1702
34.9M
    int flags = 0;
1703
34.9M
    INIT_TRACE(state);
1704
1705
34.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
34.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
34.9M
        flags = pattern[2];
1713
1714
34.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.54M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.54M
                   end - ptr, (size_t) pattern[3]));
1717
1.54M
            return 0;
1718
1.54M
        }
1719
33.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.31M
            end -= pattern[3] - 1;
1723
2.31M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.31M
        }
1726
1727
33.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.31M
            prefix_len = pattern[5];
1731
2.31M
            prefix_skip = pattern[6];
1732
2.31M
            prefix = pattern + 7;
1733
2.31M
            overlap = prefix + prefix_len - 1;
1734
31.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
28.1M
            charset = pattern + 5;
1738
1739
33.4M
        pattern += 1 + pattern[1];
1740
33.4M
    }
1741
1742
33.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.4M
           prefix, prefix_len, prefix_skip));
1744
33.4M
    TRACE(("charset = %p\n", charset));
1745
1746
33.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.30M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.30M
#if SIZEOF_SRE_CHAR < 4
1750
2.30M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.30M
#endif
1753
2.30M
        end = (SRE_CHAR *)state->end;
1754
2.30M
        state->must_advance = 0;
1755
2.48M
        while (ptr < end) {
1756
27.3M
            while (*ptr != c) {
1757
25.2M
                if (++ptr >= end)
1758
441k
                    return 0;
1759
25.2M
            }
1760
2.03M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.03M
            state->start = ptr;
1762
2.03M
            state->ptr = ptr + prefix_skip;
1763
2.03M
            if (flags & SRE_INFO_LITERAL)
1764
337
                return 1; /* we got all of it */
1765
2.03M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.03M
            if (status != 0)
1767
1.85M
                return status;
1768
182k
            ++ptr;
1769
182k
            RESET_CAPTURE_GROUP();
1770
182k
        }
1771
7.72k
        return 0;
1772
2.30M
    }
1773
1774
31.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
11.8k
        Py_ssize_t i = 0;
1778
1779
11.8k
        end = (SRE_CHAR *)state->end;
1780
11.8k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
11.8k
#if SIZEOF_SRE_CHAR < 4
1783
35.4k
        for (i = 0; i < prefix_len; i++)
1784
23.6k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
11.8k
#endif
1787
276k
        while (ptr < end) {
1788
276k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.04M
            while (*ptr++ != c) {
1790
1.77M
                if (ptr >= end)
1791
60
                    return 0;
1792
1.77M
            }
1793
276k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
276k
            i = 1;
1797
276k
            state->must_advance = 0;
1798
276k
            do {
1799
276k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
206k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
206k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
206k
                    state->start = ptr - (prefix_len - 1);
1808
206k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
206k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
206k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
206k
                    if (status != 0)
1813
11.7k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
194k
                    if (++ptr >= end)
1816
9
                        return 0;
1817
194k
                    RESET_CAPTURE_GROUP();
1818
194k
                }
1819
265k
                i = overlap[i];
1820
265k
            } while (i != 0);
1821
276k
        }
1822
0
        return 0;
1823
11.8k
    }
1824
1825
31.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
28.1M
        end = (SRE_CHAR *)state->end;
1828
28.1M
        state->must_advance = 0;
1829
30.2M
        for (;;) {
1830
81.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
50.9M
                ptr++;
1832
30.2M
            if (ptr >= end)
1833
2.68M
                return 0;
1834
27.6M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
27.6M
            state->start = ptr;
1836
27.6M
            state->ptr = ptr;
1837
27.6M
            status = SRE(match)(state, pattern, 0);
1838
27.6M
            if (status != 0)
1839
25.5M
                break;
1840
2.10M
            ptr++;
1841
2.10M
            RESET_CAPTURE_GROUP();
1842
2.10M
        }
1843
28.1M
    } else {
1844
        /* general case */
1845
2.93M
        assert(ptr <= end);
1846
2.93M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.93M
        state->start = state->ptr = ptr;
1848
2.93M
        status = SRE(match)(state, pattern, 1);
1849
2.93M
        state->must_advance = 0;
1850
2.93M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.93M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
109M
        while (status == 0 && ptr < end) {
1858
106M
            ptr++;
1859
106M
            RESET_CAPTURE_GROUP();
1860
106M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
106M
            state->start = state->ptr = ptr;
1862
106M
            status = SRE(match)(state, pattern, 0);
1863
106M
        }
1864
2.93M
    }
1865
1866
28.4M
    return status;
1867
31.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
45.0M
{
1694
45.0M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
45.0M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
45.0M
    Py_ssize_t status = 0;
1697
45.0M
    Py_ssize_t prefix_len = 0;
1698
45.0M
    Py_ssize_t prefix_skip = 0;
1699
45.0M
    SRE_CODE* prefix = NULL;
1700
45.0M
    SRE_CODE* charset = NULL;
1701
45.0M
    SRE_CODE* overlap = NULL;
1702
45.0M
    int flags = 0;
1703
45.0M
    INIT_TRACE(state);
1704
1705
45.0M
    if (ptr > end)
1706
0
        return 0;
1707
1708
45.0M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
45.0M
        flags = pattern[2];
1713
1714
45.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
116k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
116k
                   end - ptr, (size_t) pattern[3]));
1717
116k
            return 0;
1718
116k
        }
1719
44.9M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.02M
            end -= pattern[3] - 1;
1723
2.02M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.02M
        }
1726
1727
44.9M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.03M
            prefix_len = pattern[5];
1731
2.03M
            prefix_skip = pattern[6];
1732
2.03M
            prefix = pattern + 7;
1733
2.03M
            overlap = prefix + prefix_len - 1;
1734
42.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.9M
            charset = pattern + 5;
1738
1739
44.9M
        pattern += 1 + pattern[1];
1740
44.9M
    }
1741
1742
44.9M
    TRACE(("prefix = %p %zd %zd\n",
1743
44.9M
           prefix, prefix_len, prefix_skip));
1744
44.9M
    TRACE(("charset = %p\n", charset));
1745
1746
44.9M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.55M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.55M
#if SIZEOF_SRE_CHAR < 4
1750
1.55M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.55M
#endif
1753
1.55M
        end = (SRE_CHAR *)state->end;
1754
1.55M
        state->must_advance = 0;
1755
1.74M
        while (ptr < end) {
1756
50.2M
            while (*ptr != c) {
1757
48.5M
                if (++ptr >= end)
1758
65.3k
                    return 0;
1759
48.5M
            }
1760
1.68M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.68M
            state->start = ptr;
1762
1.68M
            state->ptr = ptr + prefix_skip;
1763
1.68M
            if (flags & SRE_INFO_LITERAL)
1764
1.38k
                return 1; /* we got all of it */
1765
1.68M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.68M
            if (status != 0)
1767
1.49M
                return status;
1768
190k
            ++ptr;
1769
190k
            RESET_CAPTURE_GROUP();
1770
190k
        }
1771
1.39k
        return 0;
1772
1.55M
    }
1773
1774
43.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
471k
        Py_ssize_t i = 0;
1778
1779
471k
        end = (SRE_CHAR *)state->end;
1780
471k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
471k
#if SIZEOF_SRE_CHAR < 4
1783
1.41M
        for (i = 0; i < prefix_len; i++)
1784
942k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
471k
#endif
1787
946k
        while (ptr < end) {
1788
946k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.65M
            while (*ptr++ != c) {
1790
3.70M
                if (ptr >= end)
1791
115
                    return 0;
1792
3.70M
            }
1793
946k
            if (ptr >= end)
1794
11
                return 0;
1795
1796
946k
            i = 1;
1797
946k
            state->must_advance = 0;
1798
946k
            do {
1799
946k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
865k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
865k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
865k
                    state->start = ptr - (prefix_len - 1);
1808
865k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
865k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
865k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
865k
                    if (status != 0)
1813
471k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
393k
                    if (++ptr >= end)
1816
12
                        return 0;
1817
393k
                    RESET_CAPTURE_GROUP();
1818
393k
                }
1819
475k
                i = overlap[i];
1820
475k
            } while (i != 0);
1821
946k
        }
1822
0
        return 0;
1823
471k
    }
1824
1825
42.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.9M
        end = (SRE_CHAR *)state->end;
1828
41.9M
        state->must_advance = 0;
1829
42.4M
        for (;;) {
1830
190M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
147M
                ptr++;
1832
42.4M
            if (ptr >= end)
1833
1.00M
                return 0;
1834
41.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
41.4M
            state->start = ptr;
1836
41.4M
            state->ptr = ptr;
1837
41.4M
            status = SRE(match)(state, pattern, 0);
1838
41.4M
            if (status != 0)
1839
40.9M
                break;
1840
423k
            ptr++;
1841
423k
            RESET_CAPTURE_GROUP();
1842
423k
        }
1843
41.9M
    } else {
1844
        /* general case */
1845
927k
        assert(ptr <= end);
1846
927k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
927k
        state->start = state->ptr = ptr;
1848
927k
        status = SRE(match)(state, pattern, 1);
1849
927k
        state->must_advance = 0;
1850
927k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
927k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
193M
        while (status == 0 && ptr < end) {
1858
192M
            ptr++;
1859
192M
            RESET_CAPTURE_GROUP();
1860
192M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
192M
            state->start = state->ptr = ptr;
1862
192M
            status = SRE(match)(state, pattern, 0);
1863
192M
        }
1864
927k
    }
1865
1866
41.9M
    return status;
1867
42.9M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.04M
/*
 * Body of the search routine, in the instantiation the report labels
 * sre_ucs4_search.  The signature is not part of this listing; the body
 * reads two names from it, `state` and `pattern`.
 *
 * Starting at state->start, try successive positions up to state->end until
 * SRE(match) reports something other than 0.  Before each attempt
 * state->start is set to the candidate start and state->ptr to the position
 * handed to the matcher.
 *
 * Returns 0 when no position matched, 1 directly from the fast paths taken
 * when SRE_INFO_LITERAL is set, and otherwise the nonzero status produced
 * by SRE(match), passed through unchanged.
 *
 * If the pattern begins with an SRE_OP_INFO block, its contents select one
 * of four scanning strategies: single-character prefix, multi-character
 * prefix with overlap table, leading charset, or the general case.
 *
 * NOTE: the bare numbers interleaved with the code below are the coverage
 * report's line numbers and execution counts, not C source.
 */
{
1694
7.04M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.04M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.04M
    Py_ssize_t status = 0;
1697
7.04M
    Py_ssize_t prefix_len = 0;
1698
7.04M
    Py_ssize_t prefix_skip = 0;
1699
7.04M
    SRE_CODE* prefix = NULL;
1700
7.04M
    SRE_CODE* charset = NULL;
1701
7.04M
    SRE_CODE* overlap = NULL;
1702
7.04M
    int flags = 0;
1703
7.04M
    INIT_TRACE(state);
1704
1705
7.04M
    /* start position lies beyond the end: nothing to scan */
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.04M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.04M
        flags = pattern[2];
1713
1714
7.04M
        /* pattern[3] is the "min" slot of the layout above: give up at once
           when fewer characters remain than that */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.39k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.39k
                   end - ptr, (size_t) pattern[3]));
1717
6.39k
            return 0;
1718
6.39k
        }
1719
7.03M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.86M
            end -= pattern[3] - 1;
1723
2.86M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.86M
        }
1726
1727
7.03M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.86M
            prefix_len = pattern[5];
1731
2.86M
            prefix_skip = pattern[6];
1732
2.86M
            prefix = pattern + 7;
1733
2.86M
            /* offset by -1 so that overlap[i] for i >= 1 lands on the words
               stored right after the prefix_len prefix words */
            overlap = prefix + prefix_len - 1;
1734
4.16M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.97M
            charset = pattern + 5;
1738
1739
7.03M
        /* step over the INFO block using its own skip count (slot 1) */
        pattern += 1 + pattern[1];
1740
7.03M
    }
1741
1742
7.03M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.03M
           prefix, prefix_len, prefix_skip));
1744
7.03M
    TRACE(("charset = %p\n", charset));
1745
1746
7.03M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.86M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
        /* no counts are recorded for this guarded check in this
           instantiation -- presumably compiled out because SRE_CHAR is
           4 bytes wide here; TODO confirm */
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.86M
        /* discard the shortened end computed above; this scan runs to the
           real end of the input */
        end = (SRE_CHAR *)state->end;
1754
2.86M
        state->must_advance = 0;
1755
3.15M
        while (ptr < end) {
1756
24.1M
            /* skip ahead to the next occurrence of the literal */
            while (*ptr != c) {
1757
21.0M
                if (++ptr >= end)
1758
4.47k
                    return 0;
1759
21.0M
            }
1760
3.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.14M
            state->start = ptr;
1762
3.14M
            state->ptr = ptr + prefix_skip;
1763
3.14M
            if (flags & SRE_INFO_LITERAL)
1764
2.66k
                return 1; /* we got all of it */
1765
3.14M
            /* resume matching past the part of the pattern already covered
               by the prefix -- presumably two code words per skipped
               literal; TODO confirm against the pattern compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.14M
            if (status != 0)
1767
2.85M
                return status;
1768
291k
            ++ptr;
1769
291k
            RESET_CAPTURE_GROUP();
1770
291k
        }
1771
907
        return 0;
1772
2.86M
    }
1773
1774
4.17M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
4.37k
        Py_ssize_t i = 0;
1778
1779
4.37k
        /* as in the single-character case, scan to the real end */
        end = (SRE_CHAR *)state->end;
1780
4.37k
        /* the whole prefix cannot fit in what is left */
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
200k
        while (ptr < end) {
1788
200k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.02M
            /* find the first prefix character; the post-increment leaves
               ptr one past it when the loop exits */
            while (*ptr++ != c) {
1790
2.82M
                if (ptr >= end)
1791
121
                    return 0;
1792
2.82M
            }
1793
200k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
200k
            /* i counts how many prefix characters are matched so far */
            i = 1;
1797
200k
            state->must_advance = 0;
1798
200k
            do {
1799
200k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
192k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
192k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
192k
                    /* ptr sits on the last prefix character: back up to the
                       first one for start, and to prefix_skip characters
                       past that for the resume position */
                    state->start = ptr - (prefix_len - 1);
1808
192k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
192k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
192k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
192k
                    if (status != 0)
1813
4.23k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
187k
                    if (++ptr >= end)
1816
5
                        return 0;
1817
187k
                    RESET_CAPTURE_GROUP();
1818
187k
                }
1819
196k
                /* fall back to the matched length given by the overlap
                   table; 0 ends this inner loop and restarts the scan for
                   the first prefix character */
                i = overlap[i];
1820
196k
            } while (i != 0);
1821
200k
        }
1822
0
        return 0;
1823
4.37k
    }
1824
1825
4.16M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.97M
        end = (SRE_CHAR *)state->end;
1828
3.97M
        state->must_advance = 0;
1829
4.46M
        for (;;) {
1830
77.5M
            /* skip characters that are not in the leading set */
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
73.1M
                ptr++;
1832
4.46M
            if (ptr >= end)
1833
51.9k
                return 0;
1834
4.41M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.41M
            state->start = ptr;
1836
4.41M
            state->ptr = ptr;
1837
4.41M
            status = SRE(match)(state, pattern, 0);
1838
4.41M
            /* any nonzero status leaves the loop and is returned at the
               bottom of the function */
            if (status != 0)
1839
3.91M
                break;
1840
492k
            ptr++;
1841
492k
            RESET_CAPTURE_GROUP();
1842
492k
        }
1843
3.97M
    } else {
1844
        /* general case */
1845
198k
        assert(ptr <= end);
1846
198k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
198k
        state->start = state->ptr = ptr;
1848
198k
        /* only this first attempt passes 1 as the last argument; the retry
           loop below passes 0 -- presumably a "top level" flag tied to
           must_advance, which is cleared right after; TODO confirm */
        status = SRE(match)(state, pattern, 1);
1849
198k
        state->must_advance = 0;
1850
198k
        /* the pattern opens with a beginning-of-string anchor and failed at
           the first position: stop here instead of retrying at later
           positions, leaving start/ptr parked at end */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
198k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
72.2M
        /* brute force: retry at each following position, up to and
           including end itself */
        while (status == 0 && ptr < end) {
1858
72.0M
            ptr++;
1859
72.0M
            RESET_CAPTURE_GROUP();
1860
72.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
72.0M
            state->start = state->ptr = ptr;
1862
72.0M
            status = SRE(match)(state, pattern, 0);
1863
72.0M
        }
1864
198k
    }
1865
1866
4.11M
    return status;
1867
4.16M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/