Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
96.8M
{
18
    /* check if pointer is at given position */
19
20
96.8M
    Py_ssize_t thisp, thatp;
21
22
96.8M
    switch (at) {
23
24
9.80M
    case SRE_AT_BEGINNING:
25
9.80M
    case SRE_AT_BEGINNING_STRING:
26
9.80M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
82.4M
    case SRE_AT_END:
33
82.4M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.36M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
82.4M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
4.61M
    case SRE_AT_END_STRING:
42
4.61M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
96.8M
    }
87
88
0
    return 0;
89
96.8M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
29.6M
{
18
    /* check if pointer is at given position */
19
20
29.6M
    Py_ssize_t thisp, thatp;
21
22
29.6M
    switch (at) {
23
24
8.49M
    case SRE_AT_BEGINNING:
25
8.49M
    case SRE_AT_BEGINNING_STRING:
26
8.49M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
18.7M
    case SRE_AT_END:
33
18.7M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
289k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
18.7M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.43M
    case SRE_AT_END_STRING:
42
2.43M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
29.6M
    }
87
88
0
    return 0;
89
29.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
56.6M
{
18
    /* check if pointer is at given position */
19
20
56.6M
    Py_ssize_t thisp, thatp;
21
22
56.6M
    switch (at) {
23
24
1.29M
    case SRE_AT_BEGINNING:
25
1.29M
    case SRE_AT_BEGINNING_STRING:
26
1.29M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
54.2M
    case SRE_AT_END:
33
54.2M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.06M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
54.2M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.08M
    case SRE_AT_END_STRING:
42
1.08M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
56.6M
    }
87
88
0
    return 0;
89
56.6M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
10.5M
{
18
    /* check if pointer is at given position */
19
20
10.5M
    Py_ssize_t thisp, thatp;
21
22
10.5M
    switch (at) {
23
24
18.1k
    case SRE_AT_BEGINNING:
25
18.1k
    case SRE_AT_BEGINNING_STRING:
26
18.1k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
9.48M
    case SRE_AT_END:
33
9.48M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.22k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
9.48M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.09M
    case SRE_AT_END_STRING:
42
1.09M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
10.5M
    }
87
88
0
    return 0;
89
10.5M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.26G
{
94
    /* check if character is a member of the given set */
95
96
1.26G
    int ok = 1;
97
98
2.85G
    for (;;) {
99
2.85G
        switch (*set++) {
100
101
841M
        case SRE_OP_FAILURE:
102
841M
            return !ok;
103
104
951M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
951M
            if (ch == set[0])
107
5.86M
                return ok;
108
945M
            set++;
109
945M
            break;
110
111
98.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
98.5M
            if (sre_category(set[0], (int) ch))
114
35.5M
                return ok;
115
62.9M
            set++;
116
62.9M
            break;
117
118
413M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
413M
            if (ch < 256 &&
121
396M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
164M
                return ok;
123
249M
            set += 256/SRE_CODE_BITS;
124
249M
            break;
125
126
326M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
326M
            if (set[0] <= ch && ch <= set[1])
129
214M
                return ok;
130
111M
            set += 2;
131
111M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
218M
        case SRE_OP_NEGATE:
148
218M
            ok = !ok;
149
218M
            break;
150
151
2
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
2
        {
154
2
            Py_ssize_t count, block;
155
2
            count = *(set++);
156
157
2
            if (ch < 0x10000u)
158
2
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
2
            set += 256/sizeof(SRE_CODE);
162
2
            if (block >=0 &&
163
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
2
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
2
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
2.85G
        }
175
2.85G
    }
176
1.26G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
408M
{
94
    /* check if character is a member of the given set */
95
96
408M
    int ok = 1;
97
98
823M
    for (;;) {
99
823M
        switch (*set++) {
100
101
219M
        case SRE_OP_FAILURE:
102
219M
            return !ok;
103
104
239M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
239M
            if (ch == set[0])
107
3.83M
                return ok;
108
235M
            set++;
109
235M
            break;
110
111
32.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.0M
            if (sre_category(set[0], (int) ch))
114
15.4M
                return ok;
115
16.5M
            set++;
116
16.5M
            break;
117
118
103M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
103M
            if (ch < 256 &&
121
103M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
49.2M
                return ok;
123
54.3M
            set += 256/SRE_CODE_BITS;
124
54.3M
            break;
125
126
181M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
181M
            if (set[0] <= ch && ch <= set[1])
129
120M
                return ok;
130
60.9M
            set += 2;
131
60.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
47.5M
        case SRE_OP_NEGATE:
148
47.5M
            ok = !ok;
149
47.5M
            break;
150
151
2
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
2
        {
154
2
            Py_ssize_t count, block;
155
2
            count = *(set++);
156
157
2
            if (ch < 0x10000u)
158
2
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
2
            set += 256/sizeof(SRE_CODE);
162
2
            if (block >=0 &&
163
2
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
2
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
2
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
2
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
823M
        }
175
823M
    }
176
408M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
576M
{
94
    /* check if character is a member of the given set */
95
96
576M
    int ok = 1;
97
98
1.38G
    for (;;) {
99
1.38G
        switch (*set++) {
100
101
428M
        case SRE_OP_FAILURE:
102
428M
            return !ok;
103
104
550M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
550M
            if (ch == set[0])
107
1.16M
                return ok;
108
549M
            set++;
109
549M
            break;
110
111
56.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
56.4M
            if (sre_category(set[0], (int) ch))
114
17.1M
                return ok;
115
39.2M
            set++;
116
39.2M
            break;
117
118
145M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
145M
            if (ch < 256 &&
121
138M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
51.0M
                return ok;
123
94.3M
            set += 256/SRE_CODE_BITS;
124
94.3M
            break;
125
126
118M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
118M
            if (set[0] <= ch && ch <= set[1])
129
77.7M
                return ok;
130
41.0M
            set += 2;
131
41.0M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
85.1M
        case SRE_OP_NEGATE:
148
85.1M
            ok = !ok;
149
85.1M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.38G
        }
175
1.38G
    }
176
576M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
277M
{
94
    /* check if character is a member of the given set */
95
96
277M
    int ok = 1;
97
98
641M
    for (;;) {
99
641M
        switch (*set++) {
100
101
192M
        case SRE_OP_FAILURE:
102
192M
            return !ok;
103
104
162M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
162M
            if (ch == set[0])
107
870k
                return ok;
108
161M
            set++;
109
161M
            break;
110
111
10.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.1M
            if (sre_category(set[0], (int) ch))
114
2.97M
                return ok;
115
7.13M
            set++;
116
7.13M
            break;
117
118
164M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
164M
            if (ch < 256 &&
121
154M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
64.1M
                return ok;
123
100M
            set += 256/SRE_CODE_BITS;
124
100M
            break;
125
126
25.8M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
25.8M
            if (set[0] <= ch && ch <= set[1])
129
16.7M
                return ok;
130
9.03M
            set += 2;
131
9.03M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
85.9M
        case SRE_OP_NEGATE:
148
85.9M
            ok = !ok;
149
85.9M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
641M
        }
175
641M
    }
176
277M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
501M
{
195
501M
    SRE_CODE chr;
196
501M
    SRE_CHAR c;
197
501M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
501M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
501M
    Py_ssize_t i;
200
501M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
501M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
92.1M
        end = ptr + maxcount;
205
206
501M
    switch (pattern[0]) {
207
208
374M
    case SRE_OP_IN:
209
        /* repeated set */
210
374M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
707M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
332M
            ptr++;
213
374M
        break;
214
215
56.5M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
56.5M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
140M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
83.9M
            ptr++;
220
56.5M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
68.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
68.3M
        chr = pattern[1];
232
68.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
68.3M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
58.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
58.4M
        else
238
58.4M
#endif
239
71.6M
        while (ptr < end && *ptr == c)
240
3.29M
            ptr++;
241
68.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.58M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.58M
        chr = pattern[1];
270
1.58M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.58M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.20M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.20M
        else
276
1.20M
#endif
277
39.5M
        while (ptr < end && *ptr != c)
278
37.9M
            ptr++;
279
1.58M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
501M
    }
319
320
501M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
501M
           ptr - (SRE_CHAR*) state->ptr));
322
501M
    return ptr - (SRE_CHAR*) state->ptr;
323
501M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
183M
{
195
183M
    SRE_CODE chr;
196
183M
    SRE_CHAR c;
197
183M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
183M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
183M
    Py_ssize_t i;
200
183M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
183M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
24.6M
        end = ptr + maxcount;
205
206
183M
    switch (pattern[0]) {
207
208
125M
    case SRE_OP_IN:
209
        /* repeated set */
210
125M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
253M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
127M
            ptr++;
213
125M
        break;
214
215
10.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
10.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
27.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.4M
            ptr++;
220
10.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
45.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
45.6M
        chr = pattern[1];
232
45.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
45.6M
        c = (SRE_CHAR) chr;
234
45.6M
#if SIZEOF_SRE_CHAR < 4
235
45.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
45.6M
        else
238
45.6M
#endif
239
46.7M
        while (ptr < end && *ptr == c)
240
1.06M
            ptr++;
241
45.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
586k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
586k
        chr = pattern[1];
270
586k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
586k
        c = (SRE_CHAR) chr;
272
586k
#if SIZEOF_SRE_CHAR < 4
273
586k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
586k
        else
276
586k
#endif
277
11.2M
        while (ptr < end && *ptr != c)
278
10.6M
            ptr++;
279
586k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
183M
    }
319
320
183M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
183M
           ptr - (SRE_CHAR*) state->ptr));
322
183M
    return ptr - (SRE_CHAR*) state->ptr;
323
183M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
232M
{
195
232M
    SRE_CODE chr;
196
232M
    SRE_CHAR c;
197
232M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
232M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
232M
    Py_ssize_t i;
200
232M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
232M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
48.7M
        end = ptr + maxcount;
205
206
232M
    switch (pattern[0]) {
207
208
180M
    case SRE_OP_IN:
209
        /* repeated set */
210
180M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
290M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
109M
            ptr++;
213
180M
        break;
214
215
38.6M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
38.6M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
87.8M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
49.1M
            ptr++;
220
38.6M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.7M
        chr = pattern[1];
232
12.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.7M
        c = (SRE_CHAR) chr;
234
12.7M
#if SIZEOF_SRE_CHAR < 4
235
12.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
12.7M
        else
238
12.7M
#endif
239
14.2M
        while (ptr < end && *ptr == c)
240
1.49M
            ptr++;
241
12.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
622k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
622k
        chr = pattern[1];
270
622k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
622k
        c = (SRE_CHAR) chr;
272
622k
#if SIZEOF_SRE_CHAR < 4
273
622k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
622k
        else
276
622k
#endif
277
10.7M
        while (ptr < end && *ptr != c)
278
10.1M
            ptr++;
279
622k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
232M
    }
319
320
232M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
232M
           ptr - (SRE_CHAR*) state->ptr));
322
232M
    return ptr - (SRE_CHAR*) state->ptr;
323
232M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
85.3M
{
195
85.3M
    SRE_CODE chr;
196
85.3M
    SRE_CHAR c;
197
85.3M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
85.3M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
85.3M
    Py_ssize_t i;
200
85.3M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
85.3M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
18.7M
        end = ptr + maxcount;
205
206
85.3M
    switch (pattern[0]) {
207
208
68.1M
    case SRE_OP_IN:
209
        /* repeated set */
210
68.1M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
162M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
94.3M
            ptr++;
213
68.1M
        break;
214
215
6.86M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
6.86M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
25.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
18.3M
            ptr++;
220
6.86M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
9.98M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
9.98M
        chr = pattern[1];
232
9.98M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
9.98M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
10.7M
        while (ptr < end && *ptr == c)
240
729k
            ptr++;
241
9.98M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
374k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
374k
        chr = pattern[1];
270
374k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
374k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
17.5M
        while (ptr < end && *ptr != c)
278
17.1M
            ptr++;
279
374k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
85.3M
    }
319
320
85.3M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
85.3M
           ptr - (SRE_CHAR*) state->ptr));
322
85.3M
    return ptr - (SRE_CHAR*) state->ptr;
323
85.3M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
480M
    do { \
355
480M
        ctx->lastmark = state->lastmark; \
356
480M
        ctx->lastindex = state->lastindex; \
357
480M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
318M
    do { \
360
318M
        state->lastmark = ctx->lastmark; \
361
318M
        state->lastindex = ctx->lastindex; \
362
318M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
178M
    do { \
366
178M
        TRACE(("push last_ptr: %zd", \
367
178M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
178M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
178M
    } while (0)
370
#define LAST_PTR_POP()  \
371
178M
    do { \
372
178M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
178M
        TRACE(("pop last_ptr: %zd", \
374
178M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
178M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
738M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
461M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
932M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
106M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
20.0M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.20G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.20G
do { \
390
1.20G
    alloc_pos = state->data_stack_base; \
391
1.20G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.20G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.20G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
131M
        int j = data_stack_grow(state, sizeof(type)); \
395
131M
        if (j < 0) return j; \
396
131M
        if (ctx_pos != -1) \
397
131M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
131M
    } \
399
1.20G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.20G
    state->data_stack_base += sizeof(type); \
401
1.20G
} while (0)
402
403
1.32G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.32G
do { \
405
1.32G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.32G
    ptr = (type*)(state->data_stack+pos); \
407
1.32G
} while (0)
408
409
425M
#define DATA_STACK_PUSH(state, data, size) \
410
425M
do { \
411
425M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
425M
           data, state->data_stack_base, size)); \
413
425M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
85.2k
        int j = data_stack_grow(state, size); \
415
85.2k
        if (j < 0) return j; \
416
85.2k
        if (ctx_pos != -1) \
417
85.2k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
85.2k
    } \
419
425M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
425M
    state->data_stack_base += size; \
421
425M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
281M
#define DATA_STACK_POP(state, data, size, discard) \
427
281M
do { \
428
281M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
281M
           data, state->data_stack_base-size, size)); \
430
281M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
281M
    if (discard) \
432
281M
        state->data_stack_base -= size; \
433
281M
} while (0)
434
435
1.34G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.34G
do { \
437
1.34G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.34G
           state->data_stack_base-size, size)); \
439
1.34G
    state->data_stack_base -= size; \
440
1.34G
} while(0)
441
442
#define DATA_PUSH(x) \
443
178M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
178M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.20G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.20G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.32G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
341M
    do if (lastmark >= 0) { \
473
247M
        MARK_TRACE("push", (lastmark)); \
474
247M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
247M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
341M
    } while (0)
477
#define MARK_POP(lastmark) \
478
114M
    do if (lastmark >= 0) { \
479
102M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
102M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
102M
        MARK_TRACE("pop", (lastmark)); \
482
114M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.12M
    do if (lastmark >= 0) { \
485
882k
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
882k
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
882k
        MARK_TRACE("pop keep", (lastmark)); \
488
1.12M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
226M
    do if (lastmark >= 0) { \
491
145M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
145M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
145M
        MARK_TRACE("pop discard", (lastmark)); \
494
226M
    } while (0)
495
496
375M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
178M
#define JUMP_MAX_UNTIL_2     2
499
106M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
106M
#define JUMP_REPEAT          7
504
11.0M
#define JUMP_REPEAT_ONE_1    8
505
204M
#define JUMP_REPEAT_ONE_2    9
506
57.5M
#define JUMP_MIN_REPEAT_ONE  10
507
118M
#define JUMP_BRANCH          11
508
20.0M
#define JUMP_ASSERT          12
509
22.2M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
824M
    ctx->pattern = pattern; \
516
824M
    ctx->ptr = ptr; \
517
824M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
824M
    nextctx->pattern = nextpattern; \
519
824M
    nextctx->toplevel = toplevel_; \
520
824M
    nextctx->jump = jumpvalue; \
521
824M
    nextctx->last_ctx_pos = ctx_pos; \
522
824M
    pattern = nextpattern; \
523
824M
    ctx_pos = alloc_pos; \
524
824M
    ctx = nextctx; \
525
824M
    goto entrance; \
526
824M
    jumplabel: \
527
824M
    pattern = ctx->pattern; \
528
824M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
782M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
42.3M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.02G
    do {                                                           \
553
2.02G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.02G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.02G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.08G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.02G
        do {                               \
588
2.02G
            MAYBE_CHECK_SIGNALS;           \
589
2.02G
            goto *sre_targets[*pattern++]; \
590
2.02G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
375M
{
601
375M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
375M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
375M
    Py_ssize_t ret = 0;
604
375M
    int jump;
605
375M
    unsigned int sigcount = state->sigcount;
606
607
375M
    SRE(match_context)* ctx;
608
375M
    SRE(match_context)* nextctx;
609
375M
    INIT_TRACE(state);
610
611
375M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
375M
    DATA_ALLOC(SRE(match_context), ctx);
614
375M
    ctx->last_ctx_pos = -1;
615
375M
    ctx->jump = JUMP_NONE;
616
375M
    ctx->toplevel = toplevel;
617
375M
    ctx_pos = alloc_pos;
618
619
375M
#if USE_COMPUTED_GOTOS
620
375M
#include "sre_targets.h"
621
375M
#endif
622
623
1.20G
entrance:
624
625
1.20G
    ;  // Fashion statement.
626
1.20G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.20G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
49.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.41M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.41M
                   end - ptr, (size_t) pattern[3]));
634
3.41M
            RETURN_FAILURE;
635
3.41M
        }
636
45.7M
        pattern += pattern[1] + 1;
637
45.7M
    }
638
639
1.19G
#if USE_COMPUTED_GOTOS
640
1.19G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.19G
    {
647
648
1.19G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
479M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
479M
                   ptr, pattern[0]));
653
479M
            {
654
479M
                int i = pattern[0];
655
479M
                if (i & 1)
656
110M
                    state->lastindex = i/2 + 1;
657
479M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
474M
                    int j = state->lastmark + 1;
663
487M
                    while (j < i)
664
12.3M
                        state->mark[j++] = NULL;
665
474M
                    state->lastmark = i;
666
474M
                }
667
479M
                state->mark[i] = ptr;
668
479M
            }
669
479M
            pattern++;
670
479M
            DISPATCH;
671
672
479M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
122M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
122M
                   ptr, *pattern));
677
122M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
53.8M
                RETURN_FAILURE;
679
68.9M
            pattern++;
680
68.9M
            ptr++;
681
68.9M
            DISPATCH;
682
683
68.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
125M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
125M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
125M
            if (ctx->toplevel &&
698
32.6M
                ((state->match_all && ptr != state->end) ||
699
32.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
125M
            state->ptr = ptr;
704
125M
            RETURN_SUCCESS;
705
706
96.8M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
96.8M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
96.8M
            if (!SRE(at)(state, ptr, *pattern))
711
80.4M
                RETURN_FAILURE;
712
16.4M
            pattern++;
713
16.4M
            DISPATCH;
714
715
16.4M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
245M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
245M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
245M
            if (ptr >= end ||
749
244M
                !SRE(charset)(state, pattern + 1, *ptr))
750
92.2M
                RETURN_FAILURE;
751
153M
            pattern += pattern[0];
752
153M
            ptr++;
753
153M
            DISPATCH;
754
755
153M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.14M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.14M
                   pattern, ptr, pattern[0]));
758
6.14M
            if (ptr >= end ||
759
6.14M
                sre_lower_ascii(*ptr) != *pattern)
760
49.1k
                RETURN_FAILURE;
761
6.09M
            pattern++;
762
6.09M
            ptr++;
763
6.09M
            DISPATCH;
764
765
6.09M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
14
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
14
                   pattern, ptr, pattern[0]));
768
14
            if (ptr >= end ||
769
14
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
14
            pattern++;
772
14
            ptr++;
773
14
            DISPATCH;
774
775
14
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
14
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
14
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
14
            if (ptr >= end
828
10
                || !SRE(charset)(state, pattern+1,
829
10
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
8
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
64.4M
        TARGET(SRE_OP_JUMP):
845
64.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
64.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
64.4M
                   ptr, pattern[0]));
850
64.4M
            pattern += pattern[0];
851
64.4M
            DISPATCH;
852
853
90.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
90.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
90.3M
            LASTMARK_SAVE();
858
90.3M
            if (state->repeat)
859
48.5M
                MARK_PUSH(ctx->lastmark);
860
218M
            for (; pattern[0]; pattern += pattern[0]) {
861
190M
                if (pattern[1] == SRE_OP_LITERAL &&
862
96.1M
                    (ptr >= end ||
863
96.0M
                     (SRE_CODE) *ptr != pattern[2]))
864
49.6M
                    continue;
865
140M
                if (pattern[1] == SRE_OP_IN &&
866
41.9M
                    (ptr >= end ||
867
41.8M
                     !SRE(charset)(state, pattern + 3,
868
41.8M
                                   (SRE_CODE) *ptr)))
869
22.4M
                    continue;
870
118M
                state->ptr = ptr;
871
118M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
118M
                if (ret) {
873
62.2M
                    if (state->repeat)
874
42.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
62.2M
                    RETURN_ON_ERROR(ret);
876
62.2M
                    RETURN_SUCCESS;
877
62.2M
                }
878
56.1M
                if (state->repeat)
879
20.3k
                    MARK_POP_KEEP(ctx->lastmark);
880
56.1M
                LASTMARK_RESTORE();
881
56.1M
            }
882
28.0M
            if (state->repeat)
883
5.71M
                MARK_POP_DISCARD(ctx->lastmark);
884
28.0M
            RETURN_FAILURE;
885
886
448M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
448M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
448M
                   pattern[1], pattern[2]));
898
899
448M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
919k
                RETURN_FAILURE; /* cannot match */
901
902
447M
            state->ptr = ptr;
903
904
447M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
447M
            RETURN_ON_ERROR(ret);
906
447M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
447M
            ctx->count = ret;
908
447M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
447M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
260M
                RETURN_FAILURE;
917
918
186M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.48M
                ptr == state->end &&
920
70.4k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
70.4k
            {
922
                /* tail is empty.  we're finished */
923
70.4k
                state->ptr = ptr;
924
70.4k
                RETURN_SUCCESS;
925
70.4k
            }
926
927
186M
            LASTMARK_SAVE();
928
186M
            if (state->repeat)
929
92.6M
                MARK_PUSH(ctx->lastmark);
930
931
186M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
19.1M
                ctx->u.chr = pattern[pattern[0]+1];
935
19.1M
                for (;;) {
936
44.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
36.5M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
25.4M
                        ptr--;
939
25.4M
                        ctx->count--;
940
25.4M
                    }
941
19.1M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
8.05M
                        break;
943
11.0M
                    state->ptr = ptr;
944
11.0M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
11.0M
                            pattern+pattern[0]);
946
11.0M
                    if (ret) {
947
11.0M
                        if (state->repeat)
948
10.1M
                            MARK_POP_DISCARD(ctx->lastmark);
949
11.0M
                        RETURN_ON_ERROR(ret);
950
11.0M
                        RETURN_SUCCESS;
951
11.0M
                    }
952
725
                    if (state->repeat)
953
721
                        MARK_POP_KEEP(ctx->lastmark);
954
725
                    LASTMARK_RESTORE();
955
956
725
                    ptr--;
957
725
                    ctx->count--;
958
725
                }
959
8.05M
                if (state->repeat)
960
6.78M
                    MARK_POP_DISCARD(ctx->lastmark);
961
167M
            } else {
962
                /* general case */
963
259M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
204M
                    state->ptr = ptr;
965
204M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
204M
                            pattern+pattern[0]);
967
204M
                    if (ret) {
968
111M
                        if (state->repeat)
969
74.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
111M
                        RETURN_ON_ERROR(ret);
971
111M
                        RETURN_SUCCESS;
972
111M
                    }
973
92.7M
                    if (state->repeat)
974
1.09M
                        MARK_POP_KEEP(ctx->lastmark);
975
92.7M
                    LASTMARK_RESTORE();
976
977
92.7M
                    ptr--;
978
92.7M
                    ctx->count--;
979
92.7M
                }
980
55.4M
                if (state->repeat)
981
794k
                    MARK_POP_DISCARD(ctx->lastmark);
982
55.4M
            }
983
63.4M
            RETURN_FAILURE;
984
985
3.25M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.25M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.25M
                   pattern[1], pattern[2]));
997
998
3.25M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.25M
            state->ptr = ptr;
1002
1003
3.25M
            if (pattern[1] == 0)
1004
3.25M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.25M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.25M
            } else {
1028
                /* general case */
1029
3.25M
                LASTMARK_SAVE();
1030
3.25M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
57.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
57.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
57.5M
                    state->ptr = ptr;
1036
57.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
57.5M
                            pattern+pattern[0]);
1038
57.5M
                    if (ret) {
1039
3.25M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.25M
                        RETURN_ON_ERROR(ret);
1042
3.25M
                        RETURN_SUCCESS;
1043
3.25M
                    }
1044
54.2M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
54.2M
                    LASTMARK_RESTORE();
1047
1048
54.2M
                    state->ptr = ptr;
1049
54.2M
                    ret = SRE(count)(state, pattern+3, 1);
1050
54.2M
                    RETURN_ON_ERROR(ret);
1051
54.2M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
54.2M
                    if (ret == 0)
1053
4
                        break;
1054
54.2M
                    assert(ret == 1);
1055
54.2M
                    ptr++;
1056
54.2M
                    ctx->count++;
1057
54.2M
                }
1058
4
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
4
            }
1061
4
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
106M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
106M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
106M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
106M
            ctx->u.rep = repeat_pool_malloc(state);
1127
106M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
106M
            ctx->u.rep->count = -1;
1131
106M
            ctx->u.rep->pattern = pattern;
1132
106M
            ctx->u.rep->prev = state->repeat;
1133
106M
            ctx->u.rep->last_ptr = NULL;
1134
106M
            state->repeat = ctx->u.rep;
1135
1136
106M
            state->ptr = ptr;
1137
106M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
106M
            state->repeat = ctx->u.rep->prev;
1139
106M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
106M
            if (ret) {
1142
30.7M
                RETURN_ON_ERROR(ret);
1143
30.7M
                RETURN_SUCCESS;
1144
30.7M
            }
1145
75.4M
            RETURN_FAILURE;
1146
1147
192M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
192M
            ctx->u.rep = state->repeat;
1155
192M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
192M
            state->ptr = ptr;
1159
1160
192M
            ctx->count = ctx->u.rep->count+1;
1161
1162
192M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
192M
                   ptr, ctx->count));
1164
1165
192M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
192M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
13.9M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
178M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
178M
                ctx->u.rep->count = ctx->count;
1185
178M
                LASTMARK_SAVE();
1186
178M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
178M
                LAST_PTR_PUSH();
1189
178M
                ctx->u.rep->last_ptr = state->ptr;
1190
178M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
178M
                        ctx->u.rep->pattern+3);
1192
178M
                LAST_PTR_POP();
1193
178M
                if (ret) {
1194
85.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
85.6M
                    RETURN_ON_ERROR(ret);
1196
85.6M
                    RETURN_SUCCESS;
1197
85.6M
                }
1198
92.6M
                MARK_POP(ctx->lastmark);
1199
92.6M
                LASTMARK_RESTORE();
1200
92.6M
                ctx->u.rep->count = ctx->count-1;
1201
92.6M
                state->ptr = ptr;
1202
92.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
106M
            state->repeat = ctx->u.rep->prev;
1207
106M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
106M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
106M
            RETURN_ON_SUCCESS(ret);
1211
75.8M
            state->ptr = ptr;
1212
75.8M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
20.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
20.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
20.0M
                   ptr, pattern[1]));
1565
20.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
20.0M
            state->ptr = ptr - pattern[1];
1568
20.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
20.0M
            RETURN_ON_FAILURE(ret);
1570
16.1M
            pattern += pattern[0];
1571
16.1M
            DISPATCH;
1572
1573
22.2M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.2M
                   ptr, pattern[1]));
1578
22.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.2M
                state->ptr = ptr - pattern[1];
1580
22.2M
                LASTMARK_SAVE();
1581
22.2M
                if (state->repeat)
1582
22.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
44.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
44.4M
                if (ret) {
1586
14.7k
                    if (state->repeat)
1587
14.7k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.7k
                    RETURN_ON_ERROR(ret);
1589
14.7k
                    RETURN_FAILURE;
1590
14.7k
                }
1591
22.2M
                if (state->repeat)
1592
22.2M
                    MARK_POP(ctx->lastmark);
1593
22.2M
                LASTMARK_RESTORE();
1594
22.2M
            }
1595
22.2M
            pattern += pattern[0];
1596
22.2M
            DISPATCH;
1597
1598
22.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.20G
exit:
1620
1.20G
    ctx_pos = ctx->last_ctx_pos;
1621
1.20G
    jump = ctx->jump;
1622
1.20G
    DATA_POP_DISCARD(ctx);
1623
1.20G
    if (ctx_pos == -1) {
1624
375M
        state->sigcount = sigcount;
1625
375M
        return ret;
1626
375M
    }
1627
824M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
824M
    switch (jump) {
1630
178M
        case JUMP_MAX_UNTIL_2:
1631
178M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
178M
            goto jump_max_until_2;
1633
106M
        case JUMP_MAX_UNTIL_3:
1634
106M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
106M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
118M
        case JUMP_BRANCH:
1643
118M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
118M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
106M
        case JUMP_REPEAT:
1658
106M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
106M
            goto jump_repeat;
1660
11.0M
        case JUMP_REPEAT_ONE_1:
1661
11.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
11.0M
            goto jump_repeat_one_1;
1663
204M
        case JUMP_REPEAT_ONE_2:
1664
204M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
204M
            goto jump_repeat_one_2;
1666
57.5M
        case JUMP_MIN_REPEAT_ONE:
1667
57.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
57.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
20.0M
        case JUMP_ASSERT:
1673
20.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
20.0M
            goto jump_assert;
1675
22.2M
        case JUMP_ASSERT_NOT:
1676
22.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
824M
    }
1683
1684
0
    return ret; /* should never get here */
1685
824M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
166M
{
601
166M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
166M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
166M
    Py_ssize_t ret = 0;
604
166M
    int jump;
605
166M
    unsigned int sigcount = state->sigcount;
606
607
166M
    SRE(match_context)* ctx;
608
166M
    SRE(match_context)* nextctx;
609
166M
    INIT_TRACE(state);
610
611
166M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
166M
    DATA_ALLOC(SRE(match_context), ctx);
614
166M
    ctx->last_ctx_pos = -1;
615
166M
    ctx->jump = JUMP_NONE;
616
166M
    ctx->toplevel = toplevel;
617
166M
    ctx_pos = alloc_pos;
618
619
166M
#if USE_COMPUTED_GOTOS
620
166M
#include "sre_targets.h"
621
166M
#endif
622
623
422M
entrance:
624
625
422M
    ;  // Fashion statement.
626
422M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
422M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
31.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.30M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.30M
                   end - ptr, (size_t) pattern[3]));
634
3.30M
            RETURN_FAILURE;
635
3.30M
        }
636
27.8M
        pattern += pattern[1] + 1;
637
27.8M
    }
638
639
418M
#if USE_COMPUTED_GOTOS
640
418M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
418M
    {
647
648
418M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
177M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
177M
                   ptr, pattern[0]));
653
177M
            {
654
177M
                int i = pattern[0];
655
177M
                if (i & 1)
656
37.5M
                    state->lastindex = i/2 + 1;
657
177M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
173M
                    int j = state->lastmark + 1;
663
182M
                    while (j < i)
664
8.31M
                        state->mark[j++] = NULL;
665
173M
                    state->lastmark = i;
666
173M
                }
667
177M
                state->mark[i] = ptr;
668
177M
            }
669
177M
            pattern++;
670
177M
            DISPATCH;
671
672
177M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
67.6M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
67.6M
                   ptr, *pattern));
677
67.6M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
26.9M
                RETURN_FAILURE;
679
40.7M
            pattern++;
680
40.7M
            ptr++;
681
40.7M
            DISPATCH;
682
683
40.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
58.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
58.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
58.9M
            if (ctx->toplevel &&
698
20.4M
                ((state->match_all && ptr != state->end) ||
699
20.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
58.9M
            state->ptr = ptr;
704
58.9M
            RETURN_SUCCESS;
705
706
29.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
29.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
29.6M
            if (!SRE(at)(state, ptr, *pattern))
711
15.6M
                RETURN_FAILURE;
712
13.9M
            pattern++;
713
13.9M
            DISPATCH;
714
715
13.9M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
73.9M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
73.9M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
73.9M
            if (ptr >= end ||
749
73.7M
                !SRE(charset)(state, pattern + 1, *ptr))
750
16.1M
                RETURN_FAILURE;
751
57.8M
            pattern += pattern[0];
752
57.8M
            ptr++;
753
57.8M
            DISPATCH;
754
755
57.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
431k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
431k
                   pattern, ptr, pattern[0]));
758
431k
            if (ptr >= end ||
759
431k
                sre_lower_ascii(*ptr) != *pattern)
760
4.34k
                RETURN_FAILURE;
761
426k
            pattern++;
762
426k
            ptr++;
763
426k
            DISPATCH;
764
765
426k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
14
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
14
                   pattern, ptr, pattern[0]));
768
14
            if (ptr >= end ||
769
14
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
14
            pattern++;
772
14
            ptr++;
773
14
            DISPATCH;
774
775
14
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
14
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
14
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
14
            if (ptr >= end
828
10
                || !SRE(charset)(state, pattern+1,
829
10
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
8
                RETURN_FAILURE;
831
6
            pattern += pattern[0];
832
6
            ptr++;
833
6
            DISPATCH;
834
835
6
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
28.4M
        TARGET(SRE_OP_JUMP):
845
28.4M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
28.4M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
28.4M
                   ptr, pattern[0]));
850
28.4M
            pattern += pattern[0];
851
28.4M
            DISPATCH;
852
853
46.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
46.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
46.4M
            LASTMARK_SAVE();
858
46.4M
            if (state->repeat)
859
12.5M
                MARK_PUSH(ctx->lastmark);
860
126M
            for (; pattern[0]; pattern += pattern[0]) {
861
106M
                if (pattern[1] == SRE_OP_LITERAL &&
862
56.4M
                    (ptr >= end ||
863
56.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.5M
                    continue;
865
82.1M
                if (pattern[1] == SRE_OP_IN &&
866
12.0M
                    (ptr >= end ||
867
12.0M
                     !SRE(charset)(state, pattern + 3,
868
12.0M
                                   (SRE_CODE) *ptr)))
869
6.13M
                    continue;
870
76.0M
                state->ptr = ptr;
871
76.0M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
76.0M
                if (ret) {
873
27.0M
                    if (state->repeat)
874
12.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
27.0M
                    RETURN_ON_ERROR(ret);
876
27.0M
                    RETURN_SUCCESS;
877
27.0M
                }
878
48.9M
                if (state->repeat)
879
7.11k
                    MARK_POP_KEEP(ctx->lastmark);
880
48.9M
                LASTMARK_RESTORE();
881
48.9M
            }
882
19.4M
            if (state->repeat)
883
476k
                MARK_POP_DISCARD(ctx->lastmark);
884
19.4M
            RETURN_FAILURE;
885
886
174M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
174M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
174M
                   pattern[1], pattern[2]));
898
899
174M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
767k
                RETURN_FAILURE; /* cannot match */
901
902
174M
            state->ptr = ptr;
903
904
174M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
174M
            RETURN_ON_ERROR(ret);
906
174M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
174M
            ctx->count = ret;
908
174M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
174M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
113M
                RETURN_FAILURE;
917
918
60.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
503k
                ptr == state->end &&
920
50.3k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
50.3k
            {
922
                /* tail is empty.  we're finished */
923
50.3k
                state->ptr = ptr;
924
50.3k
                RETURN_SUCCESS;
925
50.3k
            }
926
927
60.1M
            LASTMARK_SAVE();
928
60.1M
            if (state->repeat)
929
35.0M
                MARK_PUSH(ctx->lastmark);
930
931
60.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.77M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.77M
                for (;;) {
936
15.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
13.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.72M
                        ptr--;
939
9.72M
                        ctx->count--;
940
9.72M
                    }
941
5.77M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.12M
                        break;
943
3.64M
                    state->ptr = ptr;
944
3.64M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.64M
                            pattern+pattern[0]);
946
3.64M
                    if (ret) {
947
3.64M
                        if (state->repeat)
948
2.77M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.64M
                        RETURN_ON_ERROR(ret);
950
3.64M
                        RETURN_SUCCESS;
951
3.64M
                    }
952
185
                    if (state->repeat)
953
181
                        MARK_POP_KEEP(ctx->lastmark);
954
185
                    LASTMARK_RESTORE();
955
956
185
                    ptr--;
957
185
                    ctx->count--;
958
185
                }
959
2.12M
                if (state->repeat)
960
867k
                    MARK_POP_DISCARD(ctx->lastmark);
961
54.3M
            } else {
962
                /* general case */
963
72.3M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
62.4M
                    state->ptr = ptr;
965
62.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
62.4M
                            pattern+pattern[0]);
967
62.4M
                    if (ret) {
968
44.5M
                        if (state->repeat)
969
30.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
44.5M
                        RETURN_ON_ERROR(ret);
971
44.5M
                        RETURN_SUCCESS;
972
44.5M
                    }
973
17.9M
                    if (state->repeat)
974
762k
                        MARK_POP_KEEP(ctx->lastmark);
975
17.9M
                    LASTMARK_RESTORE();
976
977
17.9M
                    ptr--;
978
17.9M
                    ctx->count--;
979
17.9M
                }
980
9.86M
                if (state->repeat)
981
563k
                    MARK_POP_DISCARD(ctx->lastmark);
982
9.86M
            }
983
11.9M
            RETURN_FAILURE;
984
985
2.10M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
2.10M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
2.10M
                   pattern[1], pattern[2]));
997
998
2.10M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
2.10M
            state->ptr = ptr;
1002
1003
2.10M
            if (pattern[1] == 0)
1004
2.10M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
2.10M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
2.10M
            } else {
1028
                /* general case */
1029
2.10M
                LASTMARK_SAVE();
1030
2.10M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
11.0M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
11.0M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
11.0M
                    state->ptr = ptr;
1036
11.0M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
11.0M
                            pattern+pattern[0]);
1038
11.0M
                    if (ret) {
1039
2.10M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
2.10M
                        RETURN_ON_ERROR(ret);
1042
2.10M
                        RETURN_SUCCESS;
1043
2.10M
                    }
1044
8.98M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
8.98M
                    LASTMARK_RESTORE();
1047
1048
8.98M
                    state->ptr = ptr;
1049
8.98M
                    ret = SRE(count)(state, pattern+3, 1);
1050
8.98M
                    RETURN_ON_ERROR(ret);
1051
8.98M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
8.98M
                    if (ret == 0)
1053
4
                        break;
1054
8.98M
                    assert(ret == 1);
1055
8.98M
                    ptr++;
1056
8.98M
                    ctx->count++;
1057
8.98M
                }
1058
4
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
4
            }
1061
4
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
22.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
22.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
22.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
22.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
22.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
22.6M
            ctx->u.rep->count = -1;
1131
22.6M
            ctx->u.rep->pattern = pattern;
1132
22.6M
            ctx->u.rep->prev = state->repeat;
1133
22.6M
            ctx->u.rep->last_ptr = NULL;
1134
22.6M
            state->repeat = ctx->u.rep;
1135
1136
22.6M
            state->ptr = ptr;
1137
22.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
22.6M
            state->repeat = ctx->u.rep->prev;
1139
22.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
22.6M
            if (ret) {
1142
9.69M
                RETURN_ON_ERROR(ret);
1143
9.69M
                RETURN_SUCCESS;
1144
9.69M
            }
1145
13.0M
            RETURN_FAILURE;
1146
1147
54.8M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
54.8M
            ctx->u.rep = state->repeat;
1155
54.8M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
54.8M
            state->ptr = ptr;
1159
1160
54.8M
            ctx->count = ctx->u.rep->count+1;
1161
1162
54.8M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
54.8M
                   ptr, ctx->count));
1164
1165
54.8M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
54.8M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.23M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
47.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
47.5M
                ctx->u.rep->count = ctx->count;
1185
47.5M
                LASTMARK_SAVE();
1186
47.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
47.5M
                LAST_PTR_PUSH();
1189
47.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
47.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
47.5M
                        ctx->u.rep->pattern+3);
1192
47.5M
                LAST_PTR_POP();
1193
47.5M
                if (ret) {
1194
31.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
31.8M
                    RETURN_ON_ERROR(ret);
1196
31.8M
                    RETURN_SUCCESS;
1197
31.8M
                }
1198
15.7M
                MARK_POP(ctx->lastmark);
1199
15.7M
                LASTMARK_RESTORE();
1200
15.7M
                ctx->u.rep->count = ctx->count-1;
1201
15.7M
                state->ptr = ptr;
1202
15.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
22.9M
            state->repeat = ctx->u.rep->prev;
1207
22.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
22.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
22.9M
            RETURN_ON_SUCCESS(ret);
1211
13.2M
            state->ptr = ptr;
1212
13.2M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference, ignoring case (ASCII lowercasing) */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference, ignoring case (Unicode lowercasing) */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference, ignoring case (locale lowercasing) */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.28M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.28M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.28M
                   ptr, pattern[1]));
1565
3.28M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.28M
            state->ptr = ptr - pattern[1];
1568
3.28M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.28M
            RETURN_ON_FAILURE(ret);
1570
3.10M
            pattern += pattern[0];
1571
3.10M
            DISPATCH;
1572
1573
5.86M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.86M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.86M
                   ptr, pattern[1]));
1578
5.86M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.86M
                state->ptr = ptr - pattern[1];
1580
5.86M
                LASTMARK_SAVE();
1581
5.86M
                if (state->repeat)
1582
5.86M
                    MARK_PUSH(ctx->lastmark);
1583
1584
11.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
11.7M
                if (ret) {
1586
2.15k
                    if (state->repeat)
1587
2.15k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.15k
                    RETURN_ON_ERROR(ret);
1589
2.15k
                    RETURN_FAILURE;
1590
2.15k
                }
1591
5.86M
                if (state->repeat)
1592
5.86M
                    MARK_POP(ctx->lastmark);
1593
5.86M
                LASTMARK_RESTORE();
1594
5.86M
            }
1595
5.86M
            pattern += pattern[0];
1596
5.86M
            DISPATCH;
1597
1598
5.86M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
422M
exit:
1620
422M
    ctx_pos = ctx->last_ctx_pos;
1621
422M
    jump = ctx->jump;
1622
422M
    DATA_POP_DISCARD(ctx);
1623
422M
    if (ctx_pos == -1) {
1624
166M
        state->sigcount = sigcount;
1625
166M
        return ret;
1626
166M
    }
1627
255M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
255M
    switch (jump) {
1630
47.5M
        case JUMP_MAX_UNTIL_2:
1631
47.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
47.5M
            goto jump_max_until_2;
1633
22.9M
        case JUMP_MAX_UNTIL_3:
1634
22.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
22.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
76.0M
        case JUMP_BRANCH:
1643
76.0M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
76.0M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
22.6M
        case JUMP_REPEAT:
1658
22.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
22.6M
            goto jump_repeat;
1660
3.64M
        case JUMP_REPEAT_ONE_1:
1661
3.64M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.64M
            goto jump_repeat_one_1;
1663
62.4M
        case JUMP_REPEAT_ONE_2:
1664
62.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
62.4M
            goto jump_repeat_one_2;
1666
11.0M
        case JUMP_MIN_REPEAT_ONE:
1667
11.0M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
11.0M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.28M
        case JUMP_ASSERT:
1673
3.28M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.28M
            goto jump_assert;
1675
5.86M
        case JUMP_ASSERT_NOT:
1676
5.86M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.86M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
255M
    }
1683
1684
0
    return ret; /* should never get here */
1685
255M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
167M
{
601
167M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
167M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
167M
    Py_ssize_t ret = 0;
604
167M
    int jump;
605
167M
    unsigned int sigcount = state->sigcount;
606
607
167M
    SRE(match_context)* ctx;
608
167M
    SRE(match_context)* nextctx;
609
167M
    INIT_TRACE(state);
610
611
167M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
167M
    DATA_ALLOC(SRE(match_context), ctx);
614
167M
    ctx->last_ctx_pos = -1;
615
167M
    ctx->jump = JUMP_NONE;
616
167M
    ctx->toplevel = toplevel;
617
167M
    ctx_pos = alloc_pos;
618
619
167M
#if USE_COMPUTED_GOTOS
620
167M
#include "sre_targets.h"
621
167M
#endif
622
623
545M
entrance:
624
625
545M
    ;  // Fashion statement.
626
545M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
545M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
10.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
102k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
102k
                   end - ptr, (size_t) pattern[3]));
634
102k
            RETURN_FAILURE;
635
102k
        }
636
10.8M
        pattern += pattern[1] + 1;
637
10.8M
    }
638
639
544M
#if USE_COMPUTED_GOTOS
640
544M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
544M
    {
647
648
544M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
235M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
235M
                   ptr, pattern[0]));
653
235M
            {
654
235M
                int i = pattern[0];
655
235M
                if (i & 1)
656
52.8M
                    state->lastindex = i/2 + 1;
657
235M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
235M
                    int j = state->lastmark + 1;
663
238M
                    while (j < i)
664
3.05M
                        state->mark[j++] = NULL;
665
235M
                    state->lastmark = i;
666
235M
                }
667
235M
                state->mark[i] = ptr;
668
235M
            }
669
235M
            pattern++;
670
235M
            DISPATCH;
671
672
235M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
29.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
29.2M
                   ptr, *pattern));
677
29.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
15.3M
                RETURN_FAILURE;
679
13.9M
            pattern++;
680
13.9M
            ptr++;
681
13.9M
            DISPATCH;
682
683
13.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
48.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
48.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
48.1M
            if (ctx->toplevel &&
698
7.14M
                ((state->match_all && ptr != state->end) ||
699
7.14M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
48.1M
            state->ptr = ptr;
704
48.1M
            RETURN_SUCCESS;
705
706
56.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
56.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
56.6M
            if (!SRE(at)(state, ptr, *pattern))
711
54.1M
                RETURN_FAILURE;
712
2.43M
            pattern++;
713
2.43M
            DISPATCH;
714
715
2.43M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
119M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
119M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
119M
            if (ptr >= end ||
749
118M
                !SRE(charset)(state, pattern + 1, *ptr))
750
60.3M
                RETURN_FAILURE;
751
59.1M
            pattern += pattern[0];
752
59.1M
            ptr++;
753
59.1M
            DISPATCH;
754
755
59.1M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.16M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.16M
                   pattern, ptr, pattern[0]));
758
3.16M
            if (ptr >= end ||
759
3.16M
                sre_lower_ascii(*ptr) != *pattern)
760
19.4k
                RETURN_FAILURE;
761
3.14M
            pattern++;
762
3.14M
            ptr++;
763
3.14M
            DISPATCH;
764
765
3.14M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
16.9M
        TARGET(SRE_OP_JUMP):
845
16.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
16.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
16.9M
                   ptr, pattern[0]));
850
16.9M
            pattern += pattern[0];
851
16.9M
            DISPATCH;
852
853
21.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
21.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
21.3M
            LASTMARK_SAVE();
858
21.3M
            if (state->repeat)
859
16.2M
                MARK_PUSH(ctx->lastmark);
860
44.4M
            for (; pattern[0]; pattern += pattern[0]) {
861
39.7M
                if (pattern[1] == SRE_OP_LITERAL &&
862
17.0M
                    (ptr >= end ||
863
17.0M
                     (SRE_CODE) *ptr != pattern[2]))
864
10.1M
                    continue;
865
29.6M
                if (pattern[1] == SRE_OP_IN &&
866
13.6M
                    (ptr >= end ||
867
13.6M
                     !SRE(charset)(state, pattern + 3,
868
13.6M
                                   (SRE_CODE) *ptr)))
869
6.68M
                    continue;
870
22.9M
                state->ptr = ptr;
871
22.9M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
22.9M
                if (ret) {
873
16.6M
                    if (state->repeat)
874
14.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
16.6M
                    RETURN_ON_ERROR(ret);
876
16.6M
                    RETURN_SUCCESS;
877
16.6M
                }
878
6.33M
                if (state->repeat)
879
6.97k
                    MARK_POP_KEEP(ctx->lastmark);
880
6.33M
                LASTMARK_RESTORE();
881
6.33M
            }
882
4.71M
            if (state->repeat)
883
1.90M
                MARK_POP_DISCARD(ctx->lastmark);
884
4.71M
            RETURN_FAILURE;
885
886
194M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
194M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
194M
                   pattern[1], pattern[2]));
898
899
194M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
140k
                RETURN_FAILURE; /* cannot match */
901
902
194M
            state->ptr = ptr;
903
904
194M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
194M
            RETURN_ON_ERROR(ret);
906
194M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
194M
            ctx->count = ret;
908
194M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
194M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
119M
                RETURN_FAILURE;
917
918
74.8M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
2.85M
                ptr == state->end &&
920
17.0k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
17.0k
            {
922
                /* tail is empty.  we're finished */
923
17.0k
                state->ptr = ptr;
924
17.0k
                RETURN_SUCCESS;
925
17.0k
            }
926
927
74.8M
            LASTMARK_SAVE();
928
74.8M
            if (state->repeat)
929
23.0M
                MARK_PUSH(ctx->lastmark);
930
931
74.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.69M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.69M
                for (;;) {
936
8.87M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.58M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
4.18M
                        ptr--;
939
4.18M
                        ctx->count--;
940
4.18M
                    }
941
4.69M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.28M
                        break;
943
3.40M
                    state->ptr = ptr;
944
3.40M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.40M
                            pattern+pattern[0]);
946
3.40M
                    if (ret) {
947
3.40M
                        if (state->repeat)
948
3.37M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.40M
                        RETURN_ON_ERROR(ret);
950
3.40M
                        RETURN_SUCCESS;
951
3.40M
                    }
952
256
                    if (state->repeat)
953
256
                        MARK_POP_KEEP(ctx->lastmark);
954
256
                    LASTMARK_RESTORE();
955
956
256
                    ptr--;
957
256
                    ctx->count--;
958
256
                }
959
1.28M
                if (state->repeat)
960
1.27M
                    MARK_POP_DISCARD(ctx->lastmark);
961
70.1M
            } else {
962
                /* general case */
963
130M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
91.5M
                    state->ptr = ptr;
965
91.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
91.5M
                            pattern+pattern[0]);
967
91.5M
                    if (ret) {
968
31.5M
                        if (state->repeat)
969
18.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
31.5M
                        RETURN_ON_ERROR(ret);
971
31.5M
                        RETURN_SUCCESS;
972
31.5M
                    }
973
59.9M
                    if (state->repeat)
974
227k
                        MARK_POP_KEEP(ctx->lastmark);
975
59.9M
                    LASTMARK_RESTORE();
976
977
59.9M
                    ptr--;
978
59.9M
                    ctx->count--;
979
59.9M
                }
980
38.6M
                if (state->repeat)
981
155k
                    MARK_POP_DISCARD(ctx->lastmark);
982
38.6M
            }
983
39.9M
            RETURN_FAILURE;
984
985
1.14M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
1.14M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
1.14M
                   pattern[1], pattern[2]));
997
998
1.14M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
1.14M
            state->ptr = ptr;
1002
1003
1.14M
            if (pattern[1] == 0)
1004
1.14M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
1.14M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
1.14M
            } else {
1028
                /* general case */
1029
1.14M
                LASTMARK_SAVE();
1030
1.14M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
39.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
39.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
39.5M
                    state->ptr = ptr;
1036
39.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
39.5M
                            pattern+pattern[0]);
1038
39.5M
                    if (ret) {
1039
1.14M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
1.14M
                        RETURN_ON_ERROR(ret);
1042
1.14M
                        RETURN_SUCCESS;
1043
1.14M
                    }
1044
38.4M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
38.4M
                    LASTMARK_RESTORE();
1047
1048
38.4M
                    state->ptr = ptr;
1049
38.4M
                    ret = SRE(count)(state, pattern+3, 1);
1050
38.4M
                    RETURN_ON_ERROR(ret);
1051
38.4M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
38.4M
                    if (ret == 0)
1053
0
                        break;
1054
38.4M
                    assert(ret == 1);
1055
38.4M
                    ptr++;
1056
38.4M
                    ctx->count++;
1057
38.4M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
61.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
61.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
61.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
61.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
61.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
61.5M
            ctx->u.rep->count = -1;
1131
61.5M
            ctx->u.rep->pattern = pattern;
1132
61.5M
            ctx->u.rep->prev = state->repeat;
1133
61.5M
            ctx->u.rep->last_ptr = NULL;
1134
61.5M
            state->repeat = ctx->u.rep;
1135
1136
61.5M
            state->ptr = ptr;
1137
61.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
61.5M
            state->repeat = ctx->u.rep->prev;
1139
61.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
61.5M
            if (ret) {
1142
8.55M
                RETURN_ON_ERROR(ret);
1143
8.55M
                RETURN_SUCCESS;
1144
8.55M
            }
1145
53.0M
            RETURN_FAILURE;
1146
1147
85.8M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
85.8M
            ctx->u.rep = state->repeat;
1155
85.8M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
85.8M
            state->ptr = ptr;
1159
1160
85.8M
            ctx->count = ctx->u.rep->count+1;
1161
1162
85.8M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
85.8M
                   ptr, ctx->count));
1164
1165
85.8M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
85.8M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.93M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
82.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
82.9M
                ctx->u.rep->count = ctx->count;
1185
82.9M
                LASTMARK_SAVE();
1186
82.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
82.9M
                LAST_PTR_PUSH();
1189
82.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
82.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
82.9M
                        ctx->u.rep->pattern+3);
1192
82.9M
                LAST_PTR_POP();
1193
82.9M
                if (ret) {
1194
24.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
24.2M
                    RETURN_ON_ERROR(ret);
1196
24.2M
                    RETURN_SUCCESS;
1197
24.2M
                }
1198
58.7M
                MARK_POP(ctx->lastmark);
1199
58.7M
                LASTMARK_RESTORE();
1200
58.7M
                ctx->u.rep->count = ctx->count-1;
1201
58.7M
                state->ptr = ptr;
1202
58.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
61.6M
            state->repeat = ctx->u.rep->prev;
1207
61.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
61.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
61.6M
            RETURN_ON_SUCCESS(ret);
1211
53.0M
            state->ptr = ptr;
1212
53.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
6.81M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
6.81M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
6.81M
                   ptr, pattern[1]));
1565
6.81M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
6.81M
            state->ptr = ptr - pattern[1];
1568
6.81M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
6.81M
            RETURN_ON_FAILURE(ret);
1570
4.44M
            pattern += pattern[0];
1571
4.44M
            DISPATCH;
1572
1573
6.85M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
6.85M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
6.85M
                   ptr, pattern[1]));
1578
6.85M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
6.85M
                state->ptr = ptr - pattern[1];
1580
6.85M
                LASTMARK_SAVE();
1581
6.85M
                if (state->repeat)
1582
6.85M
                    MARK_PUSH(ctx->lastmark);
1583
1584
13.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
13.7M
                if (ret) {
1586
6.67k
                    if (state->repeat)
1587
6.67k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.67k
                    RETURN_ON_ERROR(ret);
1589
6.67k
                    RETURN_FAILURE;
1590
6.67k
                }
1591
6.84M
                if (state->repeat)
1592
6.84M
                    MARK_POP(ctx->lastmark);
1593
6.84M
                LASTMARK_RESTORE();
1594
6.84M
            }
1595
6.84M
            pattern += pattern[0];
1596
6.84M
            DISPATCH;
1597
1598
6.84M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
545M
exit:
1620
545M
    ctx_pos = ctx->last_ctx_pos;
1621
545M
    jump = ctx->jump;
1622
545M
    DATA_POP_DISCARD(ctx);
1623
545M
    if (ctx_pos == -1) {
1624
167M
        state->sigcount = sigcount;
1625
167M
        return ret;
1626
167M
    }
1627
377M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
377M
    switch (jump) {
1630
82.9M
        case JUMP_MAX_UNTIL_2:
1631
82.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
82.9M
            goto jump_max_until_2;
1633
61.6M
        case JUMP_MAX_UNTIL_3:
1634
61.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
61.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
22.9M
        case JUMP_BRANCH:
1643
22.9M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
22.9M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
61.5M
        case JUMP_REPEAT:
1658
61.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
61.5M
            goto jump_repeat;
1660
3.40M
        case JUMP_REPEAT_ONE_1:
1661
3.40M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.40M
            goto jump_repeat_one_1;
1663
91.5M
        case JUMP_REPEAT_ONE_2:
1664
91.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
91.5M
            goto jump_repeat_one_2;
1666
39.5M
        case JUMP_MIN_REPEAT_ONE:
1667
39.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
39.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
6.81M
        case JUMP_ASSERT:
1673
6.81M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
6.81M
            goto jump_assert;
1675
6.85M
        case JUMP_ASSERT_NOT:
1676
6.85M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
6.85M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
377M
    }
1683
1684
0
    return ret; /* should never get here */
1685
377M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
40.8M
{
601
40.8M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
40.8M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
40.8M
    Py_ssize_t ret = 0;
604
40.8M
    int jump;
605
40.8M
    unsigned int sigcount = state->sigcount;
606
607
40.8M
    SRE(match_context)* ctx;
608
40.8M
    SRE(match_context)* nextctx;
609
40.8M
    INIT_TRACE(state);
610
611
40.8M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
40.8M
    DATA_ALLOC(SRE(match_context), ctx);
614
40.8M
    ctx->last_ctx_pos = -1;
615
40.8M
    ctx->jump = JUMP_NONE;
616
40.8M
    ctx->toplevel = toplevel;
617
40.8M
    ctx_pos = alloc_pos;
618
619
40.8M
#if USE_COMPUTED_GOTOS
620
40.8M
#include "sre_targets.h"
621
40.8M
#endif
622
623
232M
entrance:
624
625
232M
    ;  // Fashion statement.
626
232M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
232M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
7.07M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.74k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.74k
                   end - ptr, (size_t) pattern[3]));
634
3.74k
            RETURN_FAILURE;
635
3.74k
        }
636
7.07M
        pattern += pattern[1] + 1;
637
7.07M
    }
638
639
232M
#if USE_COMPUTED_GOTOS
640
232M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
232M
    {
647
648
232M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
66.8M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
66.8M
                   ptr, pattern[0]));
653
66.8M
            {
654
66.8M
                int i = pattern[0];
655
66.8M
                if (i & 1)
656
20.1M
                    state->lastindex = i/2 + 1;
657
66.8M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
65.8M
                    int j = state->lastmark + 1;
663
66.8M
                    while (j < i)
664
1.02M
                        state->mark[j++] = NULL;
665
65.8M
                    state->lastmark = i;
666
65.8M
                }
667
66.8M
                state->mark[i] = ptr;
668
66.8M
            }
669
66.8M
            pattern++;
670
66.8M
            DISPATCH;
671
672
66.8M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
25.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
25.8M
                   ptr, *pattern));
677
25.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
11.6M
                RETURN_FAILURE;
679
14.2M
            pattern++;
680
14.2M
            ptr++;
681
14.2M
            DISPATCH;
682
683
14.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
18.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
18.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
18.9M
            if (ctx->toplevel &&
698
5.05M
                ((state->match_all && ptr != state->end) ||
699
5.05M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
18.9M
            state->ptr = ptr;
704
18.9M
            RETURN_SUCCESS;
705
706
10.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
10.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
10.5M
            if (!SRE(at)(state, ptr, *pattern))
711
10.5M
                RETURN_FAILURE;
712
30.3k
            pattern++;
713
30.3k
            DISPATCH;
714
715
30.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
51.9M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
51.9M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
51.9M
            if (ptr >= end ||
749
51.9M
                !SRE(charset)(state, pattern + 1, *ptr))
750
15.7M
                RETURN_FAILURE;
751
36.2M
            pattern += pattern[0];
752
36.2M
            ptr++;
753
36.2M
            DISPATCH;
754
755
36.2M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.54M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.54M
                   pattern, ptr, pattern[0]));
758
2.54M
            if (ptr >= end ||
759
2.54M
                sre_lower_ascii(*ptr) != *pattern)
760
25.3k
                RETURN_FAILURE;
761
2.52M
            pattern++;
762
2.52M
            ptr++;
763
2.52M
            DISPATCH;
764
765
2.52M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.0M
        TARGET(SRE_OP_JUMP):
845
19.0M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.0M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.0M
                   ptr, pattern[0]));
850
19.0M
            pattern += pattern[0];
851
19.0M
            DISPATCH;
852
853
22.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
22.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
22.4M
            LASTMARK_SAVE();
858
22.4M
            if (state->repeat)
859
19.7M
                MARK_PUSH(ctx->lastmark);
860
47.8M
            for (; pattern[0]; pattern += pattern[0]) {
861
43.8M
                if (pattern[1] == SRE_OP_LITERAL &&
862
22.6M
                    (ptr >= end ||
863
22.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
14.9M
                    continue;
865
28.9M
                if (pattern[1] == SRE_OP_IN &&
866
16.2M
                    (ptr >= end ||
867
16.2M
                     !SRE(charset)(state, pattern + 3,
868
16.2M
                                   (SRE_CODE) *ptr)))
869
9.58M
                    continue;
870
19.4M
                state->ptr = ptr;
871
19.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
19.4M
                if (ret) {
873
18.4M
                    if (state->repeat)
874
16.4M
                        MARK_POP_DISCARD(ctx->lastmark);
875
18.4M
                    RETURN_ON_ERROR(ret);
876
18.4M
                    RETURN_SUCCESS;
877
18.4M
                }
878
921k
                if (state->repeat)
879
6.25k
                    MARK_POP_KEEP(ctx->lastmark);
880
921k
                LASTMARK_RESTORE();
881
921k
            }
882
3.97M
            if (state->repeat)
883
3.33M
                MARK_POP_DISCARD(ctx->lastmark);
884
3.97M
            RETURN_FAILURE;
885
886
78.5M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
78.5M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
78.5M
                   pattern[1], pattern[2]));
898
899
78.5M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
11.2k
                RETURN_FAILURE; /* cannot match */
901
902
78.5M
            state->ptr = ptr;
903
904
78.5M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
78.5M
            RETURN_ON_ERROR(ret);
906
78.5M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
78.5M
            ctx->count = ret;
908
78.5M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
78.5M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
27.2M
                RETURN_FAILURE;
917
918
51.2M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.12M
                ptr == state->end &&
920
3.17k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.17k
            {
922
                /* tail is empty.  we're finished */
923
3.17k
                state->ptr = ptr;
924
3.17k
                RETURN_SUCCESS;
925
3.17k
            }
926
927
51.2M
            LASTMARK_SAVE();
928
51.2M
            if (state->repeat)
929
34.4M
                MARK_PUSH(ctx->lastmark);
930
931
51.2M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
8.68M
                ctx->u.chr = pattern[pattern[0]+1];
935
8.68M
                for (;;) {
936
20.2M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
15.6M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.5M
                        ptr--;
939
11.5M
                        ctx->count--;
940
11.5M
                    }
941
8.68M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
4.64M
                        break;
943
4.04M
                    state->ptr = ptr;
944
4.04M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.04M
                            pattern+pattern[0]);
946
4.04M
                    if (ret) {
947
4.04M
                        if (state->repeat)
948
4.04M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.04M
                        RETURN_ON_ERROR(ret);
950
4.04M
                        RETURN_SUCCESS;
951
4.04M
                    }
952
284
                    if (state->repeat)
953
284
                        MARK_POP_KEEP(ctx->lastmark);
954
284
                    LASTMARK_RESTORE();
955
956
284
                    ptr--;
957
284
                    ctx->count--;
958
284
                }
959
4.64M
                if (state->repeat)
960
4.63M
                    MARK_POP_DISCARD(ctx->lastmark);
961
42.5M
            } else {
962
                /* general case */
963
57.3M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
50.4M
                    state->ptr = ptr;
965
50.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
50.4M
                            pattern+pattern[0]);
967
50.4M
                    if (ret) {
968
35.6M
                        if (state->repeat)
969
25.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
35.6M
                        RETURN_ON_ERROR(ret);
971
35.6M
                        RETURN_SUCCESS;
972
35.6M
                    }
973
14.8M
                    if (state->repeat)
974
109k
                        MARK_POP_KEEP(ctx->lastmark);
975
14.8M
                    LASTMARK_RESTORE();
976
977
14.8M
                    ptr--;
978
14.8M
                    ctx->count--;
979
14.8M
                }
980
6.94M
                if (state->repeat)
981
75.3k
                    MARK_POP_DISCARD(ctx->lastmark);
982
6.94M
            }
983
11.5M
            RETURN_FAILURE;
984
985
11.6k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
11.6k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
11.6k
                   pattern[1], pattern[2]));
997
998
11.6k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
11.6k
            state->ptr = ptr;
1002
1003
11.6k
            if (pattern[1] == 0)
1004
11.6k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
11.6k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
11.6k
            } else {
1028
                /* general case */
1029
11.6k
                LASTMARK_SAVE();
1030
11.6k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
6.86M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
6.86M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
6.86M
                    state->ptr = ptr;
1036
6.86M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
6.86M
                            pattern+pattern[0]);
1038
6.86M
                    if (ret) {
1039
11.6k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
11.6k
                        RETURN_ON_ERROR(ret);
1042
11.6k
                        RETURN_SUCCESS;
1043
11.6k
                    }
1044
6.85M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
6.85M
                    LASTMARK_RESTORE();
1047
1048
6.85M
                    state->ptr = ptr;
1049
6.85M
                    ret = SRE(count)(state, pattern+3, 1);
1050
6.85M
                    RETURN_ON_ERROR(ret);
1051
6.85M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
6.85M
                    if (ret == 0)
1053
0
                        break;
1054
6.85M
                    assert(ret == 1);
1055
6.85M
                    ptr++;
1056
6.85M
                    ctx->count++;
1057
6.85M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
21.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
21.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
21.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
21.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
21.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
21.9M
            ctx->u.rep->count = -1;
1131
21.9M
            ctx->u.rep->pattern = pattern;
1132
21.9M
            ctx->u.rep->prev = state->repeat;
1133
21.9M
            ctx->u.rep->last_ptr = NULL;
1134
21.9M
            state->repeat = ctx->u.rep;
1135
1136
21.9M
            state->ptr = ptr;
1137
21.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
21.9M
            state->repeat = ctx->u.rep->prev;
1139
21.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
21.9M
            if (ret) {
1142
12.4M
                RETURN_ON_ERROR(ret);
1143
12.4M
                RETURN_SUCCESS;
1144
12.4M
            }
1145
9.43M
            RETURN_FAILURE;
1146
1147
51.4M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
51.4M
            ctx->u.rep = state->repeat;
1155
51.4M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
51.4M
            state->ptr = ptr;
1159
1160
51.4M
            ctx->count = ctx->u.rep->count+1;
1161
1162
51.4M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
51.4M
                   ptr, ctx->count));
1164
1165
51.4M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
51.4M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.72M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
47.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
47.7M
                ctx->u.rep->count = ctx->count;
1185
47.7M
                LASTMARK_SAVE();
1186
47.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
47.7M
                LAST_PTR_PUSH();
1189
47.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
47.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
47.7M
                        ctx->u.rep->pattern+3);
1192
47.7M
                LAST_PTR_POP();
1193
47.7M
                if (ret) {
1194
29.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
29.4M
                    RETURN_ON_ERROR(ret);
1196
29.4M
                    RETURN_SUCCESS;
1197
29.4M
                }
1198
18.2M
                MARK_POP(ctx->lastmark);
1199
18.2M
                LASTMARK_RESTORE();
1200
18.2M
                ctx->u.rep->count = ctx->count-1;
1201
18.2M
                state->ptr = ptr;
1202
18.2M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
21.9M
            state->repeat = ctx->u.rep->prev;
1207
21.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
21.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
21.9M
            RETURN_ON_SUCCESS(ret);
1211
9.47M
            state->ptr = ptr;
1212
9.47M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
9.99M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
9.99M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
9.99M
                   ptr, pattern[1]));
1565
9.99M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
9.99M
            state->ptr = ptr - pattern[1];
1568
9.99M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
9.99M
            RETURN_ON_FAILURE(ret);
1570
8.57M
            pattern += pattern[0];
1571
8.57M
            DISPATCH;
1572
1573
9.50M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
9.50M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
9.50M
                   ptr, pattern[1]));
1578
9.50M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
9.50M
                state->ptr = ptr - pattern[1];
1580
9.50M
                LASTMARK_SAVE();
1581
9.50M
                if (state->repeat)
1582
9.50M
                    MARK_PUSH(ctx->lastmark);
1583
1584
19.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
19.0M
                if (ret) {
1586
5.94k
                    if (state->repeat)
1587
5.94k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.94k
                    RETURN_ON_ERROR(ret);
1589
5.94k
                    RETURN_FAILURE;
1590
5.94k
                }
1591
9.50M
                if (state->repeat)
1592
9.50M
                    MARK_POP(ctx->lastmark);
1593
9.50M
                LASTMARK_RESTORE();
1594
9.50M
            }
1595
9.50M
            pattern += pattern[0];
1596
9.50M
            DISPATCH;
1597
1598
9.50M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
232M
exit:
1620
232M
    ctx_pos = ctx->last_ctx_pos;
1621
232M
    jump = ctx->jump;
1622
232M
    DATA_POP_DISCARD(ctx);
1623
232M
    if (ctx_pos == -1) {
1624
40.8M
        state->sigcount = sigcount;
1625
40.8M
        return ret;
1626
40.8M
    }
1627
191M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
191M
    switch (jump) {
1630
47.7M
        case JUMP_MAX_UNTIL_2:
1631
47.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
47.7M
            goto jump_max_until_2;
1633
21.9M
        case JUMP_MAX_UNTIL_3:
1634
21.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
21.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
19.4M
        case JUMP_BRANCH:
1643
19.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
19.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
21.9M
        case JUMP_REPEAT:
1658
21.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
21.9M
            goto jump_repeat;
1660
4.04M
        case JUMP_REPEAT_ONE_1:
1661
4.04M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.04M
            goto jump_repeat_one_1;
1663
50.4M
        case JUMP_REPEAT_ONE_2:
1664
50.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
50.4M
            goto jump_repeat_one_2;
1666
6.86M
        case JUMP_MIN_REPEAT_ONE:
1667
6.86M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
6.86M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
9.99M
        case JUMP_ASSERT:
1673
9.99M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
9.99M
            goto jump_assert;
1675
9.50M
        case JUMP_ASSERT_NOT:
1676
9.50M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
9.50M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
191M
    }
1683
1684
0
    return ret; /* should never get here */
1685
191M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
244M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
88.5M
{
1694
88.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
88.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
88.5M
    Py_ssize_t status = 0;
1697
88.5M
    Py_ssize_t prefix_len = 0;
1698
88.5M
    Py_ssize_t prefix_skip = 0;
1699
88.5M
    SRE_CODE* prefix = NULL;
1700
88.5M
    SRE_CODE* charset = NULL;
1701
88.5M
    SRE_CODE* overlap = NULL;
1702
88.5M
    int flags = 0;
1703
88.5M
    INIT_TRACE(state);
1704
1705
88.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
88.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
88.5M
        flags = pattern[2];
1713
1714
88.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.15M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.15M
                   end - ptr, (size_t) pattern[3]));
1717
4.15M
            return 0;
1718
4.15M
        }
1719
84.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
6.79M
            end -= pattern[3] - 1;
1723
6.79M
            if (end <= ptr)
1724
0
                end = ptr;
1725
6.79M
        }
1726
1727
84.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
6.80M
            prefix_len = pattern[5];
1731
6.80M
            prefix_skip = pattern[6];
1732
6.80M
            prefix = pattern + 7;
1733
6.80M
            overlap = prefix + prefix_len - 1;
1734
77.6M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
69.9M
            charset = pattern + 5;
1738
1739
84.4M
        pattern += 1 + pattern[1];
1740
84.4M
    }
1741
1742
84.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
84.4M
           prefix, prefix_len, prefix_skip));
1744
84.4M
    TRACE(("charset = %p\n", charset));
1745
1746
84.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.10M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.11M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.11M
#endif
1753
4.11M
        end = (SRE_CHAR *)state->end;
1754
4.11M
        state->must_advance = 0;
1755
6.47M
        while (ptr < end) {
1756
102M
            while (*ptr != c) {
1757
96.0M
                if (++ptr >= end)
1758
250k
                    return 0;
1759
96.0M
            }
1760
6.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.14M
            state->start = ptr;
1762
6.14M
            state->ptr = ptr + prefix_skip;
1763
6.14M
            if (flags & SRE_INFO_LITERAL)
1764
7.29k
                return 1; /* we got all of it */
1765
6.13M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.13M
            if (status != 0)
1767
5.77M
                return status;
1768
367k
            ++ptr;
1769
367k
            RESET_CAPTURE_GROUP();
1770
367k
        }
1771
78.0k
        return 0;
1772
4.11M
    }
1773
1774
78.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
693k
        Py_ssize_t i = 0;
1778
1779
693k
        end = (SRE_CHAR *)state->end;
1780
693k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.34M
        for (i = 0; i < prefix_len; i++)
1784
894k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
447k
#endif
1787
1.42M
        while (ptr < end) {
1788
1.42M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.83M
            while (*ptr++ != c) {
1790
7.41M
                if (ptr >= end)
1791
323
                    return 0;
1792
7.41M
            }
1793
1.42M
            if (ptr >= end)
1794
58
                return 0;
1795
1796
1.42M
            i = 1;
1797
1.42M
            state->must_advance = 0;
1798
1.42M
            do {
1799
1.42M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.27M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.27M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.27M
                    state->start = ptr - (prefix_len - 1);
1808
1.27M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.27M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.27M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.27M
                    if (status != 0)
1813
692k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
581k
                    if (++ptr >= end)
1816
68
                        return 0;
1817
581k
                    RESET_CAPTURE_GROUP();
1818
581k
                }
1819
730k
                i = overlap[i];
1820
730k
            } while (i != 0);
1821
1.42M
        }
1822
0
        return 0;
1823
693k
    }
1824
1825
77.6M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
69.9M
        end = (SRE_CHAR *)state->end;
1828
69.9M
        state->must_advance = 0;
1829
72.2M
        for (;;) {
1830
279M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
207M
                ptr++;
1832
72.2M
            if (ptr >= end)
1833
2.70M
                return 0;
1834
69.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
69.5M
            state->start = ptr;
1836
69.5M
            state->ptr = ptr;
1837
69.5M
            status = SRE(match)(state, pattern, 0);
1838
69.5M
            if (status != 0)
1839
67.2M
                break;
1840
2.24M
            ptr++;
1841
2.24M
            RESET_CAPTURE_GROUP();
1842
2.24M
        }
1843
69.9M
    } else {
1844
        /* general case */
1845
7.64M
        assert(ptr <= end);
1846
7.64M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
7.64M
        state->start = state->ptr = ptr;
1848
7.64M
        status = SRE(match)(state, pattern, 1);
1849
7.64M
        state->must_advance = 0;
1850
7.64M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.53M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
63
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
3.53M
        {
1854
3.53M
            state->start = state->ptr = ptr = end;
1855
3.53M
            return 0;
1856
3.53M
        }
1857
245M
        while (status == 0 && ptr < end) {
1858
241M
            ptr++;
1859
241M
            RESET_CAPTURE_GROUP();
1860
241M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
241M
            state->start = state->ptr = ptr;
1862
241M
            status = SRE(match)(state, pattern, 0);
1863
241M
        }
1864
4.10M
    }
1865
1866
71.3M
    return status;
1867
77.6M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
44.3M
{
1694
44.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
44.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
44.3M
    Py_ssize_t status = 0;
1697
44.3M
    Py_ssize_t prefix_len = 0;
1698
44.3M
    Py_ssize_t prefix_skip = 0;
1699
44.3M
    SRE_CODE* prefix = NULL;
1700
44.3M
    SRE_CODE* charset = NULL;
1701
44.3M
    SRE_CODE* overlap = NULL;
1702
44.3M
    int flags = 0;
1703
44.3M
    INIT_TRACE(state);
1704
1705
44.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
44.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
44.3M
        flags = pattern[2];
1713
1714
44.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.07M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.07M
                   end - ptr, (size_t) pattern[3]));
1717
4.07M
            return 0;
1718
4.07M
        }
1719
40.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.95M
            end -= pattern[3] - 1;
1723
1.95M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.95M
        }
1726
1727
40.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.95M
            prefix_len = pattern[5];
1731
1.95M
            prefix_skip = pattern[6];
1732
1.95M
            prefix = pattern + 7;
1733
1.95M
            overlap = prefix + prefix_len - 1;
1734
38.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
32.8M
            charset = pattern + 5;
1738
1739
40.3M
        pattern += 1 + pattern[1];
1740
40.3M
    }
1741
1742
40.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
40.3M
           prefix, prefix_len, prefix_skip));
1744
40.3M
    TRACE(("charset = %p\n", charset));
1745
1746
40.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.91M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.91M
#if SIZEOF_SRE_CHAR < 4
1750
1.91M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.91M
#endif
1753
1.91M
        end = (SRE_CHAR *)state->end;
1754
1.91M
        state->must_advance = 0;
1755
2.11M
        while (ptr < end) {
1756
27.5M
            while (*ptr != c) {
1757
25.6M
                if (++ptr >= end)
1758
178k
                    return 0;
1759
25.6M
            }
1760
1.86M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.86M
            state->start = ptr;
1762
1.86M
            state->ptr = ptr + prefix_skip;
1763
1.86M
            if (flags & SRE_INFO_LITERAL)
1764
670
                return 1; /* we got all of it */
1765
1.86M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.86M
            if (status != 0)
1767
1.66M
                return status;
1768
195k
            ++ptr;
1769
195k
            RESET_CAPTURE_GROUP();
1770
195k
        }
1771
71.5k
        return 0;
1772
1.91M
    }
1773
1774
38.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
39.5k
        Py_ssize_t i = 0;
1778
1779
39.5k
        end = (SRE_CHAR *)state->end;
1780
39.5k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
39.5k
#if SIZEOF_SRE_CHAR < 4
1783
118k
        for (i = 0; i < prefix_len; i++)
1784
79.0k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
39.5k
#endif
1787
95.7k
        while (ptr < end) {
1788
95.7k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.83M
            while (*ptr++ != c) {
1790
1.73M
                if (ptr >= end)
1791
61
                    return 0;
1792
1.73M
            }
1793
95.6k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
95.6k
            i = 1;
1797
95.6k
            state->must_advance = 0;
1798
95.9k
            do {
1799
95.9k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
88.8k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
88.8k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
88.8k
                    state->start = ptr - (prefix_len - 1);
1808
88.8k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
88.8k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
88.8k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
88.8k
                    if (status != 0)
1813
39.4k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
49.4k
                    if (++ptr >= end)
1816
28
                        return 0;
1817
49.3k
                    RESET_CAPTURE_GROUP();
1818
49.3k
                }
1819
56.5k
                i = overlap[i];
1820
56.5k
            } while (i != 0);
1821
95.6k
        }
1822
0
        return 0;
1823
39.5k
    }
1824
1825
38.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
32.8M
        end = (SRE_CHAR *)state->end;
1828
32.8M
        state->must_advance = 0;
1829
34.2M
        for (;;) {
1830
77.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
42.9M
                ptr++;
1832
34.2M
            if (ptr >= end)
1833
1.88M
                return 0;
1834
32.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
32.3M
            state->start = ptr;
1836
32.3M
            state->ptr = ptr;
1837
32.3M
            status = SRE(match)(state, pattern, 0);
1838
32.3M
            if (status != 0)
1839
30.9M
                break;
1840
1.36M
            ptr++;
1841
1.36M
            RESET_CAPTURE_GROUP();
1842
1.36M
        }
1843
32.8M
    } else {
1844
        /* general case */
1845
5.47M
        assert(ptr <= end);
1846
5.47M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.47M
        state->start = state->ptr = ptr;
1848
5.47M
        status = SRE(match)(state, pattern, 1);
1849
5.47M
        state->must_advance = 0;
1850
5.47M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.25M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
23
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.25M
        {
1854
2.25M
            state->start = state->ptr = ptr = end;
1855
2.25M
            return 0;
1856
2.25M
        }
1857
99.0M
        while (status == 0 && ptr < end) {
1858
95.7M
            ptr++;
1859
95.7M
            RESET_CAPTURE_GROUP();
1860
95.7M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
95.7M
            state->start = state->ptr = ptr;
1862
95.7M
            status = SRE(match)(state, pattern, 0);
1863
95.7M
        }
1864
3.22M
    }
1865
1866
34.2M
    return status;
1867
38.3M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
38.8M
{
1694
38.8M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
38.8M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
38.8M
    Py_ssize_t status = 0;
1697
38.8M
    Py_ssize_t prefix_len = 0;
1698
38.8M
    Py_ssize_t prefix_skip = 0;
1699
38.8M
    SRE_CODE* prefix = NULL;
1700
38.8M
    SRE_CODE* charset = NULL;
1701
38.8M
    SRE_CODE* overlap = NULL;
1702
38.8M
    int flags = 0;
1703
38.8M
    INIT_TRACE(state);
1704
1705
38.8M
    if (ptr > end)
1706
0
        return 0;
1707
1708
38.8M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
38.8M
        flags = pattern[2];
1713
1714
38.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
76.4k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
76.4k
                   end - ptr, (size_t) pattern[3]));
1717
76.4k
            return 0;
1718
76.4k
        }
1719
38.7M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.59M
            end -= pattern[3] - 1;
1723
2.59M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.59M
        }
1726
1727
38.7M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.60M
            prefix_len = pattern[5];
1731
2.60M
            prefix_skip = pattern[6];
1732
2.60M
            prefix = pattern + 7;
1733
2.60M
            overlap = prefix + prefix_len - 1;
1734
36.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
34.1M
            charset = pattern + 5;
1738
1739
38.7M
        pattern += 1 + pattern[1];
1740
38.7M
    }
1741
1742
38.7M
    TRACE(("prefix = %p %zd %zd\n",
1743
38.7M
           prefix, prefix_len, prefix_skip));
1744
38.7M
    TRACE(("charset = %p\n", charset));
1745
1746
38.7M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.19M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.19M
#if SIZEOF_SRE_CHAR < 4
1750
2.19M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.19M
#endif
1753
2.19M
        end = (SRE_CHAR *)state->end;
1754
2.19M
        state->must_advance = 0;
1755
2.30M
        while (ptr < end) {
1756
48.8M
            while (*ptr != c) {
1757
46.5M
                if (++ptr >= end)
1758
67.9k
                    return 0;
1759
46.5M
            }
1760
2.23M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.23M
            state->start = ptr;
1762
2.23M
            state->ptr = ptr + prefix_skip;
1763
2.23M
            if (flags & SRE_INFO_LITERAL)
1764
5.28k
                return 1; /* we got all of it */
1765
2.22M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.22M
            if (status != 0)
1767
2.11M
                return status;
1768
107k
            ++ptr;
1769
107k
            RESET_CAPTURE_GROUP();
1770
107k
        }
1771
5.53k
        return 0;
1772
2.19M
    }
1773
1774
36.5M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
407k
        Py_ssize_t i = 0;
1778
1779
407k
        end = (SRE_CHAR *)state->end;
1780
407k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
407k
#if SIZEOF_SRE_CHAR < 4
1783
1.22M
        for (i = 0; i < prefix_len; i++)
1784
815k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
407k
#endif
1787
679k
        while (ptr < end) {
1788
679k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.30M
            while (*ptr++ != c) {
1790
1.62M
                if (ptr >= end)
1791
122
                    return 0;
1792
1.62M
            }
1793
679k
            if (ptr >= end)
1794
20
                return 0;
1795
1796
679k
            i = 1;
1797
679k
            state->must_advance = 0;
1798
680k
            do {
1799
680k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
661k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
661k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
661k
                    state->start = ptr - (prefix_len - 1);
1808
661k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
661k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
661k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
661k
                    if (status != 0)
1813
407k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
254k
                    if (++ptr >= end)
1816
23
                        return 0;
1817
254k
                    RESET_CAPTURE_GROUP();
1818
254k
                }
1819
272k
                i = overlap[i];
1820
272k
            } while (i != 0);
1821
679k
        }
1822
0
        return 0;
1823
407k
    }
1824
1825
36.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
34.1M
        end = (SRE_CHAR *)state->end;
1828
34.1M
        state->must_advance = 0;
1829
34.4M
        for (;;) {
1830
155M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
120M
                ptr++;
1832
34.4M
            if (ptr >= end)
1833
770k
                return 0;
1834
33.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
33.7M
            state->start = ptr;
1836
33.7M
            state->ptr = ptr;
1837
33.7M
            status = SRE(match)(state, pattern, 0);
1838
33.7M
            if (status != 0)
1839
33.3M
                break;
1840
345k
            ptr++;
1841
345k
            RESET_CAPTURE_GROUP();
1842
345k
        }
1843
34.1M
    } else {
1844
        /* general case */
1845
2.03M
        assert(ptr <= end);
1846
2.03M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.03M
        state->start = state->ptr = ptr;
1848
2.03M
        status = SRE(match)(state, pattern, 1);
1849
2.03M
        state->must_advance = 0;
1850
2.03M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.26M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
19
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.26M
        {
1854
1.26M
            state->start = state->ptr = ptr = end;
1855
1.26M
            return 0;
1856
1.26M
        }
1857
118M
        while (status == 0 && ptr < end) {
1858
118M
            ptr++;
1859
118M
            RESET_CAPTURE_GROUP();
1860
118M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
118M
            state->start = state->ptr = ptr;
1862
118M
            status = SRE(match)(state, pattern, 0);
1863
118M
        }
1864
762k
    }
1865
1866
34.1M
    return status;
1867
36.1M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
5.36M
{
1694
5.36M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
5.36M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
5.36M
    Py_ssize_t status = 0;
1697
5.36M
    Py_ssize_t prefix_len = 0;
1698
5.36M
    Py_ssize_t prefix_skip = 0;
1699
5.36M
    SRE_CODE* prefix = NULL;
1700
5.36M
    SRE_CODE* charset = NULL;
1701
5.36M
    SRE_CODE* overlap = NULL;
1702
5.36M
    int flags = 0;
1703
5.36M
    INIT_TRACE(state);
1704
1705
5.36M
    if (ptr > end)
1706
0
        return 0;
1707
1708
5.36M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
5.36M
        flags = pattern[2];
1713
1714
5.36M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.94k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.94k
                   end - ptr, (size_t) pattern[3]));
1717
4.94k
            return 0;
1718
4.94k
        }
1719
5.35M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.23M
            end -= pattern[3] - 1;
1723
2.23M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.23M
        }
1726
1727
5.35M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.23M
            prefix_len = pattern[5];
1731
2.23M
            prefix_skip = pattern[6];
1732
2.23M
            prefix = pattern + 7;
1733
2.23M
            overlap = prefix + prefix_len - 1;
1734
3.12M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
2.97M
            charset = pattern + 5;
1738
1739
5.35M
        pattern += 1 + pattern[1];
1740
5.35M
    }
1741
1742
5.35M
    TRACE(("prefix = %p %zd %zd\n",
1743
5.35M
           prefix, prefix_len, prefix_skip));
1744
5.35M
    TRACE(("charset = %p\n", charset));
1745
1746
5.35M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.99M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
1.99M
        end = (SRE_CHAR *)state->end;
1754
1.99M
        state->must_advance = 0;
1755
2.05M
        while (ptr < end) {
1756
25.8M
            while (*ptr != c) {
1757
23.7M
                if (++ptr >= end)
1758
4.10k
                    return 0;
1759
23.7M
            }
1760
2.05M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.05M
            state->start = ptr;
1762
2.05M
            state->ptr = ptr + prefix_skip;
1763
2.05M
            if (flags & SRE_INFO_LITERAL)
1764
1.33k
                return 1; /* we got all of it */
1765
2.05M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.05M
            if (status != 0)
1767
1.98M
                return status;
1768
64.6k
            ++ptr;
1769
64.6k
            RESET_CAPTURE_GROUP();
1770
64.6k
        }
1771
931
        return 0;
1772
1.99M
    }
1773
1774
3.36M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
246k
        Py_ssize_t i = 0;
1778
1779
246k
        end = (SRE_CHAR *)state->end;
1780
246k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
646k
        while (ptr < end) {
1788
646k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.69M
            while (*ptr++ != c) {
1790
4.04M
                if (ptr >= end)
1791
140
                    return 0;
1792
4.04M
            }
1793
646k
            if (ptr >= end)
1794
15
                return 0;
1795
1796
646k
            i = 1;
1797
646k
            state->must_advance = 0;
1798
647k
            do {
1799
647k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
523k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
523k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
523k
                    state->start = ptr - (prefix_len - 1);
1808
523k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
523k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
523k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
523k
                    if (status != 0)
1813
246k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
277k
                    if (++ptr >= end)
1816
17
                        return 0;
1817
277k
                    RESET_CAPTURE_GROUP();
1818
277k
                }
1819
401k
                i = overlap[i];
1820
401k
            } while (i != 0);
1821
646k
        }
1822
0
        return 0;
1823
246k
    }
1824
1825
3.12M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
2.97M
        end = (SRE_CHAR *)state->end;
1828
2.97M
        state->must_advance = 0;
1829
3.50M
        for (;;) {
1830
47.1M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
43.6M
                ptr++;
1832
3.50M
            if (ptr >= end)
1833
43.5k
                return 0;
1834
3.46M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.46M
            state->start = ptr;
1836
3.46M
            state->ptr = ptr;
1837
3.46M
            status = SRE(match)(state, pattern, 0);
1838
3.46M
            if (status != 0)
1839
2.93M
                break;
1840
529k
            ptr++;
1841
529k
            RESET_CAPTURE_GROUP();
1842
529k
        }
1843
2.97M
    } else {
1844
        /* general case */
1845
141k
        assert(ptr <= end);
1846
141k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
141k
        state->start = state->ptr = ptr;
1848
141k
        status = SRE(match)(state, pattern, 1);
1849
141k
        state->must_advance = 0;
1850
141k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
14.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
21
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
14.7k
        {
1854
14.7k
            state->start = state->ptr = ptr = end;
1855
14.7k
            return 0;
1856
14.7k
        }
1857
27.6M
        while (status == 0 && ptr < end) {
1858
27.5M
            ptr++;
1859
27.5M
            RESET_CAPTURE_GROUP();
1860
27.5M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
27.5M
            state->start = state->ptr = ptr;
1862
27.5M
            status = SRE(match)(state, pattern, 0);
1863
27.5M
        }
1864
126k
    }
1865
1866
3.06M
    return status;
1867
3.12M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/