Coverage Report

Created: 2026-03-08 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
99.2M
{
18
    /* check if pointer is at given position */
19
20
99.2M
    Py_ssize_t thisp, thatp;
21
22
99.2M
    switch (at) {
23
24
10.7M
    case SRE_AT_BEGINNING:
25
10.7M
    case SRE_AT_BEGINNING_STRING:
26
10.7M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
82.6M
    case SRE_AT_END:
33
82.6M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.50M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
82.6M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
5.79M
    case SRE_AT_END_STRING:
42
5.79M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
99.2M
    }
87
88
0
    return 0;
89
99.2M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
30.6M
{
18
    /* check if pointer is at given position */
19
20
30.6M
    Py_ssize_t thisp, thatp;
21
22
30.6M
    switch (at) {
23
24
9.28M
    case SRE_AT_BEGINNING:
25
9.28M
    case SRE_AT_BEGINNING_STRING:
26
9.28M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
18.8M
    case SRE_AT_END:
33
18.8M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
311k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
18.8M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.55M
    case SRE_AT_END_STRING:
42
2.55M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
30.6M
    }
87
88
0
    return 0;
89
30.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
59.2M
{
18
    /* check if pointer is at given position */
19
20
59.2M
    Py_ssize_t thisp, thatp;
21
22
59.2M
    switch (at) {
23
24
1.43M
    case SRE_AT_BEGINNING:
25
1.43M
    case SRE_AT_BEGINNING_STRING:
26
1.43M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
56.3M
    case SRE_AT_END:
33
56.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.18M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
56.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.41M
    case SRE_AT_END_STRING:
42
1.41M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
59.2M
    }
87
88
0
    return 0;
89
59.2M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
9.30M
{
18
    /* check if pointer is at given position */
19
20
9.30M
    Py_ssize_t thisp, thatp;
21
22
9.30M
    switch (at) {
23
24
17.3k
    case SRE_AT_BEGINNING:
25
17.3k
    case SRE_AT_BEGINNING_STRING:
26
17.3k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.45M
    case SRE_AT_END:
33
7.45M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6.54k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.45M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.82M
    case SRE_AT_END_STRING:
42
1.82M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
9.30M
    }
87
88
0
    return 0;
89
9.30M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.60G
{
94
    /* check if character is a member of the given set */
95
96
1.60G
    int ok = 1;
97
98
3.72G
    for (;;) {
99
3.72G
        switch (*set++) {
100
101
1.10G
        case SRE_OP_FAILURE:
102
1.10G
            return !ok;
103
104
1.33G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.33G
            if (ch == set[0])
107
7.78M
                return ok;
108
1.32G
            set++;
109
1.32G
            break;
110
111
100M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
100M
            if (sre_category(set[0], (int) ch))
114
31.8M
                return ok;
115
68.4M
            set++;
116
68.4M
            break;
117
118
488M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
488M
            if (ch < 256 &&
121
466M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
179M
                return ok;
123
309M
            set += 256/SRE_CODE_BITS;
124
309M
            break;
125
126
414M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
414M
            if (set[0] <= ch && ch <= set[1])
129
273M
                return ok;
130
140M
            set += 2;
131
140M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
276M
        case SRE_OP_NEGATE:
148
276M
            ok = !ok;
149
276M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.72G
        }
175
3.72G
    }
176
1.60G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
514M
{
94
    /* check if character is a member of the given set */
95
96
514M
    int ok = 1;
97
98
1.11G
    for (;;) {
99
1.11G
        switch (*set++) {
100
101
310M
        case SRE_OP_FAILURE:
102
310M
            return !ok;
103
104
374M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
374M
            if (ch == set[0])
107
4.95M
                return ok;
108
369M
            set++;
109
369M
            break;
110
111
33.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
33.2M
            if (sre_category(set[0], (int) ch))
114
15.4M
                return ok;
115
17.7M
            set++;
116
17.7M
            break;
117
118
117M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
117M
            if (ch < 256 &&
121
117M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
47.8M
                return ok;
123
69.2M
            set += 256/SRE_CODE_BITS;
124
69.2M
            break;
125
126
211M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
211M
            if (set[0] <= ch && ch <= set[1])
129
135M
                return ok;
130
76.2M
            set += 2;
131
76.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
63.1M
        case SRE_OP_NEGATE:
148
63.1M
            ok = !ok;
149
63.1M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.11G
        }
175
1.11G
    }
176
514M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
755M
{
94
    /* check if character is a member of the given set */
95
96
755M
    int ok = 1;
97
98
1.82G
    for (;;) {
99
1.82G
        switch (*set++) {
100
101
563M
        case SRE_OP_FAILURE:
102
563M
            return !ok;
103
104
737M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
737M
            if (ch == set[0])
107
1.45M
                return ok;
108
735M
            set++;
109
735M
            break;
110
111
58.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
58.9M
            if (sre_category(set[0], (int) ch))
114
13.6M
                return ok;
115
45.3M
            set++;
116
45.3M
            break;
117
118
183M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
183M
            if (ch < 256 &&
121
172M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
60.2M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
171M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
171M
            if (set[0] <= ch && ch <= set[1])
129
115M
                return ok;
130
55.2M
            set += 2;
131
55.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
112M
        case SRE_OP_NEGATE:
148
112M
            ok = !ok;
149
112M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.82G
        }
175
1.82G
    }
176
755M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
333M
{
94
    /* check if character is a member of the given set */
95
96
333M
    int ok = 1;
97
98
784M
    for (;;) {
99
784M
        switch (*set++) {
100
101
235M
        case SRE_OP_FAILURE:
102
235M
            return !ok;
103
104
219M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
219M
            if (ch == set[0])
107
1.37M
                return ok;
108
217M
            set++;
109
217M
            break;
110
111
8.10M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
8.10M
            if (sre_category(set[0], (int) ch))
114
2.73M
                return ok;
115
5.37M
            set++;
116
5.37M
            break;
117
118
187M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
187M
            if (ch < 256 &&
121
176M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
71.0M
                return ok;
123
116M
            set += 256/SRE_CODE_BITS;
124
116M
            break;
125
126
31.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
31.6M
            if (set[0] <= ch && ch <= set[1])
129
22.3M
                return ok;
130
9.28M
            set += 2;
131
9.28M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
101M
        case SRE_OP_NEGATE:
148
101M
            ok = !ok;
149
101M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
784M
        }
175
784M
    }
176
333M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
674M
{
195
674M
    SRE_CODE chr;
196
674M
    SRE_CHAR c;
197
674M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
674M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
674M
    Py_ssize_t i;
200
674M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
674M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
109M
        end = ptr + maxcount;
205
206
674M
    switch (pattern[0]) {
207
208
516M
    case SRE_OP_IN:
209
        /* repeated set */
210
516M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
898M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
382M
            ptr++;
213
516M
        break;
214
215
62.4M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
62.4M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
158M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
95.8M
            ptr++;
220
62.4M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
94.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
94.7M
        chr = pattern[1];
232
94.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
94.7M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
82.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
82.8M
        else
238
82.8M
#endif
239
98.8M
        while (ptr < end && *ptr == c)
240
4.11M
            ptr++;
241
94.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.31M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.31M
        chr = pattern[1];
270
1.31M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.31M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
806k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
806k
        else
276
806k
#endif
277
51.5M
        while (ptr < end && *ptr != c)
278
50.2M
            ptr++;
279
1.31M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
674M
    }
319
320
674M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
674M
           ptr - (SRE_CHAR*) state->ptr));
322
674M
    return ptr - (SRE_CHAR*) state->ptr;
323
674M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
262M
{
195
262M
    SRE_CODE chr;
196
262M
    SRE_CHAR c;
197
262M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
262M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
262M
    Py_ssize_t i;
200
262M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
262M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
30.9M
        end = ptr + maxcount;
205
206
262M
    switch (pattern[0]) {
207
208
182M
    case SRE_OP_IN:
209
        /* repeated set */
210
182M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
323M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
140M
            ptr++;
213
182M
        break;
214
215
12.5M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.5M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
32.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.6M
            ptr++;
220
12.5M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
66.1M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
66.1M
        chr = pattern[1];
232
66.1M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
66.1M
        c = (SRE_CHAR) chr;
234
66.1M
#if SIZEOF_SRE_CHAR < 4
235
66.1M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
66.1M
        else
238
66.1M
#endif
239
67.7M
        while (ptr < end && *ptr == c)
240
1.61M
            ptr++;
241
66.1M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
611k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
611k
        chr = pattern[1];
270
611k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
611k
        c = (SRE_CHAR) chr;
272
611k
#if SIZEOF_SRE_CHAR < 4
273
611k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
611k
        else
276
611k
#endif
277
10.3M
        while (ptr < end && *ptr != c)
278
9.76M
            ptr++;
279
611k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
262M
    }
319
320
262M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
262M
           ptr - (SRE_CHAR*) state->ptr));
322
262M
    return ptr - (SRE_CHAR*) state->ptr;
323
262M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
314M
{
195
314M
    SRE_CODE chr;
196
314M
    SRE_CHAR c;
197
314M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
314M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
314M
    Py_ssize_t i;
200
314M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
314M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
59.4M
        end = ptr + maxcount;
205
206
314M
    switch (pattern[0]) {
207
208
252M
    case SRE_OP_IN:
209
        /* repeated set */
210
252M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
386M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
134M
            ptr++;
213
252M
        break;
214
215
44.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
44.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
101M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
56.6M
            ptr++;
220
44.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
16.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
16.6M
        chr = pattern[1];
232
16.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
16.6M
        c = (SRE_CHAR) chr;
234
16.6M
#if SIZEOF_SRE_CHAR < 4
235
16.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
16.6M
        else
238
16.6M
#endif
239
18.3M
        while (ptr < end && *ptr == c)
240
1.71M
            ptr++;
241
16.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
194k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
194k
        chr = pattern[1];
270
194k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
194k
        c = (SRE_CHAR) chr;
272
194k
#if SIZEOF_SRE_CHAR < 4
273
194k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
194k
        else
276
194k
#endif
277
14.4M
        while (ptr < end && *ptr != c)
278
14.2M
            ptr++;
279
194k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
314M
    }
319
320
314M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
314M
           ptr - (SRE_CHAR*) state->ptr));
322
314M
    return ptr - (SRE_CHAR*) state->ptr;
323
314M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
98.4M
{
195
98.4M
    SRE_CODE chr;
196
98.4M
    SRE_CHAR c;
197
98.4M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
98.4M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
98.4M
    Py_ssize_t i;
200
98.4M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
98.4M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
18.9M
        end = ptr + maxcount;
205
206
98.4M
    switch (pattern[0]) {
207
208
80.9M
    case SRE_OP_IN:
209
        /* repeated set */
210
80.9M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
188M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
107M
            ptr++;
213
80.9M
        break;
214
215
5.08M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
5.08M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.5M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.5M
            ptr++;
220
5.08M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
11.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
11.9M
        chr = pattern[1];
232
11.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
11.9M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
12.6M
        while (ptr < end && *ptr == c)
240
778k
            ptr++;
241
11.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
505k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
505k
        chr = pattern[1];
270
505k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
505k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
26.7M
        while (ptr < end && *ptr != c)
278
26.2M
            ptr++;
279
505k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
98.4M
    }
319
320
98.4M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
98.4M
           ptr - (SRE_CHAR*) state->ptr));
322
98.4M
    return ptr - (SRE_CHAR*) state->ptr;
323
98.4M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
555M
    do { \
355
555M
        ctx->lastmark = state->lastmark; \
356
555M
        ctx->lastindex = state->lastindex; \
357
555M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
360M
    do { \
360
360M
        state->lastmark = ctx->lastmark; \
361
360M
        state->lastindex = ctx->lastindex; \
362
360M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
196M
    do { \
366
196M
        TRACE(("push last_ptr: %zd", \
367
196M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
196M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
196M
    } while (0)
370
#define LAST_PTR_POP()  \
371
196M
    do { \
372
196M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
196M
        TRACE(("pop last_ptr: %zd", \
374
196M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
196M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
911M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
565M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.17G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
112M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
22.8M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.47G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.47G
do { \
390
1.47G
    alloc_pos = state->data_stack_base; \
391
1.47G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.47G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.47G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
169M
        int j = data_stack_grow(state, sizeof(type)); \
395
169M
        if (j < 0) return j; \
396
169M
        if (ctx_pos != -1) \
397
169M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
169M
    } \
399
1.47G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.47G
    state->data_stack_base += sizeof(type); \
401
1.47G
} while (0)
402
403
1.60G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.60G
do { \
405
1.60G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.60G
    ptr = (type*)(state->data_stack+pos); \
407
1.60G
} while (0)
408
409
480M
#define DATA_STACK_PUSH(state, data, size) \
410
480M
do { \
411
480M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
480M
           data, state->data_stack_base, size)); \
413
480M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
85.6k
        int j = data_stack_grow(state, size); \
415
85.6k
        if (j < 0) return j; \
416
85.6k
        if (ctx_pos != -1) \
417
85.6k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
85.6k
    } \
419
480M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
480M
    state->data_stack_base += size; \
421
480M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
304M
#define DATA_STACK_POP(state, data, size, discard) \
427
304M
do { \
428
304M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
304M
           data, state->data_stack_base-size, size)); \
430
304M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
304M
    if (discard) \
432
304M
        state->data_stack_base -= size; \
433
304M
} while (0)
434
435
1.65G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.65G
do { \
437
1.65G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.65G
           state->data_stack_base-size, size)); \
439
1.65G
    state->data_stack_base -= size; \
440
1.65G
} while(0)
441
442
#define DATA_PUSH(x) \
443
196M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
196M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.47G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.47G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.60G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
384M
    do if (lastmark >= 0) { \
473
284M
        MARK_TRACE("push", (lastmark)); \
474
284M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
284M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
384M
    } while (0)
477
#define MARK_POP(lastmark) \
478
121M
    do if (lastmark >= 0) { \
479
107M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
107M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
107M
        MARK_TRACE("pop", (lastmark)); \
482
121M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.49M
    do if (lastmark >= 0) { \
485
971k
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
971k
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
971k
        MARK_TRACE("pop keep", (lastmark)); \
488
1.49M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
263M
    do if (lastmark >= 0) { \
491
176M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
176M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
176M
        MARK_TRACE("pop discard", (lastmark)); \
494
263M
    } while (0)
495
496
542M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
196M
#define JUMP_MAX_UNTIL_2     2
499
112M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
112M
#define JUMP_REPEAT          7
504
13.0M
#define JUMP_REPEAT_ONE_1    8
505
229M
#define JUMP_REPEAT_ONE_2    9
506
63.5M
#define JUMP_MIN_REPEAT_ONE  10
507
158M
#define JUMP_BRANCH          11
508
22.8M
#define JUMP_ASSERT          12
509
24.5M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
934M
    ctx->pattern = pattern; \
516
934M
    ctx->ptr = ptr; \
517
934M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
934M
    nextctx->pattern = nextpattern; \
519
934M
    nextctx->toplevel = toplevel_; \
520
934M
    nextctx->jump = jumpvalue; \
521
934M
    nextctx->last_ctx_pos = ctx_pos; \
522
934M
    pattern = nextpattern; \
523
934M
    ctx_pos = alloc_pos; \
524
934M
    ctx = nextctx; \
525
934M
    goto entrance; \
526
934M
    jumplabel: \
527
934M
    pattern = ctx->pattern; \
528
934M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
886M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
47.4M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.53G
    do {                                                           \
553
2.53G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.53G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.53G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.61G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.53G
        do {                               \
588
2.53G
            MAYBE_CHECK_SIGNALS;           \
589
2.53G
            goto *sre_targets[*pattern++]; \
590
2.53G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
542M
{
601
542M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
542M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
542M
    Py_ssize_t ret = 0;
604
542M
    int jump;
605
542M
    unsigned int sigcount = state->sigcount;
606
607
542M
    SRE(match_context)* ctx;
608
542M
    SRE(match_context)* nextctx;
609
542M
    INIT_TRACE(state);
610
611
542M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
542M
    DATA_ALLOC(SRE(match_context), ctx);
614
542M
    ctx->last_ctx_pos = -1;
615
542M
    ctx->jump = JUMP_NONE;
616
542M
    ctx->toplevel = toplevel;
617
542M
    ctx_pos = alloc_pos;
618
619
542M
#if USE_COMPUTED_GOTOS
620
542M
#include "sre_targets.h"
621
542M
#endif
622
623
1.47G
entrance:
624
625
1.47G
    ;  // Fashion statement.
626
1.47G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.47G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
61.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.85M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.85M
                   end - ptr, (size_t) pattern[3]));
634
3.85M
            RETURN_FAILURE;
635
3.85M
        }
636
57.7M
        pattern += pattern[1] + 1;
637
57.7M
    }
638
639
1.47G
#if USE_COMPUTED_GOTOS
640
1.47G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.47G
    {
647
648
1.47G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
636M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
636M
                   ptr, pattern[0]));
653
636M
            {
654
636M
                int i = pattern[0];
655
636M
                if (i & 1)
656
128M
                    state->lastindex = i/2 + 1;
657
636M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
630M
                    int j = state->lastmark + 1;
663
645M
                    while (j < i)
664
14.5M
                        state->mark[j++] = NULL;
665
630M
                    state->lastmark = i;
666
630M
                }
667
636M
                state->mark[i] = ptr;
668
636M
            }
669
636M
            pattern++;
670
636M
            DISPATCH;
671
672
636M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
154M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
154M
                   ptr, *pattern));
677
154M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
68.0M
                RETURN_FAILURE;
679
86.1M
            pattern++;
680
86.1M
            ptr++;
681
86.1M
            DISPATCH;
682
683
86.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
165M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
165M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
165M
            if (ctx->toplevel &&
698
44.2M
                ((state->match_all && ptr != state->end) ||
699
44.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
165M
            state->ptr = ptr;
704
165M
            RETURN_SUCCESS;
705
706
99.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
99.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
99.2M
            if (!SRE(at)(state, ptr, *pattern))
711
81.5M
                RETURN_FAILURE;
712
17.6M
            pattern++;
713
17.6M
            DISPATCH;
714
715
17.6M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
288M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
288M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
288M
            if (ptr >= end ||
749
287M
                !SRE(charset)(state, pattern + 1, *ptr))
750
94.3M
                RETURN_FAILURE;
751
194M
            pattern += pattern[0];
752
194M
            ptr++;
753
194M
            DISPATCH;
754
755
194M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.84M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.84M
                   pattern, ptr, pattern[0]));
758
5.84M
            if (ptr >= end ||
759
5.84M
                sre_lower_ascii(*ptr) != *pattern)
760
56.9k
                RETURN_FAILURE;
761
5.78M
            pattern++;
762
5.78M
            ptr++;
763
5.78M
            DISPATCH;
764
765
5.78M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
79.6M
        TARGET(SRE_OP_JUMP):
845
79.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
79.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
79.6M
                   ptr, pattern[0]));
850
79.6M
            pattern += pattern[0];
851
79.6M
            DISPATCH;
852
853
114M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
114M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
114M
            LASTMARK_SAVE();
858
114M
            if (state->repeat)
859
55.2M
                MARK_PUSH(ctx->lastmark);
860
282M
            for (; pattern[0]; pattern += pattern[0]) {
861
244M
                if (pattern[1] == SRE_OP_LITERAL &&
862
124M
                    (ptr >= end ||
863
123M
                     (SRE_CODE) *ptr != pattern[2]))
864
60.8M
                    continue;
865
184M
                if (pattern[1] == SRE_OP_IN &&
866
47.9M
                    (ptr >= end ||
867
47.7M
                     !SRE(charset)(state, pattern + 3,
868
47.7M
                                   (SRE_CODE) *ptr)))
869
25.4M
                    continue;
870
158M
                state->ptr = ptr;
871
158M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
158M
                if (ret) {
873
76.9M
                    if (state->repeat)
874
47.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
76.9M
                    RETURN_ON_ERROR(ret);
876
76.9M
                    RETURN_SUCCESS;
877
76.9M
                }
878
81.6M
                if (state->repeat)
879
12.5k
                    MARK_POP_KEEP(ctx->lastmark);
880
81.6M
                LASTMARK_RESTORE();
881
81.6M
            }
882
37.6M
            if (state->repeat)
883
7.99M
                MARK_POP_DISCARD(ctx->lastmark);
884
37.6M
            RETURN_FAILURE;
885
886
616M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
616M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
616M
                   pattern[1], pattern[2]));
898
899
616M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.17M
                RETURN_FAILURE; /* cannot match */
901
902
614M
            state->ptr = ptr;
903
904
614M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
614M
            RETURN_ON_ERROR(ret);
906
614M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
614M
            ctx->count = ret;
908
614M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
614M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
398M
                RETURN_FAILURE;
917
918
216M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.72M
                ptr == state->end &&
920
93.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
93.8k
            {
922
                /* tail is empty.  we're finished */
923
93.8k
                state->ptr = ptr;
924
93.8k
                RETURN_SUCCESS;
925
93.8k
            }
926
927
215M
            LASTMARK_SAVE();
928
215M
            if (state->repeat)
929
108M
                MARK_PUSH(ctx->lastmark);
930
931
215M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
21.9M
                ctx->u.chr = pattern[pattern[0]+1];
935
21.9M
                for (;;) {
936
57.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
48.1M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
35.1M
                        ptr--;
939
35.1M
                        ctx->count--;
940
35.1M
                    }
941
21.9M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
8.96M
                        break;
943
13.0M
                    state->ptr = ptr;
944
13.0M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
13.0M
                            pattern+pattern[0]);
946
13.0M
                    if (ret) {
947
12.9M
                        if (state->repeat)
948
11.5M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.9M
                        RETURN_ON_ERROR(ret);
950
12.9M
                        RETURN_SUCCESS;
951
12.9M
                    }
952
741
                    if (state->repeat)
953
725
                        MARK_POP_KEEP(ctx->lastmark);
954
741
                    LASTMARK_RESTORE();
955
956
741
                    ptr--;
957
741
                    ctx->count--;
958
741
                }
959
8.96M
                if (state->repeat)
960
7.57M
                    MARK_POP_DISCARD(ctx->lastmark);
961
193M
            } else {
962
                /* general case */
963
291M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
229M
                    state->ptr = ptr;
965
229M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
229M
                            pattern+pattern[0]);
967
229M
                    if (ret) {
968
132M
                        if (state->repeat)
969
88.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
132M
                        RETURN_ON_ERROR(ret);
971
132M
                        RETURN_SUCCESS;
972
132M
                    }
973
97.6M
                    if (state->repeat)
974
1.48M
                        MARK_POP_KEEP(ctx->lastmark);
975
97.6M
                    LASTMARK_RESTORE();
976
977
97.6M
                    ptr--;
978
97.6M
                    ctx->count--;
979
97.6M
                }
980
61.6M
                if (state->repeat)
981
1.24M
                    MARK_POP_DISCARD(ctx->lastmark);
982
61.6M
            }
983
70.5M
            RETURN_FAILURE;
984
985
3.72M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.72M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.72M
                   pattern[1], pattern[2]));
997
998
3.72M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.72M
            state->ptr = ptr;
1002
1003
3.72M
            if (pattern[1] == 0)
1004
3.72M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.72M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.72M
            } else {
1028
                /* general case */
1029
3.72M
                LASTMARK_SAVE();
1030
3.72M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
63.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
63.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
63.5M
                    state->ptr = ptr;
1036
63.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
63.5M
                            pattern+pattern[0]);
1038
63.5M
                    if (ret) {
1039
3.72M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.72M
                        RETURN_ON_ERROR(ret);
1042
3.72M
                        RETURN_SUCCESS;
1043
3.72M
                    }
1044
59.8M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
59.8M
                    LASTMARK_RESTORE();
1047
1048
59.8M
                    state->ptr = ptr;
1049
59.8M
                    ret = SRE(count)(state, pattern+3, 1);
1050
59.8M
                    RETURN_ON_ERROR(ret);
1051
59.8M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
59.8M
                    if (ret == 0)
1053
16
                        break;
1054
59.8M
                    assert(ret == 1);
1055
59.8M
                    ptr++;
1056
59.8M
                    ctx->count++;
1057
59.8M
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
112M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
112M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
112M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
112M
            ctx->u.rep = repeat_pool_malloc(state);
1127
112M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
112M
            ctx->u.rep->count = -1;
1131
112M
            ctx->u.rep->pattern = pattern;
1132
112M
            ctx->u.rep->prev = state->repeat;
1133
112M
            ctx->u.rep->last_ptr = NULL;
1134
112M
            state->repeat = ctx->u.rep;
1135
1136
112M
            state->ptr = ptr;
1137
112M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
112M
            state->repeat = ctx->u.rep->prev;
1139
112M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
112M
            if (ret) {
1142
37.0M
                RETURN_ON_ERROR(ret);
1143
37.0M
                RETURN_SUCCESS;
1144
37.0M
            }
1145
75.4M
            RETURN_FAILURE;
1146
1147
212M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
212M
            ctx->u.rep = state->repeat;
1155
212M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
212M
            state->ptr = ptr;
1159
1160
212M
            ctx->count = ctx->u.rep->count+1;
1161
1162
212M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
212M
                   ptr, ctx->count));
1164
1165
212M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
212M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.8M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
196M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
196M
                ctx->u.rep->count = ctx->count;
1185
196M
                LASTMARK_SAVE();
1186
196M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
196M
                LAST_PTR_PUSH();
1189
196M
                ctx->u.rep->last_ptr = state->ptr;
1190
196M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
196M
                        ctx->u.rep->pattern+3);
1192
196M
                LAST_PTR_POP();
1193
196M
                if (ret) {
1194
99.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
99.4M
                    RETURN_ON_ERROR(ret);
1196
99.4M
                    RETURN_SUCCESS;
1197
99.4M
                }
1198
96.9M
                MARK_POP(ctx->lastmark);
1199
96.9M
                LASTMARK_RESTORE();
1200
96.9M
                ctx->u.rep->count = ctx->count-1;
1201
96.9M
                state->ptr = ptr;
1202
96.9M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
112M
            state->repeat = ctx->u.rep->prev;
1207
112M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
112M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
112M
            RETURN_ON_SUCCESS(ret);
1211
75.7M
            state->ptr = ptr;
1212
75.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
22.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
22.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
22.8M
                   ptr, pattern[1]));
1565
22.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
22.8M
            state->ptr = ptr - pattern[1];
1568
22.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
22.8M
            RETURN_ON_FAILURE(ret);
1570
18.4M
            pattern += pattern[0];
1571
18.4M
            DISPATCH;
1572
1573
24.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
24.5M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
24.5M
                   ptr, pattern[1]));
1578
24.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
24.5M
                state->ptr = ptr - pattern[1];
1580
24.5M
                LASTMARK_SAVE();
1581
24.5M
                if (state->repeat)
1582
24.5M
                    MARK_PUSH(ctx->lastmark);
1583
1584
49.1M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
49.1M
                if (ret) {
1586
7.93k
                    if (state->repeat)
1587
7.93k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
7.93k
                    RETURN_ON_ERROR(ret);
1589
7.93k
                    RETURN_FAILURE;
1590
7.93k
                }
1591
24.5M
                if (state->repeat)
1592
24.5M
                    MARK_POP(ctx->lastmark);
1593
24.5M
                LASTMARK_RESTORE();
1594
24.5M
            }
1595
24.5M
            pattern += pattern[0];
1596
24.5M
            DISPATCH;
1597
1598
24.5M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.47G
exit:
1620
1.47G
    ctx_pos = ctx->last_ctx_pos;
1621
1.47G
    jump = ctx->jump;
1622
1.47G
    DATA_POP_DISCARD(ctx);
1623
1.47G
    if (ctx_pos == -1) {
1624
542M
        state->sigcount = sigcount;
1625
542M
        return ret;
1626
542M
    }
1627
934M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
934M
    switch (jump) {
1630
196M
        case JUMP_MAX_UNTIL_2:
1631
196M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
196M
            goto jump_max_until_2;
1633
112M
        case JUMP_MAX_UNTIL_3:
1634
112M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
112M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
158M
        case JUMP_BRANCH:
1643
158M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
158M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
112M
        case JUMP_REPEAT:
1658
112M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
112M
            goto jump_repeat;
1660
13.0M
        case JUMP_REPEAT_ONE_1:
1661
13.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
13.0M
            goto jump_repeat_one_1;
1663
229M
        case JUMP_REPEAT_ONE_2:
1664
229M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
229M
            goto jump_repeat_one_2;
1666
63.5M
        case JUMP_MIN_REPEAT_ONE:
1667
63.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
63.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
22.8M
        case JUMP_ASSERT:
1673
22.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
22.8M
            goto jump_assert;
1675
24.5M
        case JUMP_ASSERT_NOT:
1676
24.5M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
24.5M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
934M
    }
1683
1684
0
    return ret; /* should never get here */
1685
934M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
240M
{
601
240M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
240M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
240M
    Py_ssize_t ret = 0;
604
240M
    int jump;
605
240M
    unsigned int sigcount = state->sigcount;
606
607
240M
    SRE(match_context)* ctx;
608
240M
    SRE(match_context)* nextctx;
609
240M
    INIT_TRACE(state);
610
611
240M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
240M
    DATA_ALLOC(SRE(match_context), ctx);
614
240M
    ctx->last_ctx_pos = -1;
615
240M
    ctx->jump = JUMP_NONE;
616
240M
    ctx->toplevel = toplevel;
617
240M
    ctx_pos = alloc_pos;
618
619
240M
#if USE_COMPUTED_GOTOS
620
240M
#include "sre_targets.h"
621
240M
#endif
622
623
549M
entrance:
624
625
549M
    ;  // Fashion statement.
626
549M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
549M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
40.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.74M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.74M
                   end - ptr, (size_t) pattern[3]));
634
3.74M
            RETURN_FAILURE;
635
3.74M
        }
636
36.9M
        pattern += pattern[1] + 1;
637
36.9M
    }
638
639
545M
#if USE_COMPUTED_GOTOS
640
545M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
545M
    {
647
648
545M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
244M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
244M
                   ptr, pattern[0]));
653
244M
            {
654
244M
                int i = pattern[0];
655
244M
                if (i & 1)
656
43.3M
                    state->lastindex = i/2 + 1;
657
244M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
240M
                    int j = state->lastmark + 1;
663
250M
                    while (j < i)
664
9.14M
                        state->mark[j++] = NULL;
665
240M
                    state->lastmark = i;
666
240M
                }
667
244M
                state->mark[i] = ptr;
668
244M
            }
669
244M
            pattern++;
670
244M
            DISPATCH;
671
672
244M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
93.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
93.0M
                   ptr, *pattern));
677
93.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
37.4M
                RETURN_FAILURE;
679
55.6M
            pattern++;
680
55.6M
            ptr++;
681
55.6M
            DISPATCH;
682
683
55.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
73.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
73.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
73.7M
            if (ctx->toplevel &&
698
28.9M
                ((state->match_all && ptr != state->end) ||
699
28.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
73.7M
            state->ptr = ptr;
704
73.7M
            RETURN_SUCCESS;
705
706
30.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
30.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
30.6M
            if (!SRE(at)(state, ptr, *pattern))
711
15.7M
                RETURN_FAILURE;
712
14.9M
            pattern++;
713
14.9M
            DISPATCH;
714
715
14.9M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
85.3M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
85.3M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
85.3M
            if (ptr >= end ||
749
85.0M
                !SRE(charset)(state, pattern + 1, *ptr))
750
16.7M
                RETURN_FAILURE;
751
68.5M
            pattern += pattern[0];
752
68.5M
            ptr++;
753
68.5M
            DISPATCH;
754
755
68.5M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
476k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
476k
                   pattern, ptr, pattern[0]));
758
476k
            if (ptr >= end ||
759
476k
                sre_lower_ascii(*ptr) != *pattern)
760
18.8k
                RETURN_FAILURE;
761
458k
            pattern++;
762
458k
            ptr++;
763
458k
            DISPATCH;
764
765
458k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
36.7M
        TARGET(SRE_OP_JUMP):
845
36.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
36.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
36.7M
                   ptr, pattern[0]));
850
36.7M
            pattern += pattern[0];
851
36.7M
            DISPATCH;
852
853
61.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
61.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
61.6M
            LASTMARK_SAVE();
858
61.6M
            if (state->repeat)
859
11.6M
                MARK_PUSH(ctx->lastmark);
860
170M
            for (; pattern[0]; pattern += pattern[0]) {
861
144M
                if (pattern[1] == SRE_OP_LITERAL &&
862
76.2M
                    (ptr >= end ||
863
76.0M
                     (SRE_CODE) *ptr != pattern[2]))
864
29.3M
                    continue;
865
114M
                if (pattern[1] == SRE_OP_IN &&
866
11.6M
                    (ptr >= end ||
867
11.4M
                     !SRE(charset)(state, pattern + 3,
868
11.4M
                                   (SRE_CODE) *ptr)))
869
5.83M
                    continue;
870
108M
                state->ptr = ptr;
871
108M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
108M
                if (ret) {
873
35.2M
                    if (state->repeat)
874
11.0M
                        MARK_POP_DISCARD(ctx->lastmark);
875
35.2M
                    RETURN_ON_ERROR(ret);
876
35.2M
                    RETURN_SUCCESS;
877
35.2M
                }
878
73.6M
                if (state->repeat)
879
5.09k
                    MARK_POP_KEEP(ctx->lastmark);
880
73.6M
                LASTMARK_RESTORE();
881
73.6M
            }
882
26.3M
            if (state->repeat)
883
676k
                MARK_POP_DISCARD(ctx->lastmark);
884
26.3M
            RETURN_FAILURE;
885
886
252M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
252M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
252M
                   pattern[1], pattern[2]));
898
899
252M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.01M
                RETURN_FAILURE; /* cannot match */
901
902
251M
            state->ptr = ptr;
903
904
251M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
251M
            RETURN_ON_ERROR(ret);
906
251M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
251M
            ctx->count = ret;
908
251M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
251M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
181M
                RETURN_FAILURE;
917
918
69.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
563k
                ptr == state->end &&
920
71.6k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
71.6k
            {
922
                /* tail is empty.  we're finished */
923
71.6k
                state->ptr = ptr;
924
71.6k
                RETURN_SUCCESS;
925
71.6k
            }
926
927
69.9M
            LASTMARK_SAVE();
928
69.9M
            if (state->repeat)
929
41.7M
                MARK_PUSH(ctx->lastmark);
930
931
69.9M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.73M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.73M
                for (;;) {
936
20.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
18.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
13.4M
                        ptr--;
939
13.4M
                        ctx->count--;
940
13.4M
                    }
941
6.73M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.18M
                        break;
943
4.55M
                    state->ptr = ptr;
944
4.55M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.55M
                            pattern+pattern[0]);
946
4.55M
                    if (ret) {
947
4.55M
                        if (state->repeat)
948
3.24M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.55M
                        RETURN_ON_ERROR(ret);
950
4.55M
                        RETURN_SUCCESS;
951
4.55M
                    }
952
215
                    if (state->repeat)
953
199
                        MARK_POP_KEEP(ctx->lastmark);
954
215
                    LASTMARK_RESTORE();
955
956
215
                    ptr--;
957
215
                    ctx->count--;
958
215
                }
959
2.18M
                if (state->repeat)
960
807k
                    MARK_POP_DISCARD(ctx->lastmark);
961
63.1M
            } else {
962
                /* general case */
963
84.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
72.3M
                    state->ptr = ptr;
965
72.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
72.3M
                            pattern+pattern[0]);
967
72.3M
                    if (ret) {
968
51.5M
                        if (state->repeat)
969
36.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.5M
                        RETURN_ON_ERROR(ret);
971
51.5M
                        RETURN_SUCCESS;
972
51.5M
                    }
973
20.8M
                    if (state->repeat)
974
1.18M
                        MARK_POP_KEEP(ctx->lastmark);
975
20.8M
                    LASTMARK_RESTORE();
976
977
20.8M
                    ptr--;
978
20.8M
                    ctx->count--;
979
20.8M
                }
980
11.6M
                if (state->repeat)
981
1.05M
                    MARK_POP_DISCARD(ctx->lastmark);
982
11.6M
            }
983
13.8M
            RETURN_FAILURE;
984
985
2.44M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
2.44M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
2.44M
                   pattern[1], pattern[2]));
997
998
2.44M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
2.44M
            state->ptr = ptr;
1002
1003
2.44M
            if (pattern[1] == 0)
1004
2.44M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
2.44M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
2.44M
            } else {
1028
                /* general case */
1029
2.44M
                LASTMARK_SAVE();
1030
2.44M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
12.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
12.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
12.5M
                    state->ptr = ptr;
1036
12.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
12.5M
                            pattern+pattern[0]);
1038
12.5M
                    if (ret) {
1039
2.44M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
2.44M
                        RETURN_ON_ERROR(ret);
1042
2.44M
                        RETURN_SUCCESS;
1043
2.44M
                    }
1044
10.1M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
10.1M
                    LASTMARK_RESTORE();
1047
1048
10.1M
                    state->ptr = ptr;
1049
10.1M
                    ret = SRE(count)(state, pattern+3, 1);
1050
10.1M
                    RETURN_ON_ERROR(ret);
1051
10.1M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
10.1M
                    if (ret == 0)
1053
16
                        break;
1054
10.1M
                    assert(ret == 1);
1055
10.1M
                    ptr++;
1056
10.1M
                    ctx->count++;
1057
10.1M
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
24.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
24.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
24.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
24.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
24.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
24.6M
            ctx->u.rep->count = -1;
1131
24.6M
            ctx->u.rep->pattern = pattern;
1132
24.6M
            ctx->u.rep->prev = state->repeat;
1133
24.6M
            ctx->u.rep->last_ptr = NULL;
1134
24.6M
            state->repeat = ctx->u.rep;
1135
1136
24.6M
            state->ptr = ptr;
1137
24.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
24.6M
            state->repeat = ctx->u.rep->prev;
1139
24.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
24.6M
            if (ret) {
1142
11.6M
                RETURN_ON_ERROR(ret);
1143
11.6M
                RETURN_SUCCESS;
1144
11.6M
            }
1145
13.0M
            RETURN_FAILURE;
1146
1147
60.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
60.1M
            ctx->u.rep = state->repeat;
1155
60.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
60.1M
            state->ptr = ptr;
1159
1160
60.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
60.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
60.1M
                   ptr, ctx->count));
1164
1165
60.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
60.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.00M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
52.1M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
52.1M
                ctx->u.rep->count = ctx->count;
1185
52.1M
                LASTMARK_SAVE();
1186
52.1M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
52.1M
                LAST_PTR_PUSH();
1189
52.1M
                ctx->u.rep->last_ptr = state->ptr;
1190
52.1M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
52.1M
                        ctx->u.rep->pattern+3);
1192
52.1M
                LAST_PTR_POP();
1193
52.1M
                if (ret) {
1194
35.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.3M
                    RETURN_ON_ERROR(ret);
1196
35.3M
                    RETURN_SUCCESS;
1197
35.3M
                }
1198
16.8M
                MARK_POP(ctx->lastmark);
1199
16.8M
                LASTMARK_RESTORE();
1200
16.8M
                ctx->u.rep->count = ctx->count-1;
1201
16.8M
                state->ptr = ptr;
1202
16.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
24.8M
            state->repeat = ctx->u.rep->prev;
1207
24.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
24.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
24.8M
            RETURN_ON_SUCCESS(ret);
1211
13.1M
            state->ptr = ptr;
1212
13.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.57M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.57M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.57M
                   ptr, pattern[1]));
1565
3.57M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.57M
            state->ptr = ptr - pattern[1];
1568
3.57M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.57M
            RETURN_ON_FAILURE(ret);
1570
3.45M
            pattern += pattern[0];
1571
3.45M
            DISPATCH;
1572
1573
5.47M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.47M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.47M
                   ptr, pattern[1]));
1578
5.47M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.47M
                state->ptr = ptr - pattern[1];
1580
5.47M
                LASTMARK_SAVE();
1581
5.47M
                if (state->repeat)
1582
5.47M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.9M
                if (ret) {
1586
1.03k
                    if (state->repeat)
1587
1.03k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.03k
                    RETURN_ON_ERROR(ret);
1589
1.03k
                    RETURN_FAILURE;
1590
1.03k
                }
1591
5.47M
                if (state->repeat)
1592
5.47M
                    MARK_POP(ctx->lastmark);
1593
5.47M
                LASTMARK_RESTORE();
1594
5.47M
            }
1595
5.47M
            pattern += pattern[0];
1596
5.47M
            DISPATCH;
1597
1598
5.47M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
549M
exit:
1620
549M
    ctx_pos = ctx->last_ctx_pos;
1621
549M
    jump = ctx->jump;
1622
549M
    DATA_POP_DISCARD(ctx);
1623
549M
    if (ctx_pos == -1) {
1624
240M
        state->sigcount = sigcount;
1625
240M
        return ret;
1626
240M
    }
1627
309M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
309M
    switch (jump) {
1630
52.1M
        case JUMP_MAX_UNTIL_2:
1631
52.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
52.1M
            goto jump_max_until_2;
1633
24.8M
        case JUMP_MAX_UNTIL_3:
1634
24.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
24.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
108M
        case JUMP_BRANCH:
1643
108M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
108M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
24.6M
        case JUMP_REPEAT:
1658
24.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
24.6M
            goto jump_repeat;
1660
4.55M
        case JUMP_REPEAT_ONE_1:
1661
4.55M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.55M
            goto jump_repeat_one_1;
1663
72.3M
        case JUMP_REPEAT_ONE_2:
1664
72.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
72.3M
            goto jump_repeat_one_2;
1666
12.5M
        case JUMP_MIN_REPEAT_ONE:
1667
12.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
12.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.57M
        case JUMP_ASSERT:
1673
3.57M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.57M
            goto jump_assert;
1675
5.47M
        case JUMP_ASSERT_NOT:
1676
5.47M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.47M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
309M
    }
1683
1684
0
    return ret; /* should never get here */
1685
309M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
248M
{
601
248M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
248M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
248M
    Py_ssize_t ret = 0;
604
248M
    int jump;
605
248M
    unsigned int sigcount = state->sigcount;
606
607
248M
    SRE(match_context)* ctx;
608
248M
    SRE(match_context)* nextctx;
609
248M
    INIT_TRACE(state);
610
611
248M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
248M
    DATA_ALLOC(SRE(match_context), ctx);
614
248M
    ctx->last_ctx_pos = -1;
615
248M
    ctx->jump = JUMP_NONE;
616
248M
    ctx->toplevel = toplevel;
617
248M
    ctx_pos = alloc_pos;
618
619
248M
#if USE_COMPUTED_GOTOS
620
248M
#include "sre_targets.h"
621
248M
#endif
622
623
669M
entrance:
624
625
669M
    ;  // Fashion statement.
626
669M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
669M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
13.1M
        pattern += pattern[1] + 1;
637
13.1M
    }
638
639
669M
#if USE_COMPUTED_GOTOS
640
669M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
669M
    {
647
648
669M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
312M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
312M
                   ptr, pattern[0]));
653
312M
            {
654
312M
                int i = pattern[0];
655
312M
                if (i & 1)
656
62.8M
                    state->lastindex = i/2 + 1;
657
312M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
311M
                    int j = state->lastmark + 1;
663
315M
                    while (j < i)
664
3.69M
                        state->mark[j++] = NULL;
665
311M
                    state->lastmark = i;
666
311M
                }
667
312M
                state->mark[i] = ptr;
668
312M
            }
669
312M
            pattern++;
670
312M
            DISPATCH;
671
672
312M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
34.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
34.1M
                   ptr, *pattern));
677
34.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
18.0M
                RETURN_FAILURE;
679
16.0M
            pattern++;
680
16.0M
            ptr++;
681
16.0M
            DISPATCH;
682
683
16.0M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
68.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
68.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
68.7M
            if (ctx->toplevel &&
698
9.14M
                ((state->match_all && ptr != state->end) ||
699
9.14M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
68.7M
            state->ptr = ptr;
704
68.7M
            RETURN_SUCCESS;
705
706
59.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
59.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
59.2M
            if (!SRE(at)(state, ptr, *pattern))
711
56.5M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
145M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
145M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
145M
            if (ptr >= end ||
749
144M
                !SRE(charset)(state, pattern + 1, *ptr))
750
63.5M
                RETURN_FAILURE;
751
81.9M
            pattern += pattern[0];
752
81.9M
            ptr++;
753
81.9M
            DISPATCH;
754
755
81.9M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.41M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.41M
                   pattern, ptr, pattern[0]));
758
3.41M
            if (ptr >= end ||
759
3.41M
                sre_lower_ascii(*ptr) != *pattern)
760
21.3k
                RETURN_FAILURE;
761
3.39M
            pattern++;
762
3.39M
            ptr++;
763
3.39M
            DISPATCH;
764
765
3.39M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
21.6M
        TARGET(SRE_OP_JUMP):
845
21.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
21.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
21.6M
                   ptr, pattern[0]));
850
21.6M
            pattern += pattern[0];
851
21.6M
            DISPATCH;
852
853
27.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
27.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
27.3M
            LASTMARK_SAVE();
858
27.3M
            if (state->repeat)
859
20.8M
                MARK_PUSH(ctx->lastmark);
860
57.3M
            for (; pattern[0]; pattern += pattern[0]) {
861
51.3M
                if (pattern[1] == SRE_OP_LITERAL &&
862
22.4M
                    (ptr >= end ||
863
22.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
13.4M
                    continue;
865
37.8M
                if (pattern[1] == SRE_OP_IN &&
866
18.0M
                    (ptr >= end ||
867
18.0M
                     !SRE(charset)(state, pattern + 3,
868
18.0M
                                   (SRE_CODE) *ptr)))
869
9.63M
                    continue;
870
28.2M
                state->ptr = ptr;
871
28.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
28.2M
                if (ret) {
873
21.3M
                    if (state->repeat)
874
17.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
21.3M
                    RETURN_ON_ERROR(ret);
876
21.3M
                    RETURN_SUCCESS;
877
21.3M
                }
878
6.90M
                if (state->repeat)
879
2.22k
                    MARK_POP_KEEP(ctx->lastmark);
880
6.90M
                LASTMARK_RESTORE();
881
6.90M
            }
882
6.01M
            if (state->repeat)
883
2.92M
                MARK_POP_DISCARD(ctx->lastmark);
884
6.01M
            RETURN_FAILURE;
885
886
269M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
269M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
269M
                   pattern[1], pattern[2]));
898
899
269M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
156k
                RETURN_FAILURE; /* cannot match */
901
902
269M
            state->ptr = ptr;
903
904
269M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
269M
            RETURN_ON_ERROR(ret);
906
269M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
269M
            ctx->count = ret;
908
269M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
269M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
180M
                RETURN_FAILURE;
917
918
89.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.88M
                ptr == state->end &&
920
18.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.9k
            {
922
                /* tail is empty.  we're finished */
923
18.9k
                state->ptr = ptr;
924
18.9k
                RETURN_SUCCESS;
925
18.9k
            }
926
927
89.5M
            LASTMARK_SAVE();
928
89.5M
            if (state->repeat)
929
26.8M
                MARK_PUSH(ctx->lastmark);
930
931
89.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.98M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.98M
                for (;;) {
936
10.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
9.22M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
5.98M
                        ptr--;
939
5.98M
                        ctx->count--;
940
5.98M
                    }
941
4.98M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.73M
                        break;
943
3.24M
                    state->ptr = ptr;
944
3.24M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.24M
                            pattern+pattern[0]);
946
3.24M
                    if (ret) {
947
3.24M
                        if (state->repeat)
948
3.20M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.24M
                        RETURN_ON_ERROR(ret);
950
3.24M
                        RETURN_SUCCESS;
951
3.24M
                    }
952
234
                    if (state->repeat)
953
234
                        MARK_POP_KEEP(ctx->lastmark);
954
234
                    LASTMARK_RESTORE();
955
956
234
                    ptr--;
957
234
                    ctx->count--;
958
234
                }
959
1.73M
                if (state->repeat)
960
1.72M
                    MARK_POP_DISCARD(ctx->lastmark);
961
84.5M
            } else {
962
                /* general case */
963
146M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
102M
                    state->ptr = ptr;
965
102M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
102M
                            pattern+pattern[0]);
967
102M
                    if (ret) {
968
39.7M
                        if (state->repeat)
969
21.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
39.7M
                        RETURN_ON_ERROR(ret);
971
39.7M
                        RETURN_SUCCESS;
972
39.7M
                    }
973
62.4M
                    if (state->repeat)
974
200k
                        MARK_POP_KEEP(ctx->lastmark);
975
62.4M
                    LASTMARK_RESTORE();
976
977
62.4M
                    ptr--;
978
62.4M
                    ctx->count--;
979
62.4M
                }
980
44.8M
                if (state->repeat)
981
128k
                    MARK_POP_DISCARD(ctx->lastmark);
982
44.8M
            }
983
46.5M
            RETURN_FAILURE;
984
985
1.26M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
1.26M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
1.26M
                   pattern[1], pattern[2]));
997
998
1.26M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
1.26M
            state->ptr = ptr;
1002
1003
1.26M
            if (pattern[1] == 0)
1004
1.26M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
1.26M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
1.26M
            } else {
1028
                /* general case */
1029
1.26M
                LASTMARK_SAVE();
1030
1.26M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
45.9M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
45.9M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
45.9M
                    state->ptr = ptr;
1036
45.9M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
45.9M
                            pattern+pattern[0]);
1038
45.9M
                    if (ret) {
1039
1.26M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
1.26M
                        RETURN_ON_ERROR(ret);
1042
1.26M
                        RETURN_SUCCESS;
1043
1.26M
                    }
1044
44.6M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
44.6M
                    LASTMARK_RESTORE();
1047
1048
44.6M
                    state->ptr = ptr;
1049
44.6M
                    ret = SRE(count)(state, pattern+3, 1);
1050
44.6M
                    RETURN_ON_ERROR(ret);
1051
44.6M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
44.6M
                    if (ret == 0)
1053
0
                        break;
1054
44.6M
                    assert(ret == 1);
1055
44.6M
                    ptr++;
1056
44.6M
                    ctx->count++;
1057
44.6M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
65.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
65.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
65.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
65.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
65.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
65.5M
            ctx->u.rep->count = -1;
1131
65.5M
            ctx->u.rep->pattern = pattern;
1132
65.5M
            ctx->u.rep->prev = state->repeat;
1133
65.5M
            ctx->u.rep->last_ptr = NULL;
1134
65.5M
            state->repeat = ctx->u.rep;
1135
1136
65.5M
            state->ptr = ptr;
1137
65.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
65.5M
            state->repeat = ctx->u.rep->prev;
1139
65.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
65.5M
            if (ret) {
1142
10.5M
                RETURN_ON_ERROR(ret);
1143
10.5M
                RETURN_SUCCESS;
1144
10.5M
            }
1145
55.0M
            RETURN_FAILURE;
1146
1147
96.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
96.1M
            ctx->u.rep = state->repeat;
1155
96.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
96.1M
            state->ptr = ptr;
1159
1160
96.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
96.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
96.1M
                   ptr, ctx->count));
1164
1165
96.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
96.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.19M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
92.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
92.9M
                ctx->u.rep->count = ctx->count;
1185
92.9M
                LASTMARK_SAVE();
1186
92.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
92.9M
                LAST_PTR_PUSH();
1189
92.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
92.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
92.9M
                        ctx->u.rep->pattern+3);
1192
92.9M
                LAST_PTR_POP();
1193
92.9M
                if (ret) {
1194
30.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
30.4M
                    RETURN_ON_ERROR(ret);
1196
30.4M
                    RETURN_SUCCESS;
1197
30.4M
                }
1198
62.4M
                MARK_POP(ctx->lastmark);
1199
62.4M
                LASTMARK_RESTORE();
1200
62.4M
                ctx->u.rep->count = ctx->count-1;
1201
62.4M
                state->ptr = ptr;
1202
62.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
65.6M
            state->repeat = ctx->u.rep->prev;
1207
65.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
65.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
65.6M
            RETURN_ON_SUCCESS(ret);
1211
55.0M
            state->ptr = ptr;
1212
55.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
7.94M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
7.94M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
7.94M
                   ptr, pattern[1]));
1565
7.94M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
7.94M
            state->ptr = ptr - pattern[1];
1568
7.94M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
7.94M
            RETURN_ON_FAILURE(ret);
1570
5.15M
            pattern += pattern[0];
1571
5.15M
            DISPATCH;
1572
1573
9.43M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
9.43M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
9.43M
                   ptr, pattern[1]));
1578
9.43M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
9.43M
                state->ptr = ptr - pattern[1];
1580
9.43M
                LASTMARK_SAVE();
1581
9.43M
                if (state->repeat)
1582
9.43M
                    MARK_PUSH(ctx->lastmark);
1583
1584
18.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
18.8M
                if (ret) {
1586
1.94k
                    if (state->repeat)
1587
1.94k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.94k
                    RETURN_ON_ERROR(ret);
1589
1.94k
                    RETURN_FAILURE;
1590
1.94k
                }
1591
9.43M
                if (state->repeat)
1592
9.43M
                    MARK_POP(ctx->lastmark);
1593
9.43M
                LASTMARK_RESTORE();
1594
9.43M
            }
1595
9.43M
            pattern += pattern[0];
1596
9.43M
            DISPATCH;
1597
1598
9.43M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
669M
exit:
1620
669M
    ctx_pos = ctx->last_ctx_pos;
1621
669M
    jump = ctx->jump;
1622
669M
    DATA_POP_DISCARD(ctx);
1623
669M
    if (ctx_pos == -1) {
1624
248M
        state->sigcount = sigcount;
1625
248M
        return ret;
1626
248M
    }
1627
421M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
421M
    switch (jump) {
1630
92.9M
        case JUMP_MAX_UNTIL_2:
1631
92.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
92.9M
            goto jump_max_until_2;
1633
65.6M
        case JUMP_MAX_UNTIL_3:
1634
65.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
65.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
28.2M
        case JUMP_BRANCH:
1643
28.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
28.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
65.5M
        case JUMP_REPEAT:
1658
65.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
65.5M
            goto jump_repeat;
1660
3.24M
        case JUMP_REPEAT_ONE_1:
1661
3.24M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.24M
            goto jump_repeat_one_1;
1663
102M
        case JUMP_REPEAT_ONE_2:
1664
102M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
102M
            goto jump_repeat_one_2;
1666
45.9M
        case JUMP_MIN_REPEAT_ONE:
1667
45.9M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
45.9M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
7.94M
        case JUMP_ASSERT:
1673
7.94M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
7.94M
            goto jump_assert;
1675
9.43M
        case JUMP_ASSERT_NOT:
1676
9.43M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
9.43M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
421M
    }
1683
1684
0
    return ret; /* should never get here */
1685
421M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
53.4M
{
601
53.4M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
53.4M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
53.4M
    Py_ssize_t ret = 0;
604
53.4M
    int jump;
605
53.4M
    unsigned int sigcount = state->sigcount;
606
607
53.4M
    SRE(match_context)* ctx;
608
53.4M
    SRE(match_context)* nextctx;
609
53.4M
    INIT_TRACE(state);
610
611
53.4M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
53.4M
    DATA_ALLOC(SRE(match_context), ctx);
614
53.4M
    ctx->last_ctx_pos = -1;
615
53.4M
    ctx->jump = JUMP_NONE;
616
53.4M
    ctx->toplevel = toplevel;
617
53.4M
    ctx_pos = alloc_pos;
618
619
53.4M
#if USE_COMPUTED_GOTOS
620
53.4M
#include "sre_targets.h"
621
53.4M
#endif
622
623
257M
entrance:
624
625
257M
    ;  // Fashion statement.
626
257M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
257M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
7.72M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.77k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.77k
                   end - ptr, (size_t) pattern[3]));
634
3.77k
            RETURN_FAILURE;
635
3.77k
        }
636
7.71M
        pattern += pattern[1] + 1;
637
7.71M
    }
638
639
257M
#if USE_COMPUTED_GOTOS
640
257M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
257M
    {
647
648
257M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
80.2M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
80.2M
                   ptr, pattern[0]));
653
80.2M
            {
654
80.2M
                int i = pattern[0];
655
80.2M
                if (i & 1)
656
22.7M
                    state->lastindex = i/2 + 1;
657
80.2M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
78.5M
                    int j = state->lastmark + 1;
663
80.2M
                    while (j < i)
664
1.75M
                        state->mark[j++] = NULL;
665
78.5M
                    state->lastmark = i;
666
78.5M
                }
667
80.2M
                state->mark[i] = ptr;
668
80.2M
            }
669
80.2M
            pattern++;
670
80.2M
            DISPATCH;
671
672
80.2M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
27.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
27.0M
                   ptr, *pattern));
677
27.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
12.5M
                RETURN_FAILURE;
679
14.5M
            pattern++;
680
14.5M
            ptr++;
681
14.5M
            DISPATCH;
682
683
14.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
22.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
22.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
22.9M
            if (ctx->toplevel &&
698
6.22M
                ((state->match_all && ptr != state->end) ||
699
6.22M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
22.9M
            state->ptr = ptr;
704
22.9M
            RETURN_SUCCESS;
705
706
9.30M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
9.30M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
9.30M
            if (!SRE(at)(state, ptr, *pattern))
711
9.27M
                RETURN_FAILURE;
712
28.3k
            pattern++;
713
28.3k
            DISPATCH;
714
715
28.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
57.8M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
57.8M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
57.8M
            if (ptr >= end ||
749
57.8M
                !SRE(charset)(state, pattern + 1, *ptr))
750
13.9M
                RETURN_FAILURE;
751
43.8M
            pattern += pattern[0];
752
43.8M
            ptr++;
753
43.8M
            DISPATCH;
754
755
43.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.94M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.94M
                   pattern, ptr, pattern[0]));
758
1.94M
            if (ptr >= end ||
759
1.94M
                sre_lower_ascii(*ptr) != *pattern)
760
16.7k
                RETURN_FAILURE;
761
1.93M
            pattern++;
762
1.93M
            ptr++;
763
1.93M
            DISPATCH;
764
765
1.93M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
21.1M
        TARGET(SRE_OP_JUMP):
845
21.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
21.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
21.1M
                   ptr, pattern[0]));
850
21.1M
            pattern += pattern[0];
851
21.1M
            DISPATCH;
852
853
25.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
25.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
25.6M
            LASTMARK_SAVE();
858
25.6M
            if (state->repeat)
859
22.7M
                MARK_PUSH(ctx->lastmark);
860
54.7M
            for (; pattern[0]; pattern += pattern[0]) {
861
49.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
25.4M
                    (ptr >= end ||
863
25.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
18.0M
                    continue;
865
31.4M
                if (pattern[1] == SRE_OP_IN &&
866
18.2M
                    (ptr >= end ||
867
18.2M
                     !SRE(charset)(state, pattern + 3,
868
18.2M
                                   (SRE_CODE) *ptr)))
869
9.97M
                    continue;
870
21.4M
                state->ptr = ptr;
871
21.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
21.4M
                if (ret) {
873
20.4M
                    if (state->repeat)
874
18.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
20.4M
                    RETURN_ON_ERROR(ret);
876
20.4M
                    RETURN_SUCCESS;
877
20.4M
                }
878
1.07M
                if (state->repeat)
879
5.27k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.07M
                LASTMARK_RESTORE();
881
1.07M
            }
882
5.26M
            if (state->repeat)
883
4.39M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.26M
            RETURN_FAILURE;
885
886
93.4M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
93.4M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
93.4M
                   pattern[1], pattern[2]));
898
899
93.4M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
11.9k
                RETURN_FAILURE; /* cannot match */
901
902
93.4M
            state->ptr = ptr;
903
904
93.4M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
93.4M
            RETURN_ON_ERROR(ret);
906
93.4M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
93.4M
            ctx->count = ret;
908
93.4M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
93.4M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
36.9M
                RETURN_FAILURE;
917
918
56.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.27M
                ptr == state->end &&
920
3.23k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.23k
            {
922
                /* tail is empty.  we're finished */
923
3.23k
                state->ptr = ptr;
924
3.23k
                RETURN_SUCCESS;
925
3.23k
            }
926
927
56.4M
            LASTMARK_SAVE();
928
56.4M
            if (state->repeat)
929
40.1M
                MARK_PUSH(ctx->lastmark);
930
931
56.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
10.2M
                ctx->u.chr = pattern[pattern[0]+1];
935
10.2M
                for (;;) {
936
25.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
20.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
15.7M
                        ptr--;
939
15.7M
                        ctx->count--;
940
15.7M
                    }
941
10.2M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
5.03M
                        break;
943
5.20M
                    state->ptr = ptr;
944
5.20M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.20M
                            pattern+pattern[0]);
946
5.20M
                    if (ret) {
947
5.20M
                        if (state->repeat)
948
5.10M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.20M
                        RETURN_ON_ERROR(ret);
950
5.20M
                        RETURN_SUCCESS;
951
5.20M
                    }
952
292
                    if (state->repeat)
953
292
                        MARK_POP_KEEP(ctx->lastmark);
954
292
                    LASTMARK_RESTORE();
955
956
292
                    ptr--;
957
292
                    ctx->count--;
958
292
                }
959
5.03M
                if (state->repeat)
960
5.03M
                    MARK_POP_DISCARD(ctx->lastmark);
961
46.2M
            } else {
962
                /* general case */
963
60.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
55.4M
                    state->ptr = ptr;
965
55.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
55.4M
                            pattern+pattern[0]);
967
55.4M
                    if (ret) {
968
41.1M
                        if (state->repeat)
969
29.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
41.1M
                        RETURN_ON_ERROR(ret);
971
41.1M
                        RETURN_SUCCESS;
972
41.1M
                    }
973
14.3M
                    if (state->repeat)
974
94.8k
                        MARK_POP_KEEP(ctx->lastmark);
975
14.3M
                    LASTMARK_RESTORE();
976
977
14.3M
                    ptr--;
978
14.3M
                    ctx->count--;
979
14.3M
                }
980
5.14M
                if (state->repeat)
981
60.3k
                    MARK_POP_DISCARD(ctx->lastmark);
982
5.14M
            }
983
10.1M
            RETURN_FAILURE;
984
985
10.5k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
10.5k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
10.5k
                   pattern[1], pattern[2]));
997
998
10.5k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
10.5k
            state->ptr = ptr;
1002
1003
10.5k
            if (pattern[1] == 0)
1004
10.5k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
10.5k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
10.5k
            } else {
1028
                /* general case */
1029
10.5k
                LASTMARK_SAVE();
1030
10.5k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
5.08M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
5.08M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
5.08M
                    state->ptr = ptr;
1036
5.08M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
5.08M
                            pattern+pattern[0]);
1038
5.08M
                    if (ret) {
1039
10.5k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
10.5k
                        RETURN_ON_ERROR(ret);
1042
10.5k
                        RETURN_SUCCESS;
1043
10.5k
                    }
1044
5.07M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
5.07M
                    LASTMARK_RESTORE();
1047
1048
5.07M
                    state->ptr = ptr;
1049
5.07M
                    ret = SRE(count)(state, pattern+3, 1);
1050
5.07M
                    RETURN_ON_ERROR(ret);
1051
5.07M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
5.07M
                    if (ret == 0)
1053
0
                        break;
1054
5.07M
                    assert(ret == 1);
1055
5.07M
                    ptr++;
1056
5.07M
                    ctx->count++;
1057
5.07M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
22.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
22.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
22.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
22.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
22.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
22.2M
            ctx->u.rep->count = -1;
1131
22.2M
            ctx->u.rep->pattern = pattern;
1132
22.2M
            ctx->u.rep->prev = state->repeat;
1133
22.2M
            ctx->u.rep->last_ptr = NULL;
1134
22.2M
            state->repeat = ctx->u.rep;
1135
1136
22.2M
            state->ptr = ptr;
1137
22.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
22.2M
            state->repeat = ctx->u.rep->prev;
1139
22.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
22.2M
            if (ret) {
1142
14.8M
                RETURN_ON_ERROR(ret);
1143
14.8M
                RETURN_SUCCESS;
1144
14.8M
            }
1145
7.41M
            RETURN_FAILURE;
1146
1147
55.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
55.9M
            ctx->u.rep = state->repeat;
1155
55.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
55.9M
            state->ptr = ptr;
1159
1160
55.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
55.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
55.9M
                   ptr, ctx->count));
1164
1165
55.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
55.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.64M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
51.2M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
51.2M
                ctx->u.rep->count = ctx->count;
1185
51.2M
                LASTMARK_SAVE();
1186
51.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
51.2M
                LAST_PTR_PUSH();
1189
51.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
51.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
51.2M
                        ctx->u.rep->pattern+3);
1192
51.2M
                LAST_PTR_POP();
1193
51.2M
                if (ret) {
1194
33.6M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
33.6M
                    RETURN_ON_ERROR(ret);
1196
33.6M
                    RETURN_SUCCESS;
1197
33.6M
                }
1198
17.6M
                MARK_POP(ctx->lastmark);
1199
17.6M
                LASTMARK_RESTORE();
1200
17.6M
                ctx->u.rep->count = ctx->count-1;
1201
17.6M
                state->ptr = ptr;
1202
17.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
22.3M
            state->repeat = ctx->u.rep->prev;
1207
22.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
22.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
22.3M
            RETURN_ON_SUCCESS(ret);
1211
7.44M
            state->ptr = ptr;
1212
7.44M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
11.3M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
11.3M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
11.3M
                   ptr, pattern[1]));
1565
11.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
11.3M
            state->ptr = ptr - pattern[1];
1568
11.3M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
11.3M
            RETURN_ON_FAILURE(ret);
1570
9.83M
            pattern += pattern[0];
1571
9.83M
            DISPATCH;
1572
1573
9.83M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
9.64M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
9.64M
                   ptr, pattern[1]));
1578
9.64M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
9.64M
                state->ptr = ptr - pattern[1];
1580
9.64M
                LASTMARK_SAVE();
1581
9.64M
                if (state->repeat)
1582
9.64M
                    MARK_PUSH(ctx->lastmark);
1583
1584
19.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
19.2M
                if (ret) {
1586
4.95k
                    if (state->repeat)
1587
4.95k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
4.95k
                    RETURN_ON_ERROR(ret);
1589
4.95k
                    RETURN_FAILURE;
1590
4.95k
                }
1591
9.64M
                if (state->repeat)
1592
9.64M
                    MARK_POP(ctx->lastmark);
1593
9.64M
                LASTMARK_RESTORE();
1594
9.64M
            }
1595
9.64M
            pattern += pattern[0];
1596
9.64M
            DISPATCH;
1597
1598
9.64M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
257M
exit:
1620
257M
    ctx_pos = ctx->last_ctx_pos;
1621
257M
    jump = ctx->jump;
1622
257M
    DATA_POP_DISCARD(ctx);
1623
257M
    if (ctx_pos == -1) {
1624
53.4M
        state->sigcount = sigcount;
1625
53.4M
        return ret;
1626
53.4M
    }
1627
204M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
204M
    switch (jump) {
1630
51.2M
        case JUMP_MAX_UNTIL_2:
1631
51.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
51.2M
            goto jump_max_until_2;
1633
22.3M
        case JUMP_MAX_UNTIL_3:
1634
22.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
22.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
21.4M
        case JUMP_BRANCH:
1643
21.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
21.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
22.2M
        case JUMP_REPEAT:
1658
22.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
22.2M
            goto jump_repeat;
1660
5.20M
        case JUMP_REPEAT_ONE_1:
1661
5.20M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.20M
            goto jump_repeat_one_1;
1663
55.4M
        case JUMP_REPEAT_ONE_2:
1664
55.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
55.4M
            goto jump_repeat_one_2;
1666
5.08M
        case JUMP_MIN_REPEAT_ONE:
1667
5.08M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
5.08M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
11.3M
        case JUMP_ASSERT:
1673
11.3M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
11.3M
            goto jump_assert;
1675
9.64M
        case JUMP_ASSERT_NOT:
1676
9.64M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
9.64M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
204M
    }
1683
1684
0
    return ret; /* should never get here */
1685
204M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
373M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
117M
{
1694
117M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
117M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
117M
    Py_ssize_t status = 0;
1697
117M
    Py_ssize_t prefix_len = 0;
1698
117M
    Py_ssize_t prefix_skip = 0;
1699
117M
    SRE_CODE* prefix = NULL;
1700
117M
    SRE_CODE* charset = NULL;
1701
117M
    SRE_CODE* overlap = NULL;
1702
117M
    int flags = 0;
1703
117M
    INIT_TRACE(state);
1704
1705
117M
    if (ptr > end)
1706
0
        return 0;
1707
1708
117M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
117M
        flags = pattern[2];
1713
1714
117M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.10M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.10M
                   end - ptr, (size_t) pattern[3]));
1717
6.10M
            return 0;
1718
6.10M
        }
1719
111M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.69M
            end -= pattern[3] - 1;
1723
8.69M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.69M
        }
1726
1727
111M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.70M
            prefix_len = pattern[5];
1731
8.70M
            prefix_skip = pattern[6];
1732
8.70M
            prefix = pattern + 7;
1733
8.70M
            overlap = prefix + prefix_len - 1;
1734
103M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
94.2M
            charset = pattern + 5;
1738
1739
111M
        pattern += 1 + pattern[1];
1740
111M
    }
1741
1742
111M
    TRACE(("prefix = %p %zd %zd\n",
1743
111M
           prefix, prefix_len, prefix_skip));
1744
111M
    TRACE(("charset = %p\n", charset));
1745
1746
111M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.98M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.96M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.96M
#endif
1753
5.96M
        end = (SRE_CHAR *)state->end;
1754
5.96M
        state->must_advance = 0;
1755
8.64M
        while (ptr < end) {
1756
113M
            while (*ptr != c) {
1757
105M
                if (++ptr >= end)
1758
466k
                    return 0;
1759
105M
            }
1760
8.03M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.03M
            state->start = ptr;
1762
8.03M
            state->ptr = ptr + prefix_skip;
1763
8.03M
            if (flags & SRE_INFO_LITERAL)
1764
11.4k
                return 1; /* we got all of it */
1765
8.02M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.02M
            if (status != 0)
1767
7.35M
                return status;
1768
665k
            ++ptr;
1769
665k
            RESET_CAPTURE_GROUP();
1770
665k
        }
1771
144k
        return 0;
1772
5.96M
    }
1773
1774
103M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
724k
        Py_ssize_t i = 0;
1778
1779
724k
        end = (SRE_CHAR *)state->end;
1780
724k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.32M
        for (i = 0; i < prefix_len; i++)
1784
880k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
440k
#endif
1787
1.38M
        while (ptr < end) {
1788
1.38M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
12.1M
            while (*ptr++ != c) {
1790
10.7M
                if (ptr >= end)
1791
313
                    return 0;
1792
10.7M
            }
1793
1.38M
            if (ptr >= end)
1794
57
                return 0;
1795
1796
1.38M
            i = 1;
1797
1.38M
            state->must_advance = 0;
1798
1.39M
            do {
1799
1.39M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.31M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.31M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.31M
                    state->start = ptr - (prefix_len - 1);
1808
1.31M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.31M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.31M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.31M
                    if (status != 0)
1813
724k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
588k
                    if (++ptr >= end)
1816
62
                        return 0;
1817
588k
                    RESET_CAPTURE_GROUP();
1818
588k
                }
1819
666k
                i = overlap[i];
1820
666k
            } while (i != 0);
1821
1.38M
        }
1822
0
        return 0;
1823
724k
    }
1824
1825
103M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
94.2M
        end = (SRE_CHAR *)state->end;
1828
94.2M
        state->must_advance = 0;
1829
96.8M
        for (;;) {
1830
380M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
283M
                ptr++;
1832
96.8M
            if (ptr >= end)
1833
3.47M
                return 0;
1834
93.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
93.3M
            state->start = ptr;
1836
93.3M
            state->ptr = ptr;
1837
93.3M
            status = SRE(match)(state, pattern, 0);
1838
93.3M
            if (status != 0)
1839
90.7M
                break;
1840
2.60M
            ptr++;
1841
2.60M
            RESET_CAPTURE_GROUP();
1842
2.60M
        }
1843
94.2M
    } else {
1844
        /* general case */
1845
8.72M
        assert(ptr <= end);
1846
8.72M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.72M
        state->start = state->ptr = ptr;
1848
8.72M
        status = SRE(match)(state, pattern, 1);
1849
8.72M
        state->must_advance = 0;
1850
8.72M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
63
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
374M
        while (status == 0 && ptr < end) {
1858
369M
            ptr++;
1859
369M
            RESET_CAPTURE_GROUP();
1860
369M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
369M
            state->start = state->ptr = ptr;
1862
369M
            status = SRE(match)(state, pattern, 0);
1863
369M
        }
1864
4.67M
    }
1865
1866
95.4M
    return status;
1867
103M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
53.5M
{
1694
53.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
53.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
53.5M
    Py_ssize_t status = 0;
1697
53.5M
    Py_ssize_t prefix_len = 0;
1698
53.5M
    Py_ssize_t prefix_skip = 0;
1699
53.5M
    SRE_CODE* prefix = NULL;
1700
53.5M
    SRE_CODE* charset = NULL;
1701
53.5M
    SRE_CODE* overlap = NULL;
1702
53.5M
    int flags = 0;
1703
53.5M
    INIT_TRACE(state);
1704
1705
53.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
53.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
53.5M
        flags = pattern[2];
1713
1714
53.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.00M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.00M
                   end - ptr, (size_t) pattern[3]));
1717
6.00M
            return 0;
1718
6.00M
        }
1719
47.5M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.65M
            end -= pattern[3] - 1;
1723
2.65M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.65M
        }
1726
1727
47.5M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.65M
            prefix_len = pattern[5];
1731
2.65M
            prefix_skip = pattern[6];
1732
2.65M
            prefix = pattern + 7;
1733
2.65M
            overlap = prefix + prefix_len - 1;
1734
44.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
38.8M
            charset = pattern + 5;
1738
1739
47.5M
        pattern += 1 + pattern[1];
1740
47.5M
    }
1741
1742
47.5M
    TRACE(("prefix = %p %zd %zd\n",
1743
47.5M
           prefix, prefix_len, prefix_skip));
1744
47.5M
    TRACE(("charset = %p\n", charset));
1745
1746
47.5M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.64M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.64M
#if SIZEOF_SRE_CHAR < 4
1750
2.64M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.64M
#endif
1753
2.64M
        end = (SRE_CHAR *)state->end;
1754
2.64M
        state->must_advance = 0;
1755
3.06M
        while (ptr < end) {
1756
26.9M
            while (*ptr != c) {
1757
24.3M
                if (++ptr >= end)
1758
380k
                    return 0;
1759
24.3M
            }
1760
2.54M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.54M
            state->start = ptr;
1762
2.54M
            state->ptr = ptr + prefix_skip;
1763
2.54M
            if (flags & SRE_INFO_LITERAL)
1764
500
                return 1; /* we got all of it */
1765
2.54M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.54M
            if (status != 0)
1767
2.12M
                return status;
1768
419k
            ++ptr;
1769
419k
            RESET_CAPTURE_GROUP();
1770
419k
        }
1771
139k
        return 0;
1772
2.64M
    }
1773
1774
44.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
13.8k
        Py_ssize_t i = 0;
1778
1779
13.8k
        end = (SRE_CHAR *)state->end;
1780
13.8k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
13.8k
#if SIZEOF_SRE_CHAR < 4
1783
41.4k
        for (i = 0; i < prefix_len; i++)
1784
27.6k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
13.8k
#endif
1787
114k
        while (ptr < end) {
1788
114k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.96M
            while (*ptr++ != c) {
1790
1.85M
                if (ptr >= end)
1791
61
                    return 0;
1792
1.85M
            }
1793
114k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
114k
            i = 1;
1797
114k
            state->must_advance = 0;
1798
114k
            do {
1799
114k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
99.8k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
99.8k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
99.8k
                    state->start = ptr - (prefix_len - 1);
1808
99.8k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
99.8k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
99.8k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
99.8k
                    if (status != 0)
1813
13.6k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
86.1k
                    if (++ptr >= end)
1816
28
                        return 0;
1817
86.1k
                    RESET_CAPTURE_GROUP();
1818
86.1k
                }
1819
100k
                i = overlap[i];
1820
100k
            } while (i != 0);
1821
114k
        }
1822
0
        return 0;
1823
13.8k
    }
1824
1825
44.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
38.8M
        end = (SRE_CHAR *)state->end;
1828
38.8M
        state->must_advance = 0;
1829
40.3M
        for (;;) {
1830
103M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
63.3M
                ptr++;
1832
40.3M
            if (ptr >= end)
1833
2.40M
                return 0;
1834
37.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
37.9M
            state->start = ptr;
1836
37.9M
            state->ptr = ptr;
1837
37.9M
            status = SRE(match)(state, pattern, 0);
1838
37.9M
            if (status != 0)
1839
36.4M
                break;
1840
1.48M
            ptr++;
1841
1.48M
            RESET_CAPTURE_GROUP();
1842
1.48M
        }
1843
38.8M
    } else {
1844
        /* general case */
1845
6.10M
        assert(ptr <= end);
1846
6.10M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
6.10M
        state->start = state->ptr = ptr;
1848
6.10M
        status = SRE(match)(state, pattern, 1);
1849
6.10M
        state->must_advance = 0;
1850
6.10M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
23
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
156M
        while (status == 0 && ptr < end) {
1858
153M
            ptr++;
1859
153M
            RESET_CAPTURE_GROUP();
1860
153M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
153M
            state->start = state->ptr = ptr;
1862
153M
            status = SRE(match)(state, pattern, 0);
1863
153M
        }
1864
3.47M
    }
1865
1866
39.8M
    return status;
1867
44.9M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
57.2M
{
1694
57.2M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
57.2M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
57.2M
    Py_ssize_t status = 0;
1697
57.2M
    Py_ssize_t prefix_len = 0;
1698
57.2M
    Py_ssize_t prefix_skip = 0;
1699
57.2M
    SRE_CODE* prefix = NULL;
1700
57.2M
    SRE_CODE* charset = NULL;
1701
57.2M
    SRE_CODE* overlap = NULL;
1702
57.2M
    int flags = 0;
1703
57.2M
    INIT_TRACE(state);
1704
1705
57.2M
    if (ptr > end)
1706
0
        return 0;
1707
1708
57.2M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
57.2M
        flags = pattern[2];
1713
1714
57.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
98.8k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
98.8k
                   end - ptr, (size_t) pattern[3]));
1717
98.8k
            return 0;
1718
98.8k
        }
1719
57.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.74M
            end -= pattern[3] - 1;
1723
3.74M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.74M
        }
1726
1727
57.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.75M
            prefix_len = pattern[5];
1731
3.75M
            prefix_skip = pattern[6];
1732
3.75M
            prefix = pattern + 7;
1733
3.75M
            overlap = prefix + prefix_len - 1;
1734
53.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
50.9M
            charset = pattern + 5;
1738
1739
57.1M
        pattern += 1 + pattern[1];
1740
57.1M
    }
1741
1742
57.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
57.1M
           prefix, prefix_len, prefix_skip));
1744
57.1M
    TRACE(("charset = %p\n", charset));
1745
1746
57.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.32M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.32M
#if SIZEOF_SRE_CHAR < 4
1750
3.32M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.32M
#endif
1753
3.32M
        end = (SRE_CHAR *)state->end;
1754
3.32M
        state->must_advance = 0;
1755
3.51M
        while (ptr < end) {
1756
58.8M
            while (*ptr != c) {
1757
55.3M
                if (++ptr >= end)
1758
81.1k
                    return 0;
1759
55.3M
            }
1760
3.42M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.42M
            state->start = ptr;
1762
3.42M
            state->ptr = ptr + prefix_skip;
1763
3.42M
            if (flags & SRE_INFO_LITERAL)
1764
6.13k
                return 1; /* we got all of it */
1765
3.42M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.42M
            if (status != 0)
1767
3.23M
                return status;
1768
187k
            ++ptr;
1769
187k
            RESET_CAPTURE_GROUP();
1770
187k
        }
1771
3.47k
        return 0;
1772
3.32M
    }
1773
1774
53.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
426k
        Py_ssize_t i = 0;
1778
1779
426k
        end = (SRE_CHAR *)state->end;
1780
426k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
426k
#if SIZEOF_SRE_CHAR < 4
1783
1.27M
        for (i = 0; i < prefix_len; i++)
1784
852k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
426k
#endif
1787
736k
        while (ptr < end) {
1788
736k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.75M
            while (*ptr++ != c) {
1790
3.01M
                if (ptr >= end)
1791
116
                    return 0;
1792
3.01M
            }
1793
736k
            if (ptr >= end)
1794
17
                return 0;
1795
1796
736k
            i = 1;
1797
736k
            state->must_advance = 0;
1798
737k
            do {
1799
737k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
716k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
716k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
716k
                    state->start = ptr - (prefix_len - 1);
1808
716k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
716k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
716k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
716k
                    if (status != 0)
1813
426k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
290k
                    if (++ptr >= end)
1816
18
                        return 0;
1817
290k
                    RESET_CAPTURE_GROUP();
1818
290k
                }
1819
311k
                i = overlap[i];
1820
311k
            } while (i != 0);
1821
736k
        }
1822
0
        return 0;
1823
426k
    }
1824
1825
53.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
50.9M
        end = (SRE_CHAR *)state->end;
1828
50.9M
        state->must_advance = 0;
1829
51.2M
        for (;;) {
1830
208M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
156M
                ptr++;
1832
51.2M
            if (ptr >= end)
1833
1.02M
                return 0;
1834
50.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
50.2M
            state->start = ptr;
1836
50.2M
            state->ptr = ptr;
1837
50.2M
            status = SRE(match)(state, pattern, 0);
1838
50.2M
            if (status != 0)
1839
49.9M
                break;
1840
341k
            ptr++;
1841
341k
            RESET_CAPTURE_GROUP();
1842
341k
        }
1843
50.9M
    } else {
1844
        /* general case */
1845
2.46M
        assert(ptr <= end);
1846
2.46M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.46M
        state->start = state->ptr = ptr;
1848
2.46M
        status = SRE(match)(state, pattern, 1);
1849
2.46M
        state->must_advance = 0;
1850
2.46M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
179M
        while (status == 0 && ptr < end) {
1858
178M
            ptr++;
1859
178M
            RESET_CAPTURE_GROUP();
1860
178M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
178M
            state->start = state->ptr = ptr;
1862
178M
            status = SRE(match)(state, pattern, 0);
1863
178M
        }
1864
1.05M
    }
1865
1866
50.9M
    return status;
1867
53.4M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.95M
{
1694
6.95M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.95M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.95M
    Py_ssize_t status = 0;
1697
6.95M
    Py_ssize_t prefix_len = 0;
1698
6.95M
    Py_ssize_t prefix_skip = 0;
1699
6.95M
    SRE_CODE* prefix = NULL;
1700
6.95M
    SRE_CODE* charset = NULL;
1701
6.95M
    SRE_CODE* overlap = NULL;
1702
6.95M
    int flags = 0;
1703
6.95M
    INIT_TRACE(state);
1704
1705
6.95M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.95M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.95M
        flags = pattern[2];
1713
1714
6.95M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.45k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.45k
                   end - ptr, (size_t) pattern[3]));
1717
5.45k
            return 0;
1718
5.45k
        }
1719
6.94M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.29M
            end -= pattern[3] - 1;
1723
2.29M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.29M
        }
1726
1727
6.94M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.29M
            prefix_len = pattern[5];
1731
2.29M
            prefix_skip = pattern[6];
1732
2.29M
            prefix = pattern + 7;
1733
2.29M
            overlap = prefix + prefix_len - 1;
1734
4.65M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
4.49M
            charset = pattern + 5;
1738
1739
6.94M
        pattern += 1 + pattern[1];
1740
6.94M
    }
1741
1742
6.94M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.94M
           prefix, prefix_len, prefix_skip));
1744
6.94M
    TRACE(("charset = %p\n", charset));
1745
1746
6.94M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.01M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.01M
        end = (SRE_CHAR *)state->end;
1754
2.01M
        state->must_advance = 0;
1755
2.07M
        while (ptr < end) {
1756
27.5M
            while (*ptr != c) {
1757
25.4M
                if (++ptr >= end)
1758
4.98k
                    return 0;
1759
25.4M
            }
1760
2.06M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.06M
            state->start = ptr;
1762
2.06M
            state->ptr = ptr + prefix_skip;
1763
2.06M
            if (flags & SRE_INFO_LITERAL)
1764
4.80k
                return 1; /* we got all of it */
1765
2.05M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.05M
            if (status != 0)
1767
2.00M
                return status;
1768
58.3k
            ++ptr;
1769
58.3k
            RESET_CAPTURE_GROUP();
1770
58.3k
        }
1771
880
        return 0;
1772
2.01M
    }
1773
1774
4.93M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
284k
        Py_ssize_t i = 0;
1778
1779
284k
        end = (SRE_CHAR *)state->end;
1780
284k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
537k
        while (ptr < end) {
1788
537k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.39M
            while (*ptr++ != c) {
1790
5.85M
                if (ptr >= end)
1791
136
                    return 0;
1792
5.85M
            }
1793
537k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
537k
            i = 1;
1797
537k
            state->must_advance = 0;
1798
538k
            do {
1799
538k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
496k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
496k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
496k
                    state->start = ptr - (prefix_len - 1);
1808
496k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
496k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
496k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
496k
                    if (status != 0)
1813
284k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
212k
                    if (++ptr >= end)
1816
16
                        return 0;
1817
212k
                    RESET_CAPTURE_GROUP();
1818
212k
                }
1819
254k
                i = overlap[i];
1820
254k
            } while (i != 0);
1821
537k
        }
1822
0
        return 0;
1823
284k
    }
1824
1825
4.65M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
4.49M
        end = (SRE_CHAR *)state->end;
1828
4.49M
        state->must_advance = 0;
1829
5.27M
        for (;;) {
1830
68.8M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
63.5M
                ptr++;
1832
5.27M
            if (ptr >= end)
1833
46.7k
                return 0;
1834
5.22M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.22M
            state->start = ptr;
1836
5.22M
            state->ptr = ptr;
1837
5.22M
            status = SRE(match)(state, pattern, 0);
1838
5.22M
            if (status != 0)
1839
4.44M
                break;
1840
778k
            ptr++;
1841
778k
            RESET_CAPTURE_GROUP();
1842
778k
        }
1843
4.49M
    } else {
1844
        /* general case */
1845
155k
        assert(ptr <= end);
1846
155k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
155k
        state->start = state->ptr = ptr;
1848
155k
        status = SRE(match)(state, pattern, 1);
1849
155k
        state->must_advance = 0;
1850
155k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
18
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
37.9M
        while (status == 0 && ptr < end) {
1858
37.7M
            ptr++;
1859
37.7M
            RESET_CAPTURE_GROUP();
1860
37.7M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
37.7M
            state->start = state->ptr = ptr;
1862
37.7M
            status = SRE(match)(state, pattern, 0);
1863
37.7M
        }
1864
141k
    }
1865
1866
4.59M
    return status;
1867
4.65M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/