Coverage Report

Created: 2025-11-30 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
113M
{
18
    /* check if pointer is at given position */
19
20
113M
    Py_ssize_t thisp, thatp;
21
22
113M
    switch (at) {
23
24
12.3M
    case SRE_AT_BEGINNING:
25
12.3M
    case SRE_AT_BEGINNING_STRING:
26
12.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
97.2M
    case SRE_AT_END:
33
97.2M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.10M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
97.2M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
4.05M
    case SRE_AT_END_STRING:
42
4.05M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
113M
    }
87
88
0
    return 0;
89
113M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
31.6M
{
18
    /* check if pointer is at given position */
19
20
31.6M
    Py_ssize_t thisp, thatp;
21
22
31.6M
    switch (at) {
23
24
11.4M
    case SRE_AT_BEGINNING:
25
11.4M
    case SRE_AT_BEGINNING_STRING:
26
11.4M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
18.6M
    case SRE_AT_END:
33
18.6M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
491k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
18.6M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.48M
    case SRE_AT_END_STRING:
42
1.48M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
31.6M
    }
87
88
0
    return 0;
89
31.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
64.6M
{
18
    /* check if pointer is at given position */
19
20
64.6M
    Py_ssize_t thisp, thatp;
21
22
64.6M
    switch (at) {
23
24
948k
    case SRE_AT_BEGINNING:
25
948k
    case SRE_AT_BEGINNING_STRING:
26
948k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
62.4M
    case SRE_AT_END:
33
62.4M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
603k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
62.4M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.19M
    case SRE_AT_END_STRING:
42
1.19M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
64.6M
    }
87
88
0
    return 0;
89
64.6M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
17.5M
{
18
    /* check if pointer is at given position */
19
20
17.5M
    Py_ssize_t thisp, thatp;
21
22
17.5M
    switch (at) {
23
24
25.5k
    case SRE_AT_BEGINNING:
25
25.5k
    case SRE_AT_BEGINNING_STRING:
26
25.5k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
16.1M
    case SRE_AT_END:
33
16.1M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
10.9k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
16.1M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.37M
    case SRE_AT_END_STRING:
42
1.37M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.5M
    }
87
88
0
    return 0;
89
17.5M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.56G
{
94
    /* check if character is a member of the given set */
95
96
1.56G
    int ok = 1;
97
98
3.52G
    for (;;) {
99
3.52G
        switch (*set++) {
100
101
1.02G
        case SRE_OP_FAILURE:
102
1.02G
            return !ok;
103
104
1.20G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.20G
            if (ch == set[0])
107
8.33M
                return ok;
108
1.19G
            set++;
109
1.19G
            break;
110
111
116M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
116M
            if (sre_category(set[0], (int) ch))
114
85.0M
                return ok;
115
31.9M
            set++;
116
31.9M
            break;
117
118
542M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
542M
            if (ch < 256 &&
121
510M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
218M
                return ok;
123
324M
            set += 256/SRE_CODE_BITS;
124
324M
            break;
125
126
364M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
364M
            if (set[0] <= ch && ch <= set[1])
129
227M
                return ok;
130
137M
            set += 2;
131
137M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
275M
        case SRE_OP_NEGATE:
148
275M
            ok = !ok;
149
275M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.52G
        }
175
3.52G
    }
176
1.56G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
390M
{
94
    /* check if character is a member of the given set */
95
96
390M
    int ok = 1;
97
98
785M
    for (;;) {
99
785M
        switch (*set++) {
100
101
209M
        case SRE_OP_FAILURE:
102
209M
            return !ok;
103
104
224M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
224M
            if (ch == set[0])
107
5.68M
                return ok;
108
218M
            set++;
109
218M
            break;
110
111
35.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
35.5M
            if (sre_category(set[0], (int) ch))
114
22.4M
                return ok;
115
13.1M
            set++;
116
13.1M
            break;
117
118
94.5M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
94.5M
            if (ch < 256 &&
121
94.5M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
46.3M
                return ok;
123
48.2M
            set += 256/SRE_CODE_BITS;
124
48.2M
            break;
125
126
180M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
180M
            if (set[0] <= ch && ch <= set[1])
129
106M
                return ok;
130
73.1M
            set += 2;
131
73.1M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
41.8M
        case SRE_OP_NEGATE:
148
41.8M
            ok = !ok;
149
41.8M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
785M
        }
175
785M
    }
176
390M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
724M
{
94
    /* check if character is a member of the given set */
95
96
724M
    int ok = 1;
97
98
1.71G
    for (;;) {
99
1.71G
        switch (*set++) {
100
101
511M
        case SRE_OP_FAILURE:
102
511M
            return !ok;
103
104
681M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
681M
            if (ch == set[0])
107
1.57M
                return ok;
108
679M
            set++;
109
679M
            break;
110
111
64.4M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
64.4M
            if (sre_category(set[0], (int) ch))
114
49.0M
                return ok;
115
15.3M
            set++;
116
15.3M
            break;
117
118
188M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
188M
            if (ch < 256 &&
121
176M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
57.6M
                return ok;
123
130M
            set += 256/SRE_CODE_BITS;
124
130M
            break;
125
126
157M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
157M
            if (set[0] <= ch && ch <= set[1])
129
104M
                return ok;
130
52.3M
            set += 2;
131
52.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
116M
        case SRE_OP_NEGATE:
148
116M
            ok = !ok;
149
116M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.71G
        }
175
1.71G
    }
176
724M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
446M
{
94
    /* check if character is a member of the given set */
95
96
446M
    int ok = 1;
97
98
1.01G
    for (;;) {
99
1.01G
        switch (*set++) {
100
101
301M
        case SRE_OP_FAILURE:
102
301M
            return !ok;
103
104
295M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
295M
            if (ch == set[0])
107
1.08M
                return ok;
108
294M
            set++;
109
294M
            break;
110
111
16.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
16.9M
            if (sre_category(set[0], (int) ch))
114
13.5M
                return ok;
115
3.40M
            set++;
116
3.40M
            break;
117
118
259M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
259M
            if (ch < 256 &&
121
240M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
114M
                return ok;
123
145M
            set += 256/SRE_CODE_BITS;
124
145M
            break;
125
126
27.8M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
27.8M
            if (set[0] <= ch && ch <= set[1])
129
15.3M
                return ok;
130
12.4M
            set += 2;
131
12.4M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
117M
        case SRE_OP_NEGATE:
148
117M
            ok = !ok;
149
117M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.01G
        }
175
1.01G
    }
176
446M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
565M
{
195
565M
    SRE_CODE chr;
196
565M
    SRE_CHAR c;
197
565M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
565M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
565M
    Py_ssize_t i;
200
565M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
565M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
56.4M
        end = ptr + maxcount;
205
206
565M
    switch (pattern[0]) {
207
208
448M
    case SRE_OP_IN:
209
        /* repeated set */
210
448M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
868M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
420M
            ptr++;
213
448M
        break;
214
215
25.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
25.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
75.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
49.6M
            ptr++;
220
25.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
88.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
88.7M
        chr = pattern[1];
232
88.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
88.7M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
79.8M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
79.8M
        else
238
79.8M
#endif
239
94.6M
        while (ptr < end && *ptr == c)
240
5.88M
            ptr++;
241
88.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.70M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.70M
        chr = pattern[1];
270
2.70M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.70M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.11M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.11M
        else
276
1.11M
#endif
277
32.5M
        while (ptr < end && *ptr != c)
278
29.8M
            ptr++;
279
2.70M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
565M
    }
319
320
565M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
565M
           ptr - (SRE_CHAR*) state->ptr));
322
565M
    return ptr - (SRE_CHAR*) state->ptr;
323
565M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
181M
{
195
181M
    SRE_CODE chr;
196
181M
    SRE_CHAR c;
197
181M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
181M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
181M
    Py_ssize_t i;
200
181M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
181M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.1M
        end = ptr + maxcount;
205
206
181M
    switch (pattern[0]) {
207
208
106M
    case SRE_OP_IN:
209
        /* repeated set */
210
106M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
230M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
123M
            ptr++;
213
106M
        break;
214
215
8.12M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
8.12M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.9M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
16.8M
            ptr++;
220
8.12M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
66.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
66.3M
        chr = pattern[1];
232
66.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
66.3M
        c = (SRE_CHAR) chr;
234
66.3M
#if SIZEOF_SRE_CHAR < 4
235
66.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
66.3M
        else
238
66.3M
#endif
239
68.4M
        while (ptr < end && *ptr == c)
240
2.17M
            ptr++;
241
66.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
621k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
621k
        chr = pattern[1];
270
621k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
621k
        c = (SRE_CHAR) chr;
272
621k
#if SIZEOF_SRE_CHAR < 4
273
621k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
621k
        else
276
621k
#endif
277
6.76M
        while (ptr < end && *ptr != c)
278
6.14M
            ptr++;
279
621k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
181M
    }
319
320
181M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
181M
           ptr - (SRE_CHAR*) state->ptr));
322
181M
    return ptr - (SRE_CHAR*) state->ptr;
323
181M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
236M
{
195
236M
    SRE_CODE chr;
196
236M
    SRE_CHAR c;
197
236M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
236M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
236M
    Py_ssize_t i;
200
236M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
236M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
25.4M
        end = ptr + maxcount;
205
206
236M
    switch (pattern[0]) {
207
208
208M
    case SRE_OP_IN:
209
        /* repeated set */
210
208M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
377M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
169M
            ptr++;
213
208M
        break;
214
215
14.7M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
14.7M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
40.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
25.5M
            ptr++;
220
14.7M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
13.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
13.5M
        chr = pattern[1];
232
13.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
13.5M
        c = (SRE_CHAR) chr;
234
13.5M
#if SIZEOF_SRE_CHAR < 4
235
13.5M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
13.5M
        else
238
13.5M
#endif
239
16.5M
        while (ptr < end && *ptr == c)
240
2.99M
            ptr++;
241
13.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
498k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
498k
        chr = pattern[1];
270
498k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
498k
        c = (SRE_CHAR) chr;
272
498k
#if SIZEOF_SRE_CHAR < 4
273
498k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
498k
        else
276
498k
#endif
277
10.4M
        while (ptr < end && *ptr != c)
278
9.96M
            ptr++;
279
498k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
236M
    }
319
320
236M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
236M
           ptr - (SRE_CHAR*) state->ptr));
322
236M
    return ptr - (SRE_CHAR*) state->ptr;
323
236M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
146M
{
195
146M
    SRE_CODE chr;
196
146M
    SRE_CHAR c;
197
146M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
146M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
146M
    Py_ssize_t i;
200
146M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
146M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.8M
        end = ptr + maxcount;
205
206
146M
    switch (pattern[0]) {
207
208
133M
    case SRE_OP_IN:
209
        /* repeated set */
210
133M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
260M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
127M
            ptr++;
213
133M
        break;
214
215
3.11M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
3.11M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
10.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
7.31M
            ptr++;
220
3.11M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
8.92M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
8.92M
        chr = pattern[1];
232
8.92M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
8.92M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
9.63M
        while (ptr < end && *ptr == c)
240
718k
            ptr++;
241
8.92M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.58M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.58M
        chr = pattern[1];
270
1.58M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.58M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
15.3M
        while (ptr < end && *ptr != c)
278
13.7M
            ptr++;
279
1.58M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
146M
    }
319
320
146M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
146M
           ptr - (SRE_CHAR*) state->ptr));
322
146M
    return ptr - (SRE_CHAR*) state->ptr;
323
146M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
570M
    do { \
355
570M
        ctx->lastmark = state->lastmark; \
356
570M
        ctx->lastindex = state->lastindex; \
357
570M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
366M
    do { \
360
366M
        state->lastmark = ctx->lastmark; \
361
366M
        state->lastindex = ctx->lastindex; \
362
366M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
221M
    do { \
366
221M
        TRACE(("push last_ptr: %zd", \
367
221M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
221M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
221M
    } while (0)
370
#define LAST_PTR_POP()  \
371
221M
    do { \
372
221M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
221M
        TRACE(("pop last_ptr: %zd", \
374
221M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
221M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
895M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
602M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.12G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
145M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
36.4M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.49G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.49G
do { \
390
1.49G
    alloc_pos = state->data_stack_base; \
391
1.49G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.49G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.49G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
166M
        int j = data_stack_grow(state, sizeof(type)); \
395
166M
        if (j < 0) return j; \
396
166M
        if (ctx_pos != -1) \
397
166M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
166M
    } \
399
1.49G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.49G
    state->data_stack_base += sizeof(type); \
401
1.49G
} while (0)
402
403
1.58G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.58G
do { \
405
1.58G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.58G
    ptr = (type*)(state->data_stack+pos); \
407
1.58G
} while (0)
408
409
535M
#define DATA_STACK_PUSH(state, data, size) \
410
535M
do { \
411
535M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
535M
           data, state->data_stack_base, size)); \
413
535M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
84.2k
        int j = data_stack_grow(state, size); \
415
84.2k
        if (j < 0) return j; \
416
84.2k
        if (ctx_pos != -1) \
417
84.2k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
84.2k
    } \
419
535M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
535M
    state->data_stack_base += size; \
421
535M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
359M
#define DATA_STACK_POP(state, data, size, discard) \
427
359M
do { \
428
359M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
359M
           data, state->data_stack_base-size, size)); \
430
359M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
359M
    if (discard) \
432
359M
        state->data_stack_base -= size; \
433
359M
} while (0)
434
435
1.67G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.67G
do { \
437
1.67G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.67G
           state->data_stack_base-size, size)); \
439
1.67G
    state->data_stack_base -= size; \
440
1.67G
} while(0)
441
442
#define DATA_PUSH(x) \
443
221M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
221M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.49G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.49G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.58G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
425M
    do if (lastmark >= 0) { \
473
313M
        MARK_TRACE("push", (lastmark)); \
474
313M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
313M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
425M
    } while (0)
477
#define MARK_POP(lastmark) \
478
154M
    do if (lastmark >= 0) { \
479
136M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
136M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
136M
        MARK_TRACE("pop", (lastmark)); \
482
154M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.98M
    do if (lastmark >= 0) { \
485
1.88M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.88M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.88M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.98M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
270M
    do if (lastmark >= 0) { \
491
177M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
177M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
177M
        MARK_TRACE("pop discard", (lastmark)); \
494
270M
    } while (0)
495
496
482M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
221M
#define JUMP_MAX_UNTIL_2     2
499
145M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
144M
#define JUMP_REPEAT          7
504
12.5M
#define JUMP_REPEAT_ONE_1    8
505
237M
#define JUMP_REPEAT_ONE_2    9
506
26.6M
#define JUMP_MIN_REPEAT_ONE  10
507
163M
#define JUMP_BRANCH          11
508
36.4M
#define JUMP_ASSERT          12
509
25.1M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.01G
    ctx->pattern = pattern; \
516
1.01G
    ctx->ptr = ptr; \
517
1.01G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.01G
    nextctx->pattern = nextpattern; \
519
1.01G
    nextctx->toplevel = toplevel_; \
520
1.01G
    nextctx->jump = jumpvalue; \
521
1.01G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.01G
    pattern = nextpattern; \
523
1.01G
    ctx_pos = alloc_pos; \
524
1.01G
    ctx = nextctx; \
525
1.01G
    goto entrance; \
526
1.01G
    jumplabel: \
527
1.01G
    pattern = ctx->pattern; \
528
1.01G
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
953M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
61.5M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.50G
    do {                                                           \
553
2.50G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.50G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.50G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.58G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.50G
        do {                               \
588
2.50G
            MAYBE_CHECK_SIGNALS;           \
589
2.50G
            goto *sre_targets[*pattern++]; \
590
2.50G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
482M
{
601
482M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
482M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
482M
    Py_ssize_t ret = 0;
604
482M
    int jump;
605
482M
    unsigned int sigcount = state->sigcount;
606
607
482M
    SRE(match_context)* ctx;
608
482M
    SRE(match_context)* nextctx;
609
482M
    INIT_TRACE(state);
610
611
482M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
482M
    DATA_ALLOC(SRE(match_context), ctx);
614
482M
    ctx->last_ctx_pos = -1;
615
482M
    ctx->jump = JUMP_NONE;
616
482M
    ctx->toplevel = toplevel;
617
482M
    ctx_pos = alloc_pos;
618
619
482M
#if USE_COMPUTED_GOTOS
620
482M
#include "sre_targets.h"
621
482M
#endif
622
623
1.49G
entrance:
624
625
1.49G
    ;  // Fashion statement.
626
1.49G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.49G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
71.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.40M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.40M
                   end - ptr, (size_t) pattern[3]));
634
6.40M
            RETURN_FAILURE;
635
6.40M
        }
636
65.3M
        pattern += pattern[1] + 1;
637
65.3M
    }
638
639
1.49G
#if USE_COMPUTED_GOTOS
640
1.49G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.49G
    {
647
648
1.49G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
584M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
584M
                   ptr, pattern[0]));
653
584M
            {
654
584M
                int i = pattern[0];
655
584M
                if (i & 1)
656
96.5M
                    state->lastindex = i/2 + 1;
657
584M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
576M
                    int j = state->lastmark + 1;
663
594M
                    while (j < i)
664
17.4M
                        state->mark[j++] = NULL;
665
576M
                    state->lastmark = i;
666
576M
                }
667
584M
                state->mark[i] = ptr;
668
584M
            }
669
584M
            pattern++;
670
584M
            DISPATCH;
671
672
584M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
140M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
140M
                   ptr, *pattern));
677
140M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
59.8M
                RETURN_FAILURE;
679
80.6M
            pattern++;
680
80.6M
            ptr++;
681
80.6M
            DISPATCH;
682
683
80.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
167M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
167M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
167M
            if (ctx->toplevel &&
698
50.4M
                ((state->match_all && ptr != state->end) ||
699
50.4M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
167M
            state->ptr = ptr;
704
167M
            RETURN_SUCCESS;
705
706
113M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
113M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
113M
            if (!SRE(at)(state, ptr, *pattern))
711
93.0M
                RETURN_FAILURE;
712
20.7M
            pattern++;
713
20.7M
            DISPATCH;
714
715
20.7M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
294M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
294M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
294M
            if (ptr >= end ||
749
293M
                !SRE(charset)(state, pattern + 1, *ptr))
750
105M
                RETURN_FAILURE;
751
189M
            pattern += pattern[0];
752
189M
            ptr++;
753
189M
            DISPATCH;
754
755
189M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
6.93M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
6.93M
                   pattern, ptr, pattern[0]));
758
6.93M
            if (ptr >= end ||
759
6.93M
                sre_lower_ascii(*ptr) != *pattern)
760
88.3k
                RETURN_FAILURE;
761
6.84M
            pattern++;
762
6.84M
            ptr++;
763
6.84M
            DISPATCH;
764
765
6.84M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
77.8M
        TARGET(SRE_OP_JUMP):
845
77.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
77.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
77.8M
                   ptr, pattern[0]));
850
77.8M
            pattern += pattern[0];
851
77.8M
            DISPATCH;
852
853
125M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
125M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
125M
            LASTMARK_SAVE();
858
125M
            if (state->repeat)
859
67.2M
                MARK_PUSH(ctx->lastmark);
860
316M
            for (; pattern[0]; pattern += pattern[0]) {
861
266M
                if (pattern[1] == SRE_OP_LITERAL &&
862
122M
                    (ptr >= end ||
863
122M
                     (SRE_CODE) *ptr != pattern[2]))
864
66.6M
                    continue;
865
200M
                if (pattern[1] == SRE_OP_IN &&
866
60.7M
                    (ptr >= end ||
867
60.6M
                     !SRE(charset)(state, pattern + 3,
868
60.6M
                                   (SRE_CODE) *ptr)))
869
36.5M
                    continue;
870
163M
                state->ptr = ptr;
871
163M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
163M
                if (ret) {
873
75.4M
                    if (state->repeat)
874
51.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
75.4M
                    RETURN_ON_ERROR(ret);
876
75.4M
                    RETURN_SUCCESS;
877
75.4M
                }
878
88.1M
                if (state->repeat)
879
24.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
88.1M
                LASTMARK_RESTORE();
881
88.1M
            }
882
49.9M
            if (state->repeat)
883
15.5M
                MARK_POP_DISCARD(ctx->lastmark);
884
49.9M
            RETURN_FAILURE;
885
886
545M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
545M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
545M
                   pattern[1], pattern[2]));
898
899
545M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.30M
                RETURN_FAILURE; /* cannot match */
901
902
543M
            state->ptr = ptr;
903
904
543M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
543M
            RETURN_ON_ERROR(ret);
906
543M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
543M
            ctx->count = ret;
908
543M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
543M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
350M
                RETURN_FAILURE;
917
918
193M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.44M
                ptr == state->end &&
920
71.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
71.8k
            {
922
                /* tail is empty.  we're finished */
923
71.8k
                state->ptr = ptr;
924
71.8k
                RETURN_SUCCESS;
925
71.8k
            }
926
927
193M
            LASTMARK_SAVE();
928
193M
            if (state->repeat)
929
110M
                MARK_PUSH(ctx->lastmark);
930
931
193M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
34.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
34.6M
                for (;;) {
936
80.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
58.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
46.2M
                        ptr--;
939
46.2M
                        ctx->count--;
940
46.2M
                    }
941
34.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
22.0M
                        break;
943
12.5M
                    state->ptr = ptr;
944
12.5M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.5M
                            pattern+pattern[0]);
946
12.5M
                    if (ret) {
947
12.5M
                        if (state->repeat)
948
11.0M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.5M
                        RETURN_ON_ERROR(ret);
950
12.5M
                        RETURN_SUCCESS;
951
12.5M
                    }
952
633
                    if (state->repeat)
953
633
                        MARK_POP_KEEP(ctx->lastmark);
954
633
                    LASTMARK_RESTORE();
955
956
633
                    ptr--;
957
633
                    ctx->count--;
958
633
                }
959
22.0M
                if (state->repeat)
960
20.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
158M
            } else {
962
                /* general case */
963
260M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
237M
                    state->ptr = ptr;
965
237M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
237M
                            pattern+pattern[0]);
967
237M
                    if (ret) {
968
135M
                        if (state->repeat)
969
78.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
135M
                        RETURN_ON_ERROR(ret);
971
135M
                        RETURN_SUCCESS;
972
135M
                    }
973
102M
                    if (state->repeat)
974
1.95M
                        MARK_POP_KEEP(ctx->lastmark);
975
102M
                    LASTMARK_RESTORE();
976
977
102M
                    ptr--;
978
102M
                    ctx->count--;
979
102M
                }
980
23.0M
                if (state->repeat)
981
1.09M
                    MARK_POP_DISCARD(ctx->lastmark);
982
23.0M
            }
983
45.1M
            RETURN_FAILURE;
984
985
4.94M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.94M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.94M
                   pattern[1], pattern[2]));
997
998
4.94M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.94M
            state->ptr = ptr;
1002
1003
4.94M
            if (pattern[1] == 0)
1004
4.94M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.94M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.94M
            } else {
1028
                /* general case */
1029
4.94M
                LASTMARK_SAVE();
1030
4.94M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
26.6M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
26.6M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
26.6M
                    state->ptr = ptr;
1036
26.6M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
26.6M
                            pattern+pattern[0]);
1038
26.6M
                    if (ret) {
1039
4.94M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.94M
                        RETURN_ON_ERROR(ret);
1042
4.94M
                        RETURN_SUCCESS;
1043
4.94M
                    }
1044
21.7M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
21.7M
                    LASTMARK_RESTORE();
1047
1048
21.7M
                    state->ptr = ptr;
1049
21.7M
                    ret = SRE(count)(state, pattern+3, 1);
1050
21.7M
                    RETURN_ON_ERROR(ret);
1051
21.7M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
21.7M
                    if (ret == 0)
1053
0
                        break;
1054
21.7M
                    assert(ret == 1);
1055
21.7M
                    ptr++;
1056
21.7M
                    ctx->count++;
1057
21.7M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
144M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
144M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
144M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
144M
            ctx->u.rep = repeat_pool_malloc(state);
1127
144M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
144M
            ctx->u.rep->count = -1;
1131
144M
            ctx->u.rep->pattern = pattern;
1132
144M
            ctx->u.rep->prev = state->repeat;
1133
144M
            ctx->u.rep->last_ptr = NULL;
1134
144M
            state->repeat = ctx->u.rep;
1135
1136
144M
            state->ptr = ptr;
1137
144M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
144M
            state->repeat = ctx->u.rep->prev;
1139
144M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
144M
            if (ret) {
1142
56.7M
                RETURN_ON_ERROR(ret);
1143
56.7M
                RETURN_SUCCESS;
1144
56.7M
            }
1145
88.0M
            RETURN_FAILURE;
1146
1147
237M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
237M
            ctx->u.rep = state->repeat;
1155
237M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
237M
            state->ptr = ptr;
1159
1160
237M
            ctx->count = ctx->u.rep->count+1;
1161
1162
237M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
237M
                   ptr, ctx->count));
1164
1165
237M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
237M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
16.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
221M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
221M
                ctx->u.rep->count = ctx->count;
1185
221M
                LASTMARK_SAVE();
1186
221M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
221M
                LAST_PTR_PUSH();
1189
221M
                ctx->u.rep->last_ptr = state->ptr;
1190
221M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
221M
                        ctx->u.rep->pattern+3);
1192
221M
                LAST_PTR_POP();
1193
221M
                if (ret) {
1194
92.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
92.2M
                    RETURN_ON_ERROR(ret);
1196
92.2M
                    RETURN_SUCCESS;
1197
92.2M
                }
1198
129M
                MARK_POP(ctx->lastmark);
1199
129M
                LASTMARK_RESTORE();
1200
129M
                ctx->u.rep->count = ctx->count-1;
1201
129M
                state->ptr = ptr;
1202
129M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
145M
            state->repeat = ctx->u.rep->prev;
1207
145M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
145M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
145M
            RETURN_ON_SUCCESS(ret);
1211
88.9M
            state->ptr = ptr;
1212
88.9M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
36.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
36.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
36.4M
                   ptr, pattern[1]));
1565
36.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
36.4M
            state->ptr = ptr - pattern[1];
1568
36.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
36.4M
            RETURN_ON_FAILURE(ret);
1570
29.8M
            pattern += pattern[0];
1571
29.8M
            DISPATCH;
1572
1573
29.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.1M
                   ptr, pattern[1]));
1578
25.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.1M
                state->ptr = ptr - pattern[1];
1580
25.1M
                LASTMARK_SAVE();
1581
25.1M
                if (state->repeat)
1582
25.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
50.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
50.2M
                if (ret) {
1586
18.9k
                    if (state->repeat)
1587
18.9k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
18.9k
                    RETURN_ON_ERROR(ret);
1589
18.9k
                    RETURN_FAILURE;
1590
18.9k
                }
1591
25.0M
                if (state->repeat)
1592
25.0M
                    MARK_POP(ctx->lastmark);
1593
25.0M
                LASTMARK_RESTORE();
1594
25.0M
            }
1595
25.0M
            pattern += pattern[0];
1596
25.0M
            DISPATCH;
1597
1598
25.0M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.49G
exit:
1620
1.49G
    ctx_pos = ctx->last_ctx_pos;
1621
1.49G
    jump = ctx->jump;
1622
1.49G
    DATA_POP_DISCARD(ctx);
1623
1.49G
    if (ctx_pos == -1) {
1624
482M
        state->sigcount = sigcount;
1625
482M
        return ret;
1626
482M
    }
1627
1.01G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.01G
    switch (jump) {
1630
221M
        case JUMP_MAX_UNTIL_2:
1631
221M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
221M
            goto jump_max_until_2;
1633
145M
        case JUMP_MAX_UNTIL_3:
1634
145M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
145M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
163M
        case JUMP_BRANCH:
1643
163M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
163M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
144M
        case JUMP_REPEAT:
1658
144M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
144M
            goto jump_repeat;
1660
12.5M
        case JUMP_REPEAT_ONE_1:
1661
12.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.5M
            goto jump_repeat_one_1;
1663
237M
        case JUMP_REPEAT_ONE_2:
1664
237M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
237M
            goto jump_repeat_one_2;
1666
26.6M
        case JUMP_MIN_REPEAT_ONE:
1667
26.6M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
26.6M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
36.4M
        case JUMP_ASSERT:
1673
36.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
36.4M
            goto jump_assert;
1675
25.1M
        case JUMP_ASSERT_NOT:
1676
25.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.01G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.01G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
168M
{
601
168M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
168M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
168M
    Py_ssize_t ret = 0;
604
168M
    int jump;
605
168M
    unsigned int sigcount = state->sigcount;
606
607
168M
    SRE(match_context)* ctx;
608
168M
    SRE(match_context)* nextctx;
609
168M
    INIT_TRACE(state);
610
611
168M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
168M
    DATA_ALLOC(SRE(match_context), ctx);
614
168M
    ctx->last_ctx_pos = -1;
615
168M
    ctx->jump = JUMP_NONE;
616
168M
    ctx->toplevel = toplevel;
617
168M
    ctx_pos = alloc_pos;
618
619
168M
#if USE_COMPUTED_GOTOS
620
168M
#include "sre_targets.h"
621
168M
#endif
622
623
444M
entrance:
624
625
444M
    ;  // Fashion statement.
626
444M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
444M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
39.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.21M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.21M
                   end - ptr, (size_t) pattern[3]));
634
6.21M
            RETURN_FAILURE;
635
6.21M
        }
636
33.4M
        pattern += pattern[1] + 1;
637
33.4M
    }
638
639
438M
#if USE_COMPUTED_GOTOS
640
438M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
438M
    {
647
648
438M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
181M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
181M
                   ptr, pattern[0]));
653
181M
            {
654
181M
                int i = pattern[0];
655
181M
                if (i & 1)
656
40.9M
                    state->lastindex = i/2 + 1;
657
181M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
178M
                    int j = state->lastmark + 1;
663
190M
                    while (j < i)
664
12.3M
                        state->mark[j++] = NULL;
665
178M
                    state->lastmark = i;
666
178M
                }
667
181M
                state->mark[i] = ptr;
668
181M
            }
669
181M
            pattern++;
670
181M
            DISPATCH;
671
672
181M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
80.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
80.4M
                   ptr, *pattern));
677
80.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
35.7M
                RETURN_FAILURE;
679
44.6M
            pattern++;
680
44.6M
            ptr++;
681
44.6M
            DISPATCH;
682
683
44.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
62.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
62.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
62.4M
            if (ctx->toplevel &&
698
25.0M
                ((state->match_all && ptr != state->end) ||
699
25.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
62.4M
            state->ptr = ptr;
704
62.4M
            RETURN_SUCCESS;
705
706
31.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
31.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
31.6M
            if (!SRE(at)(state, ptr, *pattern))
711
12.6M
                RETURN_FAILURE;
712
18.9M
            pattern++;
713
18.9M
            DISPATCH;
714
715
18.9M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
63.4M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
63.4M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
63.4M
            if (ptr >= end ||
749
63.0M
                !SRE(charset)(state, pattern + 1, *ptr))
750
11.8M
                RETURN_FAILURE;
751
51.6M
            pattern += pattern[0];
752
51.6M
            ptr++;
753
51.6M
            DISPATCH;
754
755
51.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
425k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
425k
                   pattern, ptr, pattern[0]));
758
425k
            if (ptr >= end ||
759
425k
                sre_lower_ascii(*ptr) != *pattern)
760
31.2k
                RETURN_FAILURE;
761
394k
            pattern++;
762
394k
            ptr++;
763
394k
            DISPATCH;
764
765
394k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
31.6M
        TARGET(SRE_OP_JUMP):
845
31.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
31.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
31.6M
                   ptr, pattern[0]));
850
31.6M
            pattern += pattern[0];
851
31.6M
            DISPATCH;
852
853
61.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
61.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
61.6M
            LASTMARK_SAVE();
858
61.6M
            if (state->repeat)
859
12.1M
                MARK_PUSH(ctx->lastmark);
860
178M
            for (; pattern[0]; pattern += pattern[0]) {
861
147M
                if (pattern[1] == SRE_OP_LITERAL &&
862
65.6M
                    (ptr >= end ||
863
65.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
28.3M
                    continue;
865
118M
                if (pattern[1] == SRE_OP_IN &&
866
12.9M
                    (ptr >= end ||
867
12.9M
                     !SRE(charset)(state, pattern + 3,
868
12.9M
                                   (SRE_CODE) *ptr)))
869
7.00M
                    continue;
870
111M
                state->ptr = ptr;
871
111M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
111M
                if (ret) {
873
30.1M
                    if (state->repeat)
874
11.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
30.1M
                    RETURN_ON_ERROR(ret);
876
30.1M
                    RETURN_SUCCESS;
877
30.1M
                }
878
81.5M
                if (state->repeat)
879
6.76k
                    MARK_POP_KEEP(ctx->lastmark);
880
81.5M
                LASTMARK_RESTORE();
881
81.5M
            }
882
31.5M
            if (state->repeat)
883
349k
                MARK_POP_DISCARD(ctx->lastmark);
884
31.5M
            RETURN_FAILURE;
885
886
178M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
178M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
178M
                   pattern[1], pattern[2]));
898
899
178M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.06M
                RETURN_FAILURE; /* cannot match */
901
902
177M
            state->ptr = ptr;
903
904
177M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
177M
            RETURN_ON_ERROR(ret);
906
177M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
177M
            ctx->count = ret;
908
177M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
177M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
123M
                RETURN_FAILURE;
917
918
54.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
653k
                ptr == state->end &&
920
49.3k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
49.3k
            {
922
                /* tail is empty.  we're finished */
923
49.3k
                state->ptr = ptr;
924
49.3k
                RETURN_SUCCESS;
925
49.3k
            }
926
927
54.1M
            LASTMARK_SAVE();
928
54.1M
            if (state->repeat)
929
30.0M
                MARK_PUSH(ctx->lastmark);
930
931
54.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.40M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.40M
                for (;;) {
936
16.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
12.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.76M
                        ptr--;
939
9.76M
                        ctx->count--;
940
9.76M
                    }
941
6.40M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.36M
                        break;
943
3.04M
                    state->ptr = ptr;
944
3.04M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.04M
                            pattern+pattern[0]);
946
3.04M
                    if (ret) {
947
3.04M
                        if (state->repeat)
948
1.58M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.04M
                        RETURN_ON_ERROR(ret);
950
3.04M
                        RETURN_SUCCESS;
951
3.04M
                    }
952
133
                    if (state->repeat)
953
133
                        MARK_POP_KEEP(ctx->lastmark);
954
133
                    LASTMARK_RESTORE();
955
956
133
                    ptr--;
957
133
                    ctx->count--;
958
133
                }
959
3.36M
                if (state->repeat)
960
1.82M
                    MARK_POP_DISCARD(ctx->lastmark);
961
47.7M
            } else {
962
                /* general case */
963
65.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
59.8M
                    state->ptr = ptr;
965
59.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
59.8M
                            pattern+pattern[0]);
967
59.8M
                    if (ret) {
968
42.5M
                        if (state->repeat)
969
25.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
42.5M
                        RETURN_ON_ERROR(ret);
971
42.5M
                        RETURN_SUCCESS;
972
42.5M
                    }
973
17.3M
                    if (state->repeat)
974
1.23M
                        MARK_POP_KEEP(ctx->lastmark);
975
17.3M
                    LASTMARK_RESTORE();
976
977
17.3M
                    ptr--;
978
17.3M
                    ctx->count--;
979
17.3M
                }
980
5.18M
                if (state->repeat)
981
706k
                    MARK_POP_DISCARD(ctx->lastmark);
982
5.18M
            }
983
8.55M
            RETURN_FAILURE;
984
985
4.13M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
4.13M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
4.13M
                   pattern[1], pattern[2]));
997
998
4.13M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
4.13M
            state->ptr = ptr;
1002
1003
4.13M
            if (pattern[1] == 0)
1004
4.13M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
4.13M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
4.13M
            } else {
1028
                /* general case */
1029
4.13M
                LASTMARK_SAVE();
1030
4.13M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
8.36M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
8.36M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
8.36M
                    state->ptr = ptr;
1036
8.36M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
8.36M
                            pattern+pattern[0]);
1038
8.36M
                    if (ret) {
1039
4.13M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
4.13M
                        RETURN_ON_ERROR(ret);
1042
4.13M
                        RETURN_SUCCESS;
1043
4.13M
                    }
1044
4.23M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
4.23M
                    LASTMARK_RESTORE();
1047
1048
4.23M
                    state->ptr = ptr;
1049
4.23M
                    ret = SRE(count)(state, pattern+3, 1);
1050
4.23M
                    RETURN_ON_ERROR(ret);
1051
4.23M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
4.23M
                    if (ret == 0)
1053
0
                        break;
1054
4.23M
                    assert(ret == 1);
1055
4.23M
                    ptr++;
1056
4.23M
                    ctx->count++;
1057
4.23M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
21.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
21.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
21.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
21.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
21.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
21.5M
            ctx->u.rep->count = -1;
1131
21.5M
            ctx->u.rep->pattern = pattern;
1132
21.5M
            ctx->u.rep->prev = state->repeat;
1133
21.5M
            ctx->u.rep->last_ptr = NULL;
1134
21.5M
            state->repeat = ctx->u.rep;
1135
1136
21.5M
            state->ptr = ptr;
1137
21.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
21.5M
            state->repeat = ctx->u.rep->prev;
1139
21.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
21.5M
            if (ret) {
1142
10.9M
                RETURN_ON_ERROR(ret);
1143
10.9M
                RETURN_SUCCESS;
1144
10.9M
            }
1145
10.6M
            RETURN_FAILURE;
1146
1147
49.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
49.6M
            ctx->u.rep = state->repeat;
1155
49.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
49.6M
            state->ptr = ptr;
1159
1160
49.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
49.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
49.6M
                   ptr, ctx->count));
1164
1165
49.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
49.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
8.03M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
41.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
41.6M
                ctx->u.rep->count = ctx->count;
1185
41.6M
                LASTMARK_SAVE();
1186
41.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
41.6M
                LAST_PTR_PUSH();
1189
41.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
41.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
41.6M
                        ctx->u.rep->pattern+3);
1192
41.6M
                LAST_PTR_POP();
1193
41.6M
                if (ret) {
1194
27.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
27.5M
                    RETURN_ON_ERROR(ret);
1196
27.5M
                    RETURN_SUCCESS;
1197
27.5M
                }
1198
14.1M
                MARK_POP(ctx->lastmark);
1199
14.1M
                LASTMARK_RESTORE();
1200
14.1M
                ctx->u.rep->count = ctx->count-1;
1201
14.1M
                state->ptr = ptr;
1202
14.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
22.1M
            state->repeat = ctx->u.rep->prev;
1207
22.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
22.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
22.1M
            RETURN_ON_SUCCESS(ret);
1211
11.1M
            state->ptr = ptr;
1212
11.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.96M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.96M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.96M
                   ptr, pattern[1]));
1565
2.96M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.96M
            state->ptr = ptr - pattern[1];
1568
2.96M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.96M
            RETURN_ON_FAILURE(ret);
1570
2.82M
            pattern += pattern[0];
1571
2.82M
            DISPATCH;
1572
1573
5.47M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.47M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.47M
                   ptr, pattern[1]));
1578
5.47M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.47M
                state->ptr = ptr - pattern[1];
1580
5.47M
                LASTMARK_SAVE();
1581
5.47M
                if (state->repeat)
1582
5.47M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.9M
                if (ret) {
1586
1.86k
                    if (state->repeat)
1587
1.86k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.86k
                    RETURN_ON_ERROR(ret);
1589
1.86k
                    RETURN_FAILURE;
1590
1.86k
                }
1591
5.47M
                if (state->repeat)
1592
5.47M
                    MARK_POP(ctx->lastmark);
1593
5.47M
                LASTMARK_RESTORE();
1594
5.47M
            }
1595
5.47M
            pattern += pattern[0];
1596
5.47M
            DISPATCH;
1597
1598
5.47M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
444M
exit:
1620
444M
    ctx_pos = ctx->last_ctx_pos;
1621
444M
    jump = ctx->jump;
1622
444M
    DATA_POP_DISCARD(ctx);
1623
444M
    if (ctx_pos == -1) {
1624
168M
        state->sigcount = sigcount;
1625
168M
        return ret;
1626
168M
    }
1627
276M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
276M
    switch (jump) {
1630
41.6M
        case JUMP_MAX_UNTIL_2:
1631
41.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
41.6M
            goto jump_max_until_2;
1633
22.1M
        case JUMP_MAX_UNTIL_3:
1634
22.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
22.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
111M
        case JUMP_BRANCH:
1643
111M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
111M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
21.5M
        case JUMP_REPEAT:
1658
21.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
21.5M
            goto jump_repeat;
1660
3.04M
        case JUMP_REPEAT_ONE_1:
1661
3.04M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.04M
            goto jump_repeat_one_1;
1663
59.8M
        case JUMP_REPEAT_ONE_2:
1664
59.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
59.8M
            goto jump_repeat_one_2;
1666
8.36M
        case JUMP_MIN_REPEAT_ONE:
1667
8.36M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
8.36M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.96M
        case JUMP_ASSERT:
1673
2.96M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.96M
            goto jump_assert;
1675
5.47M
        case JUMP_ASSERT_NOT:
1676
5.47M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.47M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
276M
    }
1683
1684
0
    return ret; /* should never get here */
1685
276M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
227M
{
601
227M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
227M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
227M
    Py_ssize_t ret = 0;
604
227M
    int jump;
605
227M
    unsigned int sigcount = state->sigcount;
606
607
227M
    SRE(match_context)* ctx;
608
227M
    SRE(match_context)* nextctx;
609
227M
    INIT_TRACE(state);
610
611
227M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
227M
    DATA_ALLOC(SRE(match_context), ctx);
614
227M
    ctx->last_ctx_pos = -1;
615
227M
    ctx->jump = JUMP_NONE;
616
227M
    ctx->toplevel = toplevel;
617
227M
    ctx_pos = alloc_pos;
618
619
227M
#if USE_COMPUTED_GOTOS
620
227M
#include "sre_targets.h"
621
227M
#endif
622
623
644M
entrance:
624
625
644M
    ;  // Fashion statement.
626
644M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
644M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
18.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
188k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
188k
                   end - ptr, (size_t) pattern[3]));
634
188k
            RETURN_FAILURE;
635
188k
        }
636
18.5M
        pattern += pattern[1] + 1;
637
18.5M
    }
638
639
644M
#if USE_COMPUTED_GOTOS
640
644M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
644M
    {
647
648
644M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
270M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
270M
                   ptr, pattern[0]));
653
270M
            {
654
270M
                int i = pattern[0];
655
270M
                if (i & 1)
656
30.9M
                    state->lastindex = i/2 + 1;
657
270M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
269M
                    int j = state->lastmark + 1;
663
272M
                    while (j < i)
664
2.82M
                        state->mark[j++] = NULL;
665
269M
                    state->lastmark = i;
666
269M
                }
667
270M
                state->mark[i] = ptr;
668
270M
            }
669
270M
            pattern++;
670
270M
            DISPATCH;
671
672
270M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
29.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
29.8M
                   ptr, *pattern));
677
29.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
13.2M
                RETURN_FAILURE;
679
16.6M
            pattern++;
680
16.6M
            ptr++;
681
16.6M
            DISPATCH;
682
683
16.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
66.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
66.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
66.1M
            if (ctx->toplevel &&
698
13.6M
                ((state->match_all && ptr != state->end) ||
699
13.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
66.1M
            state->ptr = ptr;
704
66.1M
            RETURN_SUCCESS;
705
706
64.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
64.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
64.6M
            if (!SRE(at)(state, ptr, *pattern))
711
62.8M
                RETURN_FAILURE;
712
1.74M
            pattern++;
713
1.74M
            DISPATCH;
714
715
1.74M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
147M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
147M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
147M
            if (ptr >= end ||
749
147M
                !SRE(charset)(state, pattern + 1, *ptr))
750
71.2M
                RETURN_FAILURE;
751
76.4M
            pattern += pattern[0];
752
76.4M
            ptr++;
753
76.4M
            DISPATCH;
754
755
76.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.26M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.26M
                   pattern, ptr, pattern[0]));
758
4.26M
            if (ptr >= end ||
759
4.26M
                sre_lower_ascii(*ptr) != *pattern)
760
36.3k
                RETURN_FAILURE;
761
4.22M
            pattern++;
762
4.22M
            ptr++;
763
4.22M
            DISPATCH;
764
765
4.22M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.2M
        TARGET(SRE_OP_JUMP):
845
17.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.2M
                   ptr, pattern[0]));
850
17.2M
            pattern += pattern[0];
851
17.2M
            DISPATCH;
852
853
24.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
24.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
24.6M
            LASTMARK_SAVE();
858
24.6M
            if (state->repeat)
859
20.3M
                MARK_PUSH(ctx->lastmark);
860
53.9M
            for (; pattern[0]; pattern += pattern[0]) {
861
46.1M
                if (pattern[1] == SRE_OP_LITERAL &&
862
20.1M
                    (ptr >= end ||
863
20.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
12.2M
                    continue;
865
33.9M
                if (pattern[1] == SRE_OP_IN &&
866
18.0M
                    (ptr >= end ||
867
18.0M
                     !SRE(charset)(state, pattern + 3,
868
18.0M
                                   (SRE_CODE) *ptr)))
869
11.2M
                    continue;
870
22.6M
                state->ptr = ptr;
871
22.6M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
22.6M
                if (ret) {
873
16.8M
                    if (state->repeat)
874
15.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
16.8M
                    RETURN_ON_ERROR(ret);
876
16.8M
                    RETURN_SUCCESS;
877
16.8M
                }
878
5.78M
                if (state->repeat)
879
3.21k
                    MARK_POP_KEEP(ctx->lastmark);
880
5.78M
                LASTMARK_RESTORE();
881
5.78M
            }
882
7.74M
            if (state->repeat)
883
5.22M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.74M
            RETURN_FAILURE;
885
886
222M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
222M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
222M
                   pattern[1], pattern[2]));
898
899
222M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
217k
                RETURN_FAILURE; /* cannot match */
901
902
222M
            state->ptr = ptr;
903
904
222M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
222M
            RETURN_ON_ERROR(ret);
906
222M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
222M
            ctx->count = ret;
908
222M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
222M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
162M
                RETURN_FAILURE;
917
918
59.7M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
4.74M
                ptr == state->end &&
920
18.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.5k
            {
922
                /* tail is empty.  we're finished */
923
18.5k
                state->ptr = ptr;
924
18.5k
                RETURN_SUCCESS;
925
18.5k
            }
926
927
59.7M
            LASTMARK_SAVE();
928
59.7M
            if (state->repeat)
929
24.7M
                MARK_PUSH(ctx->lastmark);
930
931
59.7M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.61M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.61M
                for (;;) {
936
14.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
10.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
7.39M
                        ptr--;
939
7.39M
                        ctx->count--;
940
7.39M
                    }
941
6.61M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.65M
                        break;
943
2.95M
                    state->ptr = ptr;
944
2.95M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.95M
                            pattern+pattern[0]);
946
2.95M
                    if (ret) {
947
2.95M
                        if (state->repeat)
948
2.92M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.95M
                        RETURN_ON_ERROR(ret);
950
2.95M
                        RETURN_SUCCESS;
951
2.95M
                    }
952
204
                    if (state->repeat)
953
204
                        MARK_POP_KEEP(ctx->lastmark);
954
204
                    LASTMARK_RESTORE();
955
956
204
                    ptr--;
957
204
                    ctx->count--;
958
204
                }
959
3.65M
                if (state->repeat)
960
3.65M
                    MARK_POP_DISCARD(ctx->lastmark);
961
53.1M
            } else {
962
                /* general case */
963
119M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
104M
                    state->ptr = ptr;
965
104M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
104M
                            pattern+pattern[0]);
967
104M
                    if (ret) {
968
38.4M
                        if (state->repeat)
969
17.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
38.4M
                        RETURN_ON_ERROR(ret);
971
38.4M
                        RETURN_SUCCESS;
972
38.4M
                    }
973
66.4M
                    if (state->repeat)
974
550k
                        MARK_POP_KEEP(ctx->lastmark);
975
66.4M
                    LASTMARK_RESTORE();
976
977
66.4M
                    ptr--;
978
66.4M
                    ctx->count--;
979
66.4M
                }
980
14.6M
                if (state->repeat)
981
295k
                    MARK_POP_DISCARD(ctx->lastmark);
982
14.6M
            }
983
18.3M
            RETURN_FAILURE;
984
985
791k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
791k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
791k
                   pattern[1], pattern[2]));
997
998
791k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
791k
            state->ptr = ptr;
1002
1003
791k
            if (pattern[1] == 0)
1004
791k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
791k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
791k
            } else {
1028
                /* general case */
1029
791k
                LASTMARK_SAVE();
1030
791k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
15.1M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
15.1M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
15.1M
                    state->ptr = ptr;
1036
15.1M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
15.1M
                            pattern+pattern[0]);
1038
15.1M
                    if (ret) {
1039
791k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
791k
                        RETURN_ON_ERROR(ret);
1042
791k
                        RETURN_SUCCESS;
1043
791k
                    }
1044
14.3M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
14.3M
                    LASTMARK_RESTORE();
1047
1048
14.3M
                    state->ptr = ptr;
1049
14.3M
                    ret = SRE(count)(state, pattern+3, 1);
1050
14.3M
                    RETURN_ON_ERROR(ret);
1051
14.3M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
14.3M
                    if (ret == 0)
1053
0
                        break;
1054
14.3M
                    assert(ret == 1);
1055
14.3M
                    ptr++;
1056
14.3M
                    ctx->count++;
1057
14.3M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
76.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
76.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
76.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
76.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
76.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
76.3M
            ctx->u.rep->count = -1;
1131
76.3M
            ctx->u.rep->pattern = pattern;
1132
76.3M
            ctx->u.rep->prev = state->repeat;
1133
76.3M
            ctx->u.rep->last_ptr = NULL;
1134
76.3M
            state->repeat = ctx->u.rep;
1135
1136
76.3M
            state->ptr = ptr;
1137
76.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
76.3M
            state->repeat = ctx->u.rep->prev;
1139
76.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
76.3M
            if (ret) {
1142
14.9M
                RETURN_ON_ERROR(ret);
1143
14.9M
                RETURN_SUCCESS;
1144
14.9M
            }
1145
61.4M
            RETURN_FAILURE;
1146
1147
101M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
101M
            ctx->u.rep = state->repeat;
1155
101M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
101M
            state->ptr = ptr;
1159
1160
101M
            ctx->count = ctx->u.rep->count+1;
1161
1162
101M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
101M
                   ptr, ctx->count));
1164
1165
101M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
101M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.89M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
98.5M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
98.5M
                ctx->u.rep->count = ctx->count;
1185
98.5M
                LASTMARK_SAVE();
1186
98.5M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
98.5M
                LAST_PTR_PUSH();
1189
98.5M
                ctx->u.rep->last_ptr = state->ptr;
1190
98.5M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
98.5M
                        ctx->u.rep->pattern+3);
1192
98.5M
                LAST_PTR_POP();
1193
98.5M
                if (ret) {
1194
24.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
24.8M
                    RETURN_ON_ERROR(ret);
1196
24.8M
                    RETURN_SUCCESS;
1197
24.8M
                }
1198
73.7M
                MARK_POP(ctx->lastmark);
1199
73.7M
                LASTMARK_RESTORE();
1200
73.7M
                ctx->u.rep->count = ctx->count-1;
1201
73.7M
                state->ptr = ptr;
1202
73.7M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
76.6M
            state->repeat = ctx->u.rep->prev;
1207
76.6M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
76.6M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
76.6M
            RETURN_ON_SUCCESS(ret);
1211
61.6M
            state->ptr = ptr;
1212
61.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
11.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
11.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
11.8M
                   ptr, pattern[1]));
1565
11.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
11.8M
            state->ptr = ptr - pattern[1];
1568
11.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
11.8M
            RETURN_ON_FAILURE(ret);
1570
6.69M
            pattern += pattern[0];
1571
6.69M
            DISPATCH;
1572
1573
7.93M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.93M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.93M
                   ptr, pattern[1]));
1578
7.93M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.93M
                state->ptr = ptr - pattern[1];
1580
7.93M
                LASTMARK_SAVE();
1581
7.93M
                if (state->repeat)
1582
7.93M
                    MARK_PUSH(ctx->lastmark);
1583
1584
15.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
15.8M
                if (ret) {
1586
2.98k
                    if (state->repeat)
1587
2.98k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.98k
                    RETURN_ON_ERROR(ret);
1589
2.98k
                    RETURN_FAILURE;
1590
2.98k
                }
1591
7.93M
                if (state->repeat)
1592
7.93M
                    MARK_POP(ctx->lastmark);
1593
7.93M
                LASTMARK_RESTORE();
1594
7.93M
            }
1595
7.93M
            pattern += pattern[0];
1596
7.93M
            DISPATCH;
1597
1598
7.93M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
644M
exit:
1620
644M
    ctx_pos = ctx->last_ctx_pos;
1621
644M
    jump = ctx->jump;
1622
644M
    DATA_POP_DISCARD(ctx);
1623
644M
    if (ctx_pos == -1) {
1624
227M
        state->sigcount = sigcount;
1625
227M
        return ret;
1626
227M
    }
1627
416M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
416M
    switch (jump) {
1630
98.5M
        case JUMP_MAX_UNTIL_2:
1631
98.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
98.5M
            goto jump_max_until_2;
1633
76.6M
        case JUMP_MAX_UNTIL_3:
1634
76.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
76.6M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
22.6M
        case JUMP_BRANCH:
1643
22.6M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
22.6M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
76.3M
        case JUMP_REPEAT:
1658
76.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
76.3M
            goto jump_repeat;
1660
2.95M
        case JUMP_REPEAT_ONE_1:
1661
2.95M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.95M
            goto jump_repeat_one_1;
1663
104M
        case JUMP_REPEAT_ONE_2:
1664
104M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
104M
            goto jump_repeat_one_2;
1666
15.1M
        case JUMP_MIN_REPEAT_ONE:
1667
15.1M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
15.1M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
11.8M
        case JUMP_ASSERT:
1673
11.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
11.8M
            goto jump_assert;
1675
7.93M
        case JUMP_ASSERT_NOT:
1676
7.93M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.93M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
416M
    }
1683
1684
0
    return ret; /* should never get here */
1685
416M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
86.6M
{
601
86.6M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
86.6M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
86.6M
    Py_ssize_t ret = 0;
604
86.6M
    int jump;
605
86.6M
    unsigned int sigcount = state->sigcount;
606
607
86.6M
    SRE(match_context)* ctx;
608
86.6M
    SRE(match_context)* nextctx;
609
86.6M
    INIT_TRACE(state);
610
611
86.6M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
86.6M
    DATA_ALLOC(SRE(match_context), ctx);
614
86.6M
    ctx->last_ctx_pos = -1;
615
86.6M
    ctx->jump = JUMP_NONE;
616
86.6M
    ctx->toplevel = toplevel;
617
86.6M
    ctx_pos = alloc_pos;
618
619
86.6M
#if USE_COMPUTED_GOTOS
620
86.6M
#include "sre_targets.h"
621
86.6M
#endif
622
623
407M
entrance:
624
625
407M
    ;  // Fashion statement.
626
407M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
407M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
4.72k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
4.72k
                   end - ptr, (size_t) pattern[3]));
634
4.72k
            RETURN_FAILURE;
635
4.72k
        }
636
13.3M
        pattern += pattern[1] + 1;
637
13.3M
    }
638
639
407M
#if USE_COMPUTED_GOTOS
640
407M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
407M
    {
647
648
407M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
131M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
131M
                   ptr, pattern[0]));
653
131M
            {
654
131M
                int i = pattern[0];
655
131M
                if (i & 1)
656
24.6M
                    state->lastindex = i/2 + 1;
657
131M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
129M
                    int j = state->lastmark + 1;
663
131M
                    while (j < i)
664
2.33M
                        state->mark[j++] = NULL;
665
129M
                    state->lastmark = i;
666
129M
                }
667
131M
                state->mark[i] = ptr;
668
131M
            }
669
131M
            pattern++;
670
131M
            DISPATCH;
671
672
131M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
30.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
30.1M
                   ptr, *pattern));
677
30.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
10.7M
                RETURN_FAILURE;
679
19.3M
            pattern++;
680
19.3M
            ptr++;
681
19.3M
            DISPATCH;
682
683
19.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
39.4M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
39.4M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
39.4M
            if (ctx->toplevel &&
698
11.6M
                ((state->match_all && ptr != state->end) ||
699
11.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
39.4M
            state->ptr = ptr;
704
39.4M
            RETURN_SUCCESS;
705
706
17.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.5M
            if (!SRE(at)(state, ptr, *pattern))
711
17.4M
                RETURN_FAILURE;
712
45.5k
            pattern++;
713
45.5k
            DISPATCH;
714
715
45.5k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
83.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
83.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
83.6M
            if (ptr >= end ||
749
83.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
21.9M
                RETURN_FAILURE;
751
61.7M
            pattern += pattern[0];
752
61.7M
            ptr++;
753
61.7M
            DISPATCH;
754
755
61.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.24M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.24M
                   pattern, ptr, pattern[0]));
758
2.24M
            if (ptr >= end ||
759
2.24M
                sre_lower_ascii(*ptr) != *pattern)
760
20.7k
                RETURN_FAILURE;
761
2.22M
            pattern++;
762
2.22M
            ptr++;
763
2.22M
            DISPATCH;
764
765
2.22M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
28.9M
        TARGET(SRE_OP_JUMP):
845
28.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
28.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
28.9M
                   ptr, pattern[0]));
850
28.9M
            pattern += pattern[0];
851
28.9M
            DISPATCH;
852
853
39.1M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
39.1M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
39.1M
            LASTMARK_SAVE();
858
39.1M
            if (state->repeat)
859
34.6M
                MARK_PUSH(ctx->lastmark);
860
84.2M
            for (; pattern[0]; pattern += pattern[0]) {
861
73.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
36.6M
                    (ptr >= end ||
863
36.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.9M
                    continue;
865
47.6M
                if (pattern[1] == SRE_OP_IN &&
866
29.7M
                    (ptr >= end ||
867
29.7M
                     !SRE(charset)(state, pattern + 3,
868
29.7M
                                   (SRE_CODE) *ptr)))
869
18.3M
                    continue;
870
29.3M
                state->ptr = ptr;
871
29.3M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
29.3M
                if (ret) {
873
28.5M
                    if (state->repeat)
874
24.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
28.5M
                    RETURN_ON_ERROR(ret);
876
28.5M
                    RETURN_SUCCESS;
877
28.5M
                }
878
827k
                if (state->repeat)
879
14.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
827k
                LASTMARK_RESTORE();
881
827k
            }
882
10.6M
            if (state->repeat)
883
9.99M
                MARK_POP_DISCARD(ctx->lastmark);
884
10.6M
            RETURN_FAILURE;
885
886
143M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
143M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
143M
                   pattern[1], pattern[2]));
898
899
143M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
21.3k
                RETURN_FAILURE; /* cannot match */
901
902
143M
            state->ptr = ptr;
903
904
143M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
143M
            RETURN_ON_ERROR(ret);
906
143M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
143M
            ctx->count = ret;
908
143M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
143M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
64.3M
                RETURN_FAILURE;
917
918
79.3M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.04M
                ptr == state->end &&
920
3.93k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.93k
            {
922
                /* tail is empty.  we're finished */
923
3.93k
                state->ptr = ptr;
924
3.93k
                RETURN_SUCCESS;
925
3.93k
            }
926
927
79.3M
            LASTMARK_SAVE();
928
79.3M
            if (state->repeat)
929
56.1M
                MARK_PUSH(ctx->lastmark);
930
931
79.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
21.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
21.6M
                for (;;) {
936
50.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
35.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
29.1M
                        ptr--;
939
29.1M
                        ctx->count--;
940
29.1M
                    }
941
21.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
15.0M
                        break;
943
6.57M
                    state->ptr = ptr;
944
6.57M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.57M
                            pattern+pattern[0]);
946
6.57M
                    if (ret) {
947
6.57M
                        if (state->repeat)
948
6.56M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.57M
                        RETURN_ON_ERROR(ret);
950
6.57M
                        RETURN_SUCCESS;
951
6.57M
                    }
952
296
                    if (state->repeat)
953
296
                        MARK_POP_KEEP(ctx->lastmark);
954
296
                    LASTMARK_RESTORE();
955
956
296
                    ptr--;
957
296
                    ctx->count--;
958
296
                }
959
15.0M
                if (state->repeat)
960
15.0M
                    MARK_POP_DISCARD(ctx->lastmark);
961
57.6M
            } else {
962
                /* general case */
963
76.2M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
73.0M
                    state->ptr = ptr;
965
73.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
73.0M
                            pattern+pattern[0]);
967
73.0M
                    if (ret) {
968
54.4M
                        if (state->repeat)
969
34.3M
                            MARK_POP_DISCARD(ctx->lastmark);
970
54.4M
                        RETURN_ON_ERROR(ret);
971
54.4M
                        RETURN_SUCCESS;
972
54.4M
                    }
973
18.5M
                    if (state->repeat)
974
174k
                        MARK_POP_KEEP(ctx->lastmark);
975
18.5M
                    LASTMARK_RESTORE();
976
977
18.5M
                    ptr--;
978
18.5M
                    ctx->count--;
979
18.5M
                }
980
3.21M
                if (state->repeat)
981
94.5k
                    MARK_POP_DISCARD(ctx->lastmark);
982
3.21M
            }
983
18.2M
            RETURN_FAILURE;
984
985
19.6k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
19.6k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
19.6k
                   pattern[1], pattern[2]));
997
998
19.6k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
19.6k
            state->ptr = ptr;
1002
1003
19.6k
            if (pattern[1] == 0)
1004
19.6k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
19.6k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
19.6k
            } else {
1028
                /* general case */
1029
19.6k
                LASTMARK_SAVE();
1030
19.6k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
3.12M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
3.12M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
3.12M
                    state->ptr = ptr;
1036
3.12M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
3.12M
                            pattern+pattern[0]);
1038
3.12M
                    if (ret) {
1039
19.6k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
19.6k
                        RETURN_ON_ERROR(ret);
1042
19.6k
                        RETURN_SUCCESS;
1043
19.6k
                    }
1044
3.10M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
3.10M
                    LASTMARK_RESTORE();
1047
1048
3.10M
                    state->ptr = ptr;
1049
3.10M
                    ret = SRE(count)(state, pattern+3, 1);
1050
3.10M
                    RETURN_ON_ERROR(ret);
1051
3.10M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
3.10M
                    if (ret == 0)
1053
0
                        break;
1054
3.10M
                    assert(ret == 1);
1055
3.10M
                    ptr++;
1056
3.10M
                    ctx->count++;
1057
3.10M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
46.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
46.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
46.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
46.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
46.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
46.8M
            ctx->u.rep->count = -1;
1131
46.8M
            ctx->u.rep->pattern = pattern;
1132
46.8M
            ctx->u.rep->prev = state->repeat;
1133
46.8M
            ctx->u.rep->last_ptr = NULL;
1134
46.8M
            state->repeat = ctx->u.rep;
1135
1136
46.8M
            state->ptr = ptr;
1137
46.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
46.8M
            state->repeat = ctx->u.rep->prev;
1139
46.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
46.8M
            if (ret) {
1142
30.8M
                RETURN_ON_ERROR(ret);
1143
30.8M
                RETURN_SUCCESS;
1144
30.8M
            }
1145
16.0M
            RETURN_FAILURE;
1146
1147
86.8M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
86.8M
            ctx->u.rep = state->repeat;
1155
86.8M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
86.8M
            state->ptr = ptr;
1159
1160
86.8M
            ctx->count = ctx->u.rep->count+1;
1161
1162
86.8M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
86.8M
                   ptr, ctx->count));
1164
1165
86.8M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
86.8M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.07M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
81.7M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
81.7M
                ctx->u.rep->count = ctx->count;
1185
81.7M
                LASTMARK_SAVE();
1186
81.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
81.7M
                LAST_PTR_PUSH();
1189
81.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
81.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
81.7M
                        ctx->u.rep->pattern+3);
1192
81.7M
                LAST_PTR_POP();
1193
81.7M
                if (ret) {
1194
39.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
39.9M
                    RETURN_ON_ERROR(ret);
1196
39.9M
                    RETURN_SUCCESS;
1197
39.9M
                }
1198
41.8M
                MARK_POP(ctx->lastmark);
1199
41.8M
                LASTMARK_RESTORE();
1200
41.8M
                ctx->u.rep->count = ctx->count-1;
1201
41.8M
                state->ptr = ptr;
1202
41.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
46.9M
            state->repeat = ctx->u.rep->prev;
1207
46.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
46.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
46.9M
            RETURN_ON_SUCCESS(ret);
1211
16.1M
            state->ptr = ptr;
1212
16.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
21.6M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
21.6M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
21.6M
                   ptr, pattern[1]));
1565
21.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
21.6M
            state->ptr = ptr - pattern[1];
1568
21.6M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
21.6M
            RETURN_ON_FAILURE(ret);
1570
20.3M
            pattern += pattern[0];
1571
20.3M
            DISPATCH;
1572
1573
20.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
11.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
11.6M
                   ptr, pattern[1]));
1578
11.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
11.6M
                state->ptr = ptr - pattern[1];
1580
11.6M
                LASTMARK_SAVE();
1581
11.6M
                if (state->repeat)
1582
11.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
23.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
23.3M
                if (ret) {
1586
14.1k
                    if (state->repeat)
1587
14.1k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.1k
                    RETURN_ON_ERROR(ret);
1589
14.1k
                    RETURN_FAILURE;
1590
14.1k
                }
1591
11.6M
                if (state->repeat)
1592
11.6M
                    MARK_POP(ctx->lastmark);
1593
11.6M
                LASTMARK_RESTORE();
1594
11.6M
            }
1595
11.6M
            pattern += pattern[0];
1596
11.6M
            DISPATCH;
1597
1598
11.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
407M
exit:
1620
407M
    ctx_pos = ctx->last_ctx_pos;
1621
407M
    jump = ctx->jump;
1622
407M
    DATA_POP_DISCARD(ctx);
1623
407M
    if (ctx_pos == -1) {
1624
86.6M
        state->sigcount = sigcount;
1625
86.6M
        return ret;
1626
86.6M
    }
1627
320M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
320M
    switch (jump) {
1630
81.7M
        case JUMP_MAX_UNTIL_2:
1631
81.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
81.7M
            goto jump_max_until_2;
1633
46.9M
        case JUMP_MAX_UNTIL_3:
1634
46.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
46.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
29.3M
        case JUMP_BRANCH:
1643
29.3M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
29.3M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
46.8M
        case JUMP_REPEAT:
1658
46.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
46.8M
            goto jump_repeat;
1660
6.57M
        case JUMP_REPEAT_ONE_1:
1661
6.57M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.57M
            goto jump_repeat_one_1;
1663
73.0M
        case JUMP_REPEAT_ONE_2:
1664
73.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
73.0M
            goto jump_repeat_one_2;
1666
3.12M
        case JUMP_MIN_REPEAT_ONE:
1667
3.12M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
3.12M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
21.6M
        case JUMP_ASSERT:
1673
21.6M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
21.6M
            goto jump_assert;
1675
11.6M
        case JUMP_ASSERT_NOT:
1676
11.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
11.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
320M
    }
1683
1684
0
    return ret; /* should never get here */
1685
320M
}
1686
1687
/* Capturing-group bookkeeping has to be reset between two consecutive
   SRE(match) calls made from a search loop.  Sets both state->lastmark and
   state->lastindex to -1; expects a pointer named `state` in scope. */
#define RESET_CAPTURE_GROUP() \
    do { \
        state->lastindex = -1; \
        state->lastmark = -1; \
    } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
104M
{
1694
104M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
104M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
104M
    Py_ssize_t status = 0;
1697
104M
    Py_ssize_t prefix_len = 0;
1698
104M
    Py_ssize_t prefix_skip = 0;
1699
104M
    SRE_CODE* prefix = NULL;
1700
104M
    SRE_CODE* charset = NULL;
1701
104M
    SRE_CODE* overlap = NULL;
1702
104M
    int flags = 0;
1703
104M
    INIT_TRACE(state);
1704
1705
104M
    if (ptr > end)
1706
0
        return 0;
1707
1708
104M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
104M
        flags = pattern[2];
1713
1714
104M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.03M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.03M
                   end - ptr, (size_t) pattern[3]));
1717
5.03M
            return 0;
1718
5.03M
        }
1719
99.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.66M
            end -= pattern[3] - 1;
1723
9.66M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.66M
        }
1726
1727
99.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.66M
            prefix_len = pattern[5];
1731
9.66M
            prefix_skip = pattern[6];
1732
9.66M
            prefix = pattern + 7;
1733
9.66M
            overlap = prefix + prefix_len - 1;
1734
89.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
79.0M
            charset = pattern + 5;
1738
1739
99.0M
        pattern += 1 + pattern[1];
1740
99.0M
    }
1741
1742
99.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
99.0M
           prefix, prefix_len, prefix_skip));
1744
99.0M
    TRACE(("charset = %p\n", charset));
1745
1746
99.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.77M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
5.01M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
5.01M
#endif
1753
5.01M
        end = (SRE_CHAR *)state->end;
1754
5.01M
        state->must_advance = 0;
1755
9.21M
        while (ptr < end) {
1756
105M
            while (*ptr != c) {
1757
97.3M
                if (++ptr >= end)
1758
1.16M
                    return 0;
1759
97.3M
            }
1760
7.98M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.98M
            state->start = ptr;
1762
7.98M
            state->ptr = ptr + prefix_skip;
1763
7.98M
            if (flags & SRE_INFO_LITERAL)
1764
4.41k
                return 1; /* we got all of it */
1765
7.98M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.98M
            if (status != 0)
1767
7.54M
                return status;
1768
437k
            ++ptr;
1769
437k
            RESET_CAPTURE_GROUP();
1770
437k
        }
1771
60.8k
        return 0;
1772
5.01M
    }
1773
1774
90.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
886k
        Py_ssize_t i = 0;
1778
1779
886k
        end = (SRE_CHAR *)state->end;
1780
886k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.96M
        for (i = 0; i < prefix_len; i++)
1784
1.31M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
656k
#endif
1787
1.50M
        while (ptr < end) {
1788
1.50M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.98M
            while (*ptr++ != c) {
1790
5.48M
                if (ptr >= end)
1791
305
                    return 0;
1792
5.48M
            }
1793
1.50M
            if (ptr >= end)
1794
62
                return 0;
1795
1796
1.50M
            i = 1;
1797
1.50M
            state->must_advance = 0;
1798
1.50M
            do {
1799
1.50M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.43M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.43M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.43M
                    state->start = ptr - (prefix_len - 1);
1808
1.43M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.43M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.43M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.43M
                    if (status != 0)
1813
886k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
544k
                    if (++ptr >= end)
1816
44
                        return 0;
1817
544k
                    RESET_CAPTURE_GROUP();
1818
544k
                }
1819
616k
                i = overlap[i];
1820
616k
            } while (i != 0);
1821
1.50M
        }
1822
0
        return 0;
1823
886k
    }
1824
1825
89.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
79.0M
        end = (SRE_CHAR *)state->end;
1828
79.0M
        state->must_advance = 0;
1829
81.4M
        for (;;) {
1830
350M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
269M
                ptr++;
1832
81.4M
            if (ptr >= end)
1833
3.88M
                return 0;
1834
77.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
77.5M
            state->start = ptr;
1836
77.5M
            state->ptr = ptr;
1837
77.5M
            status = SRE(match)(state, pattern, 0);
1838
77.5M
            if (status != 0)
1839
75.1M
                break;
1840
2.37M
            ptr++;
1841
2.37M
            RESET_CAPTURE_GROUP();
1842
2.37M
        }
1843
79.0M
    } else {
1844
        /* general case */
1845
10.3M
        assert(ptr <= end);
1846
10.3M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
10.3M
        state->start = state->ptr = ptr;
1848
10.3M
        status = SRE(match)(state, pattern, 1);
1849
10.3M
        state->must_advance = 0;
1850
10.3M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
5.27M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
66
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
5.27M
        {
1854
5.27M
            state->start = state->ptr = ptr = end;
1855
5.27M
            return 0;
1856
5.27M
        }
1857
318M
        while (status == 0 && ptr < end) {
1858
313M
            ptr++;
1859
313M
            RESET_CAPTURE_GROUP();
1860
313M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
313M
            state->start = state->ptr = ptr;
1862
313M
            status = SRE(match)(state, pattern, 0);
1863
313M
        }
1864
5.05M
    }
1865
1866
80.2M
    return status;
1867
89.3M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
48.3M
{
1694
48.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.3M
    Py_ssize_t status = 0;
1697
48.3M
    Py_ssize_t prefix_len = 0;
1698
48.3M
    Py_ssize_t prefix_skip = 0;
1699
48.3M
    SRE_CODE* prefix = NULL;
1700
48.3M
    SRE_CODE* charset = NULL;
1701
48.3M
    SRE_CODE* overlap = NULL;
1702
48.3M
    int flags = 0;
1703
48.3M
    INIT_TRACE(state);
1704
1705
48.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.3M
        flags = pattern[2];
1713
1714
48.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.87M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.87M
                   end - ptr, (size_t) pattern[3]));
1717
4.87M
            return 0;
1718
4.87M
        }
1719
43.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.36M
            end -= pattern[3] - 1;
1723
3.36M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.36M
        }
1726
1727
43.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.36M
            prefix_len = pattern[5];
1731
3.36M
            prefix_skip = pattern[6];
1732
3.36M
            prefix = pattern + 7;
1733
3.36M
            overlap = prefix + prefix_len - 1;
1734
40.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
32.0M
            charset = pattern + 5;
1738
1739
43.4M
        pattern += 1 + pattern[1];
1740
43.4M
    }
1741
1742
43.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
43.4M
           prefix, prefix_len, prefix_skip));
1744
43.4M
    TRACE(("charset = %p\n", charset));
1745
1746
43.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.33M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
3.33M
#if SIZEOF_SRE_CHAR < 4
1750
3.33M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
3.33M
#endif
1753
3.33M
        end = (SRE_CHAR *)state->end;
1754
3.33M
        state->must_advance = 0;
1755
3.53M
        while (ptr < end) {
1756
27.1M
            while (*ptr != c) {
1757
24.8M
                if (++ptr >= end)
1758
1.09M
                    return 0;
1759
24.8M
            }
1760
2.38M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.38M
            state->start = ptr;
1762
2.38M
            state->ptr = ptr + prefix_skip;
1763
2.38M
            if (flags & SRE_INFO_LITERAL)
1764
274
                return 1; /* we got all of it */
1765
2.38M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.38M
            if (status != 0)
1767
2.18M
                return status;
1768
203k
            ++ptr;
1769
203k
            RESET_CAPTURE_GROUP();
1770
203k
        }
1771
56.4k
        return 0;
1772
3.33M
    }
1773
1774
40.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
34.4k
        Py_ssize_t i = 0;
1778
1779
34.4k
        end = (SRE_CHAR *)state->end;
1780
34.4k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
34.4k
#if SIZEOF_SRE_CHAR < 4
1783
103k
        for (i = 0; i < prefix_len; i++)
1784
68.9k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
34.4k
#endif
1787
102k
        while (ptr < end) {
1788
102k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
995k
            while (*ptr++ != c) {
1790
892k
                if (ptr >= end)
1791
51
                    return 0;
1792
892k
            }
1793
102k
            if (ptr >= end)
1794
25
                return 0;
1795
1796
102k
            i = 1;
1797
102k
            state->must_advance = 0;
1798
102k
            do {
1799
102k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
87.7k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
87.7k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
87.7k
                    state->start = ptr - (prefix_len - 1);
1808
87.7k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
87.7k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
87.7k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
87.7k
                    if (status != 0)
1813
34.3k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
53.3k
                    if (++ptr >= end)
1816
18
                        return 0;
1817
53.3k
                    RESET_CAPTURE_GROUP();
1818
53.3k
                }
1819
68.5k
                i = overlap[i];
1820
68.5k
            } while (i != 0);
1821
102k
        }
1822
0
        return 0;
1823
34.4k
    }
1824
1825
40.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
32.0M
        end = (SRE_CHAR *)state->end;
1828
32.0M
        state->must_advance = 0;
1829
33.5M
        for (;;) {
1830
94.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
60.8M
                ptr++;
1832
33.5M
            if (ptr >= end)
1833
2.75M
                return 0;
1834
30.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
30.7M
            state->start = ptr;
1836
30.7M
            state->ptr = ptr;
1837
30.7M
            status = SRE(match)(state, pattern, 0);
1838
30.7M
            if (status != 0)
1839
29.3M
                break;
1840
1.46M
            ptr++;
1841
1.46M
            RESET_CAPTURE_GROUP();
1842
1.46M
        }
1843
32.0M
    } else {
1844
        /* general case */
1845
8.02M
        assert(ptr <= end);
1846
8.02M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.02M
        state->start = state->ptr = ptr;
1848
8.02M
        status = SRE(match)(state, pattern, 1);
1849
8.02M
        state->must_advance = 0;
1850
8.02M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.33M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.33M
        {
1854
4.33M
            state->start = state->ptr = ptr = end;
1855
4.33M
            return 0;
1856
4.33M
        }
1857
91.0M
        while (status == 0 && ptr < end) {
1858
87.3M
            ptr++;
1859
87.3M
            RESET_CAPTURE_GROUP();
1860
87.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
87.3M
            state->start = state->ptr = ptr;
1862
87.3M
            status = SRE(match)(state, pattern, 0);
1863
87.3M
        }
1864
3.69M
    }
1865
1866
33.0M
    return status;
1867
40.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
48.2M
{
1694
48.2M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.2M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.2M
    Py_ssize_t status = 0;
1697
48.2M
    Py_ssize_t prefix_len = 0;
1698
48.2M
    Py_ssize_t prefix_skip = 0;
1699
48.2M
    SRE_CODE* prefix = NULL;
1700
48.2M
    SRE_CODE* charset = NULL;
1701
48.2M
    SRE_CODE* overlap = NULL;
1702
48.2M
    int flags = 0;
1703
48.2M
    INIT_TRACE(state);
1704
1705
48.2M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.2M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.2M
        flags = pattern[2];
1713
1714
48.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
153k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
153k
                   end - ptr, (size_t) pattern[3]));
1717
153k
            return 0;
1718
153k
        }
1719
48.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.30M
            end -= pattern[3] - 1;
1723
2.30M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.30M
        }
1726
1727
48.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.30M
            prefix_len = pattern[5];
1731
2.30M
            prefix_skip = pattern[6];
1732
2.30M
            prefix = pattern + 7;
1733
2.30M
            overlap = prefix + prefix_len - 1;
1734
45.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
43.6M
            charset = pattern + 5;
1738
1739
48.0M
        pattern += 1 + pattern[1];
1740
48.0M
    }
1741
1742
48.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
48.0M
           prefix, prefix_len, prefix_skip));
1744
48.0M
    TRACE(("charset = %p\n", charset));
1745
1746
48.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.68M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.68M
#if SIZEOF_SRE_CHAR < 4
1750
1.68M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.68M
#endif
1753
1.68M
        end = (SRE_CHAR *)state->end;
1754
1.68M
        state->must_advance = 0;
1755
1.78M
        while (ptr < end) {
1756
46.1M
            while (*ptr != c) {
1757
44.4M
                if (++ptr >= end)
1758
69.6k
                    return 0;
1759
44.4M
            }
1760
1.71M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.71M
            state->start = ptr;
1762
1.71M
            state->ptr = ptr + prefix_skip;
1763
1.71M
            if (flags & SRE_INFO_LITERAL)
1764
2.92k
                return 1; /* we got all of it */
1765
1.71M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.71M
            if (status != 0)
1767
1.60M
                return status;
1768
105k
            ++ptr;
1769
105k
            RESET_CAPTURE_GROUP();
1770
105k
        }
1771
3.70k
        return 0;
1772
1.68M
    }
1773
1774
46.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
621k
        Py_ssize_t i = 0;
1778
1779
621k
        end = (SRE_CHAR *)state->end;
1780
621k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
621k
#if SIZEOF_SRE_CHAR < 4
1783
1.86M
        for (i = 0; i < prefix_len; i++)
1784
1.24M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
621k
#endif
1787
914k
        while (ptr < end) {
1788
914k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.57M
            while (*ptr++ != c) {
1790
1.65M
                if (ptr >= end)
1791
110
                    return 0;
1792
1.65M
            }
1793
914k
            if (ptr >= end)
1794
16
                return 0;
1795
1796
914k
            i = 1;
1797
914k
            state->must_advance = 0;
1798
914k
            do {
1799
914k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
881k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
881k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
881k
                    state->start = ptr - (prefix_len - 1);
1808
881k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
881k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
881k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
881k
                    if (status != 0)
1813
621k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
260k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
260k
                    RESET_CAPTURE_GROUP();
1818
260k
                }
1819
293k
                i = overlap[i];
1820
293k
            } while (i != 0);
1821
914k
        }
1822
0
        return 0;
1823
621k
    }
1824
1825
45.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
43.6M
        end = (SRE_CHAR *)state->end;
1828
43.6M
        state->must_advance = 0;
1829
44.1M
        for (;;) {
1830
184M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
139M
                ptr++;
1832
44.1M
            if (ptr >= end)
1833
1.08M
                return 0;
1834
43.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
43.0M
            state->start = ptr;
1836
43.0M
            state->ptr = ptr;
1837
43.0M
            status = SRE(match)(state, pattern, 0);
1838
43.0M
            if (status != 0)
1839
42.6M
                break;
1840
421k
            ptr++;
1841
421k
            RESET_CAPTURE_GROUP();
1842
421k
        }
1843
43.6M
    } else {
1844
        /* general case */
1845
2.10M
        assert(ptr <= end);
1846
2.10M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.10M
        state->start = state->ptr = ptr;
1848
2.10M
        status = SRE(match)(state, pattern, 1);
1849
2.10M
        state->must_advance = 0;
1850
2.10M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
920k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
16
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
920k
        {
1854
920k
            state->start = state->ptr = ptr = end;
1855
920k
            return 0;
1856
920k
        }
1857
162M
        while (status == 0 && ptr < end) {
1858
161M
            ptr++;
1859
161M
            RESET_CAPTURE_GROUP();
1860
161M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
161M
            state->start = state->ptr = ptr;
1862
161M
            status = SRE(match)(state, pattern, 0);
1863
161M
        }
1864
1.18M
    }
1865
1866
43.7M
    return status;
1867
45.7M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.50M
{
1694
7.50M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.50M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.50M
    Py_ssize_t status = 0;
1697
7.50M
    Py_ssize_t prefix_len = 0;
1698
7.50M
    Py_ssize_t prefix_skip = 0;
1699
7.50M
    SRE_CODE* prefix = NULL;
1700
7.50M
    SRE_CODE* charset = NULL;
1701
7.50M
    SRE_CODE* overlap = NULL;
1702
7.50M
    int flags = 0;
1703
7.50M
    INIT_TRACE(state);
1704
1705
7.50M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.50M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.50M
        flags = pattern[2];
1713
1714
7.50M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.67k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.67k
                   end - ptr, (size_t) pattern[3]));
1717
6.67k
            return 0;
1718
6.67k
        }
1719
7.49M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.99M
            end -= pattern[3] - 1;
1723
3.99M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.99M
        }
1726
1727
7.49M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.99M
            prefix_len = pattern[5];
1731
3.99M
            prefix_skip = pattern[6];
1732
3.99M
            prefix = pattern + 7;
1733
3.99M
            overlap = prefix + prefix_len - 1;
1734
3.99M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.30M
            charset = pattern + 5;
1738
1739
7.49M
        pattern += 1 + pattern[1];
1740
7.49M
    }
1741
1742
7.49M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.49M
           prefix, prefix_len, prefix_skip));
1744
7.49M
    TRACE(("charset = %p\n", charset));
1745
1746
7.49M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.76M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.76M
        end = (SRE_CHAR *)state->end;
1754
3.76M
        state->must_advance = 0;
1755
3.89M
        while (ptr < end) {
1756
31.9M
            while (*ptr != c) {
1757
28.0M
                if (++ptr >= end)
1758
3.87k
                    return 0;
1759
28.0M
            }
1760
3.88M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.88M
            state->start = ptr;
1762
3.88M
            state->ptr = ptr + prefix_skip;
1763
3.88M
            if (flags & SRE_INFO_LITERAL)
1764
1.21k
                return 1; /* we got all of it */
1765
3.88M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.88M
            if (status != 0)
1767
3.75M
                return status;
1768
128k
            ++ptr;
1769
128k
            RESET_CAPTURE_GROUP();
1770
128k
        }
1771
705
        return 0;
1772
3.76M
    }
1773
1774
3.73M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
230k
        Py_ssize_t i = 0;
1778
1779
230k
        end = (SRE_CHAR *)state->end;
1780
230k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
483k
        while (ptr < end) {
1788
483k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.41M
            while (*ptr++ != c) {
1790
2.93M
                if (ptr >= end)
1791
144
                    return 0;
1792
2.93M
            }
1793
483k
            if (ptr >= end)
1794
21
                return 0;
1795
1796
483k
            i = 1;
1797
483k
            state->must_advance = 0;
1798
484k
            do {
1799
484k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
461k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
461k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
461k
                    state->start = ptr - (prefix_len - 1);
1808
461k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
461k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
461k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
461k
                    if (status != 0)
1813
230k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
230k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
230k
                    RESET_CAPTURE_GROUP();
1818
230k
                }
1819
254k
                i = overlap[i];
1820
254k
            } while (i != 0);
1821
483k
        }
1822
0
        return 0;
1823
230k
    }
1824
1825
3.50M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.30M
        end = (SRE_CHAR *)state->end;
1828
3.30M
        state->must_advance = 0;
1829
3.79M
        for (;;) {
1830
72.3M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
68.5M
                ptr++;
1832
3.79M
            if (ptr >= end)
1833
49.2k
                return 0;
1834
3.75M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.75M
            state->start = ptr;
1836
3.75M
            state->ptr = ptr;
1837
3.75M
            status = SRE(match)(state, pattern, 0);
1838
3.75M
            if (status != 0)
1839
3.25M
                break;
1840
494k
            ptr++;
1841
494k
            RESET_CAPTURE_GROUP();
1842
494k
        }
1843
3.30M
    } else {
1844
        /* general case */
1845
198k
        assert(ptr <= end);
1846
198k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
198k
        state->start = state->ptr = ptr;
1848
198k
        status = SRE(match)(state, pattern, 1);
1849
198k
        state->must_advance = 0;
1850
198k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
21.5k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
28
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
21.5k
        {
1854
21.5k
            state->start = state->ptr = ptr = end;
1855
21.5k
            return 0;
1856
21.5k
        }
1857
65.2M
        while (status == 0 && ptr < end) {
1858
65.0M
            ptr++;
1859
65.0M
            RESET_CAPTURE_GROUP();
1860
65.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
65.0M
            state->start = state->ptr = ptr;
1862
65.0M
            status = SRE(match)(state, pattern, 0);
1863
65.0M
        }
1864
177k
    }
1865
1866
3.43M
    return status;
1867
3.50M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/