Coverage Report

Created: 2025-11-24 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
62.4M
{
18
    /* check if pointer is at given position */
19
20
62.4M
    Py_ssize_t thisp, thatp;
21
22
62.4M
    switch (at) {
23
24
10.9M
    case SRE_AT_BEGINNING:
25
10.9M
    case SRE_AT_BEGINNING_STRING:
26
10.9M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
48.0M
    case SRE_AT_END:
33
48.0M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
928k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
48.0M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.41M
    case SRE_AT_END_STRING:
42
3.41M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
62.4M
    }
87
88
0
    return 0;
89
62.4M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
27.7M
{
18
    /* check if pointer is at given position */
19
20
27.7M
    Py_ssize_t thisp, thatp;
21
22
27.7M
    switch (at) {
23
24
10.3M
    case SRE_AT_BEGINNING:
25
10.3M
    case SRE_AT_BEGINNING_STRING:
26
10.3M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
15.9M
    case SRE_AT_END:
33
15.9M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
566k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
15.9M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.48M
    case SRE_AT_END_STRING:
42
1.48M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
27.7M
    }
87
88
0
    return 0;
89
27.7M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
16.8M
{
18
    /* check if pointer is at given position */
19
20
16.8M
    Py_ssize_t thisp, thatp;
21
22
16.8M
    switch (at) {
23
24
656k
    case SRE_AT_BEGINNING:
25
656k
    case SRE_AT_BEGINNING_STRING:
26
656k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
14.9M
    case SRE_AT_END:
33
14.9M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
354k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
14.9M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.20M
    case SRE_AT_END_STRING:
42
1.20M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
16.8M
    }
87
88
0
    return 0;
89
16.8M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
17.8M
{
18
    /* check if pointer is at given position */
19
20
17.8M
    Py_ssize_t thisp, thatp;
21
22
17.8M
    switch (at) {
23
24
25.0k
    case SRE_AT_BEGINNING:
25
25.0k
    case SRE_AT_BEGINNING_STRING:
26
25.0k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
17.1M
    case SRE_AT_END:
33
17.1M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
7.18k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
17.1M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
712k
    case SRE_AT_END_STRING:
42
712k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
17.8M
    }
87
88
0
    return 0;
89
17.8M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.58G
{
94
    /* check if character is a member of the given set */
95
96
1.58G
    int ok = 1;
97
98
3.52G
    for (;;) {
99
3.52G
        switch (*set++) {
100
101
1.02G
        case SRE_OP_FAILURE:
102
1.02G
            return !ok;
103
104
1.11G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.11G
            if (ch == set[0])
107
7.25M
                return ok;
108
1.10G
            set++;
109
1.10G
            break;
110
111
65.0M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
65.0M
            if (sre_category(set[0], (int) ch))
114
41.7M
                return ok;
115
23.3M
            set++;
116
23.3M
            break;
117
118
671M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
671M
            if (ch < 256 &&
121
624M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
293M
                return ok;
123
377M
            set += 256/SRE_CODE_BITS;
124
377M
            break;
125
126
353M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
353M
            if (set[0] <= ch && ch <= set[1])
129
219M
                return ok;
130
133M
            set += 2;
131
133M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
302M
        case SRE_OP_NEGATE:
148
302M
            ok = !ok;
149
302M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.52G
        }
175
3.52G
    }
176
1.58G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
363M
{
94
    /* check if character is a member of the given set */
95
96
363M
    int ok = 1;
97
98
725M
    for (;;) {
99
725M
        switch (*set++) {
100
101
192M
        case SRE_OP_FAILURE:
102
192M
            return !ok;
103
104
207M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
207M
            if (ch == set[0])
107
4.83M
                return ok;
108
202M
            set++;
109
202M
            break;
110
111
30.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
30.9M
            if (sre_category(set[0], (int) ch))
114
17.8M
                return ok;
115
13.1M
            set++;
116
13.1M
            break;
117
118
90.9M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
90.9M
            if (ch < 256 &&
121
90.9M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
46.7M
                return ok;
123
44.1M
            set += 256/SRE_CODE_BITS;
124
44.1M
            break;
125
126
166M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
166M
            if (set[0] <= ch && ch <= set[1])
129
101M
                return ok;
130
64.7M
            set += 2;
131
64.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
37.8M
        case SRE_OP_NEGATE:
148
37.8M
            ok = !ok;
149
37.8M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
725M
        }
175
725M
    }
176
363M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
672M
{
94
    /* check if character is a member of the given set */
95
96
672M
    int ok = 1;
97
98
1.62G
    for (;;) {
99
1.62G
        switch (*set++) {
100
101
489M
        case SRE_OP_FAILURE:
102
489M
            return !ok;
103
104
623M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
623M
            if (ch == set[0])
107
1.60M
                return ok;
108
622M
            set++;
109
622M
            break;
110
111
16.3M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
16.3M
            if (sre_category(set[0], (int) ch))
114
8.89M
                return ok;
115
7.40M
            set++;
116
7.40M
            break;
117
118
210M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
210M
            if (ch < 256 &&
121
191M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
68.9M
                return ok;
123
141M
            set += 256/SRE_CODE_BITS;
124
141M
            break;
125
126
161M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
161M
            if (set[0] <= ch && ch <= set[1])
129
103M
                return ok;
130
57.6M
            set += 2;
131
57.6M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
120M
        case SRE_OP_NEGATE:
148
120M
            ok = !ok;
149
120M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.62G
        }
175
1.62G
    }
176
672M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
549M
{
94
    /* check if character is a member of the given set */
95
96
549M
    int ok = 1;
97
98
1.18G
    for (;;) {
99
1.18G
        switch (*set++) {
100
101
340M
        case SRE_OP_FAILURE:
102
340M
            return !ok;
103
104
283M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
283M
            if (ch == set[0])
107
823k
                return ok;
108
282M
            set++;
109
282M
            break;
110
111
17.8M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
17.8M
            if (sre_category(set[0], (int) ch))
114
15.0M
                return ok;
115
2.79M
            set++;
116
2.79M
            break;
117
118
369M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
369M
            if (ch < 256 &&
121
342M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
178M
                return ok;
123
191M
            set += 256/SRE_CODE_BITS;
124
191M
            break;
125
126
25.5M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
25.5M
            if (set[0] <= ch && ch <= set[1])
129
14.7M
                return ok;
130
10.7M
            set += 2;
131
10.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
144M
        case SRE_OP_NEGATE:
148
144M
            ok = !ok;
149
144M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.18G
        }
175
1.18G
    }
176
549M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
593M
{
195
593M
    SRE_CODE chr;
196
593M
    SRE_CHAR c;
197
593M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
593M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
593M
    Py_ssize_t i;
200
593M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
593M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
42.9M
        end = ptr + maxcount;
205
206
593M
    switch (pattern[0]) {
207
208
490M
    case SRE_OP_IN:
209
        /* repeated set */
210
490M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
884M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
393M
            ptr++;
213
490M
        break;
214
215
17.2M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
17.2M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
55.4M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
38.2M
            ptr++;
220
17.2M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
81.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
81.2M
        chr = pattern[1];
232
81.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
81.2M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
73.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
73.3M
        else
238
73.3M
#endif
239
87.2M
        while (ptr < end && *ptr == c)
240
5.98M
            ptr++;
241
81.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
4.28M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
4.28M
        chr = pattern[1];
270
4.28M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
4.28M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
2.06M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.06M
        else
276
2.06M
#endif
277
39.8M
        while (ptr < end && *ptr != c)
278
35.5M
            ptr++;
279
4.28M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
593M
    }
319
320
593M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
593M
           ptr - (SRE_CHAR*) state->ptr));
322
593M
    return ptr - (SRE_CHAR*) state->ptr;
323
593M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
171M
{
195
171M
    SRE_CODE chr;
196
171M
    SRE_CHAR c;
197
171M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
171M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
171M
    Py_ssize_t i;
200
171M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
171M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
11.9M
        end = ptr + maxcount;
205
206
171M
    switch (pattern[0]) {
207
208
102M
    case SRE_OP_IN:
209
        /* repeated set */
210
102M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
219M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
116M
            ptr++;
213
102M
        break;
214
215
7.63M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
7.63M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
19.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
11.9M
            ptr++;
220
7.63M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
60.2M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
60.2M
        chr = pattern[1];
232
60.2M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
60.2M
        c = (SRE_CHAR) chr;
234
60.2M
#if SIZEOF_SRE_CHAR < 4
235
60.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
60.2M
        else
238
60.2M
#endif
239
62.2M
        while (ptr < end && *ptr == c)
240
1.99M
            ptr++;
241
60.2M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
647k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
647k
        chr = pattern[1];
270
647k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
647k
        c = (SRE_CHAR) chr;
272
647k
#if SIZEOF_SRE_CHAR < 4
273
647k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
647k
        else
276
647k
#endif
277
8.73M
        while (ptr < end && *ptr != c)
278
8.09M
            ptr++;
279
647k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
171M
    }
319
320
171M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
171M
           ptr - (SRE_CHAR*) state->ptr));
322
171M
    return ptr - (SRE_CHAR*) state->ptr;
323
171M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
242M
{
195
242M
    SRE_CODE chr;
196
242M
    SRE_CHAR c;
197
242M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
242M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
242M
    Py_ssize_t i;
200
242M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
242M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
16.9M
        end = ptr + maxcount;
205
206
242M
    switch (pattern[0]) {
207
208
220M
    case SRE_OP_IN:
209
        /* repeated set */
210
220M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
351M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
130M
            ptr++;
213
220M
        break;
214
215
6.95M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
6.95M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
25.6M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
18.6M
            ptr++;
220
6.95M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
13.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
13.0M
        chr = pattern[1];
232
13.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
13.0M
        c = (SRE_CHAR) chr;
234
13.0M
#if SIZEOF_SRE_CHAR < 4
235
13.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
13.0M
        else
238
13.0M
#endif
239
16.3M
        while (ptr < end && *ptr == c)
240
3.33M
            ptr++;
241
13.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.42M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.42M
        chr = pattern[1];
270
1.42M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.42M
        c = (SRE_CHAR) chr;
272
1.42M
#if SIZEOF_SRE_CHAR < 4
273
1.42M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.42M
        else
276
1.42M
#endif
277
13.2M
        while (ptr < end && *ptr != c)
278
11.8M
            ptr++;
279
1.42M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
242M
    }
319
320
242M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
242M
           ptr - (SRE_CHAR*) state->ptr));
322
242M
    return ptr - (SRE_CHAR*) state->ptr;
323
242M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
179M
{
195
179M
    SRE_CODE chr;
196
179M
    SRE_CHAR c;
197
179M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
179M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
179M
    Py_ssize_t i;
200
179M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
179M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
14.1M
        end = ptr + maxcount;
205
206
179M
    switch (pattern[0]) {
207
208
167M
    case SRE_OP_IN:
209
        /* repeated set */
210
167M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
313M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
146M
            ptr++;
213
167M
        break;
214
215
2.63M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
2.63M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
10.2M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
7.57M
            ptr++;
220
2.63M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
7.91M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
7.91M
        chr = pattern[1];
232
7.91M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
7.91M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
8.56M
        while (ptr < end && *ptr == c)
240
655k
            ptr++;
241
7.91M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.21M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.21M
        chr = pattern[1];
270
2.21M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.21M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
17.8M
        while (ptr < end && *ptr != c)
278
15.6M
            ptr++;
279
2.21M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
179M
    }
319
320
179M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
179M
           ptr - (SRE_CHAR*) state->ptr));
322
179M
    return ptr - (SRE_CHAR*) state->ptr;
323
179M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Copy state->lastmark and state->lastindex into the current match
   context (ctx) so they can be put back later by LASTMARK_RESTORE(). */
#define LASTMARK_SAVE()     \
354
617M
    do { \
355
617M
        ctx->lastmark = state->lastmark; \
356
617M
        ctx->lastindex = state->lastindex; \
357
617M
    } while (0)
358
/* Write the values held in ctx->lastmark and ctx->lastindex back into
   state->lastmark and state->lastindex. */
#define LASTMARK_RESTORE()  \
359
288M
    do { \
360
288M
        state->lastmark = ctx->lastmark; \
361
288M
        state->lastindex = ctx->lastindex; \
362
288M
    } while (0)
363
364
/* Trace and then push the current repeat's last_ptr
   (ctx->u.rep->last_ptr) onto the data stack via DATA_PUSH(). */
#define LAST_PTR_PUSH()     \
365
218M
    do { \
366
218M
        TRACE(("push last_ptr: %zd", \
367
218M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
218M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
218M
    } while (0)
370
/* Pop a value from the data stack into ctx->u.rep->last_ptr via
   DATA_POP(), then trace the restored value. */
#define LAST_PTR_POP()  \
371
218M
    do { \
372
218M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
218M
        TRACE(("pop last_ptr: %zd", \
374
218M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
218M
    } while (0)
376
377
0
/* Return `i` from the enclosing function immediately (no jump to the
   `exit` label). */
#define RETURN_ERROR(i) do { return i; } while(0)
378
722M
/* Set `ret` to 0 and jump to the enclosing function's `exit` label. */
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
728M
/* Set `ret` to 1 and jump to the enclosing function's `exit` label. */
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* If `i` is negative, return it via RETURN_ERROR(); otherwise fall
   through.  Note that `i` is expanded more than once. */
#define RETURN_ON_ERROR(i) \
382
1.22G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Negative `i`: return it as an error.  Positive `i`: RETURN_SUCCESS.
   Zero: fall through. */
#define RETURN_ON_SUCCESS(i) \
384
124M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Negative `i`: return it as an error.  Zero: RETURN_FAILURE.
   Positive: fall through. */
#define RETURN_ON_FAILURE(i) \
386
55.5M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.45G
/* Reserve sizeof(type) bytes at the current top of state->data_stack
   and make `ptr` point at them.

   The offset of the reserved region is left in the enclosing function's
   `alloc_pos` variable.  If the remaining space is too small,
   data_stack_grow() is called; a negative result is returned from the
   enclosing function.  After growing, `ctx` is looked up again from
   `ctx_pos` (unless ctx_pos is -1) -- presumably because growing can
   relocate the stack buffer; confirm against data_stack_grow(). */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.45G
do { \
390
1.45G
    alloc_pos = state->data_stack_base; \
391
1.45G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.45G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.45G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
168M
        int j = data_stack_grow(state, sizeof(type)); \
395
168M
        if (j < 0) return j; \
396
168M
        if (ctx_pos != -1) \
397
168M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
168M
    } \
399
1.45G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.45G
    state->data_stack_base += sizeof(type); \
401
1.45G
} while (0)
402
403
1.55G
/* Point `ptr` (as a `type*`) at byte offset `pos` inside
   state->data_stack.  Nothing is allocated or copied. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.55G
do { \
405
1.55G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.55G
    ptr = (type*)(state->data_stack+pos); \
407
1.55G
} while (0)
408
409
540M
/* Copy `size` bytes from `data` onto the top of state->data_stack and
   advance state->data_stack_base by `size`.

   If there is not enough room, data_stack_grow() is called first; a
   negative result is returned from the enclosing function.  After
   growing, `ctx` is looked up again from `ctx_pos` (unless ctx_pos is
   -1) -- presumably because growing can relocate the stack buffer;
   confirm against data_stack_grow(). */
#define DATA_STACK_PUSH(state, data, size) \
410
540M
do { \
411
540M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
540M
           data, state->data_stack_base, size)); \
413
540M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
82.4k
        int j = data_stack_grow(state, size); \
415
82.4k
        if (j < 0) return j; \
416
82.4k
        if (ctx_pos != -1) \
417
82.4k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
82.4k
    } \
419
540M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
540M
    state->data_stack_base += size; \
421
540M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
330M
/* Copy the topmost `size` bytes of state->data_stack into `data`.
   When `discard` is true the bytes are also removed from the stack
   (state->data_stack_base is lowered by `size`); otherwise they stay
   on the stack. */
#define DATA_STACK_POP(state, data, size, discard) \
427
330M
do { \
428
330M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
330M
           data, state->data_stack_base-size, size)); \
430
330M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
330M
    if (discard) \
432
330M
        state->data_stack_base -= size; \
433
330M
} while (0)
434
435
1.66G
/* Drop the topmost `size` bytes of state->data_stack without copying
   them anywhere (only state->data_stack_base is lowered). */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.66G
do { \
437
1.66G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.66G
           state->data_stack_base-size, size)); \
439
1.66G
    state->data_stack_base -= size; \
440
1.66G
} while(0)
441
442
/* Shorthands for the DATA_STACK_* macros that use the local `state`
   variable implicitly.  For the first three, `x` is a pointer and the
   size is taken from the pointed-to object. */

/* Push a copy of *x onto the data stack. */
#define DATA_PUSH(x) \
443
218M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop the top sizeof(*x) bytes into *x and remove them from the stack. */
#define DATA_POP(x) \
445
218M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Remove the top sizeof(*x) bytes from the stack without copying. */
#define DATA_POP_DISCARD(x) \
447
1.45G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Reserve room for one object of type `t` and point `p` at it. */
#define DATA_ALLOC(t,p) \
449
1.45G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` (as `t*`) at byte offset `pos` of the data stack. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.55G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert `ptr` to an index: its byte distance from state->beginning
   divided by state->charsize.  A NULL pointer yields -1.  Used by the
   TRACE output in this file. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid for the mark array.
   When VERBOSE is set and DO_TRACE is true, it prints `label`, the
   number of marks (lastmark+1), and the index of every entry
   state->mark[0..lastmark] (via PTR_TO_INDEX), inserting an extra space
   before each even-numbered entry after the first.  When VERBOSE is
   not set it expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* The four macros below save and restore the first (lastmark+1) entries
   of state->mark on the data stack.  Each one does nothing when
   `lastmark` is negative.  `lastmark` is expanded several times. */

/* Push state->mark[0..lastmark] onto the data stack. */
#define MARK_PUSH(lastmark) \
472
478M
    do if (lastmark >= 0) { \
473
321M
        MARK_TRACE("push", (lastmark)); \
474
321M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
321M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
478M
    } while (0)
477
/* Copy the saved marks back into state->mark and remove them from the
   data stack. */
#define MARK_POP(lastmark) \
478
137M
    do if (lastmark >= 0) { \
479
110M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
110M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
110M
        MARK_TRACE("pop", (lastmark)); \
482
137M
    } while (0)
483
/* Copy the saved marks back into state->mark but leave them on the
   data stack (discard flag is 0). */
#define MARK_POP_KEEP(lastmark) \
484
1.94M
    do if (lastmark >= 0) { \
485
1.88M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.88M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.88M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.94M
    } while (0)
489
/* Remove the saved marks from the data stack without touching
   state->mark. */
#define MARK_POP_DISCARD(lastmark) \
490
340M
    do if (lastmark >= 0) { \
491
211M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
211M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
211M
        MARK_TRACE("pop discard", (lastmark)); \
494
340M
    } while (0)
495
496
491M
/* Values stored in a match context's `jump` field.  DO_JUMPX() writes
   its `jumpvalue` argument there, and SRE(match) initializes the first
   context with JUMP_NONE.  Presumably the value selects which label to
   resume at when the nested context finishes -- confirm against the
   exit handling of SRE(match). */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
218M
#define JUMP_MAX_UNTIL_2     2
499
124M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
123M
#define JUMP_REPEAT          7
504
13.9M
#define JUMP_REPEAT_ONE_1    8
505
207M
#define JUMP_REPEAT_ONE_2    9
506
17.8M
#define JUMP_MIN_REPEAT_ONE  10
507
169M
#define JUMP_BRANCH          11
508
55.5M
#define JUMP_ASSERT          12
509
28.4M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.

   Steps: save the current `pattern` and `ptr` in ctx; allocate a new
   SRE(match_context) on the data stack; fill in its pattern, toplevel,
   jump and last_ctx_pos (the position of the current context); make it
   the current context; and jump to the `entrance` label.

   The statements after `jumplabel:` run when control is transferred
   back to that label (the transfer itself is not part of this macro);
   they reload `pattern` and `ptr` from ctx.

   This macro expands to several statements and a label and is not
   wrapped in do { } while (0), so it must not be used as the unbraced
   body of an if/else/loop. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
958M
    ctx->pattern = pattern; \
516
958M
    ctx->ptr = ptr; \
517
958M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
958M
    nextctx->pattern = nextpattern; \
519
958M
    nextctx->toplevel = toplevel_; \
520
958M
    nextctx->jump = jumpvalue; \
521
958M
    nextctx->last_ctx_pos = ctx_pos; \
522
958M
    pattern = nextpattern; \
523
958M
    ctx_pos = alloc_pos; \
524
958M
    ctx = nextctx; \
525
958M
    goto entrance; \
526
958M
    jumplabel: \
527
958M
    pattern = ctx->pattern; \
528
958M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX() where the new context inherits the current context's
   toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
875M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX() where the new context's toplevel flag is forced to 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
83.9M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation state of SRE(match).  Instances are allocated on
   state->data_stack (see DATA_ALLOC / DO_JUMPX) rather than on the C
   stack. */
typedef struct {
537
    /* NOTE(review): the uses of `count` and `u.chr` are outside this
       excerpt -- document them from the opcode handlers that use them. */
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        /* repeat whose last_ptr is saved/restored by LAST_PTR_PUSH/POP */
        SRE_REPEAT* rep;
541
    } u;
542
    /* copies of state->lastmark / state->lastindex made by
       LASTMARK_SAVE() and written back by LASTMARK_RESTORE() */
    int lastmark;
543
    int lastindex;
544
    /* pattern and string positions saved by DO_JUMPX() before entering
       a nested context, and reloaded after its jump label */
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    /* toplevel flag; set from SRE(match)'s argument or by DO_JUMPX() */
    int toplevel;
547
    /* one of the JUMP_* constants */
    int jump;
548
    /* data-stack offset of the context that created this one, or -1
       for the first context of a SRE(match) call */
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
/* Increment the local `sigcount` and, whenever its low 12 bits become
   zero (i.e. once every 0x1000 invocations), call PyErr_CheckSignals().
   If that call returns non-zero, return SRE_ERROR_INTERRUPTED from the
   enclosing function. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.50G
    do {                                                           \
553
2.50G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.50G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS is the form used by the matcher.  In Py_DEBUG
   builds it additionally counts down state->fail_after_count (when it
   is non-negative) and, on the invocation where the counter is found to
   be 0, sets the exception state->fail_after_exc and returns
   SRE_ERROR_INTERRUPTED -- presumably a fault-injection hook for tests.
   In other builds it is just _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.50G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Decide whether the opcode dispatcher uses computed gotos.
   - HAVE_COMPUTED_GOTOS defined: USE_COMPUTED_GOTOS defaults to 1 but an
     earlier definition is respected.
   - Not available but USE_COMPUTED_GOTOS was requested as non-zero:
     compilation fails with #error.
   - Otherwise USE_COMPUTED_GOTOS is forced to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* TARGET(OP) introduces the handler for opcode OP and DISPATCH moves on
   to the next opcode.
   - Computed gotos: TARGET(OP) is the label TARGET_<OP>; DISPATCH runs
     MAYBE_CHECK_SIGNALS and then jumps through sre_targets[] indexed by
     the opcode at *pattern, advancing `pattern` past it.
   - Otherwise: TARGET(OP) is a `case` label and DISPATCH jumps to the
     `dispatch` label. */
#if USE_COMPUTED_GOTOS
585
2.59G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.50G
        do {                               \
588
2.50G
            MAYBE_CHECK_SIGNALS;           \
589
2.50G
            goto *sre_targets[*pattern++]; \
590
2.50G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
491M
{
601
491M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
491M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
491M
    Py_ssize_t ret = 0;
604
491M
    int jump;
605
491M
    unsigned int sigcount = state->sigcount;
606
607
491M
    SRE(match_context)* ctx;
608
491M
    SRE(match_context)* nextctx;
609
491M
    INIT_TRACE(state);
610
611
491M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
491M
    DATA_ALLOC(SRE(match_context), ctx);
614
491M
    ctx->last_ctx_pos = -1;
615
491M
    ctx->jump = JUMP_NONE;
616
491M
    ctx->toplevel = toplevel;
617
491M
    ctx_pos = alloc_pos;
618
619
491M
#if USE_COMPUTED_GOTOS
620
491M
#include "sre_targets.h"
621
491M
#endif
622
623
1.45G
entrance:
624
625
1.45G
    ;  // Fashion statement.
626
1.45G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.45G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
77.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.60M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.60M
                   end - ptr, (size_t) pattern[3]));
634
5.60M
            RETURN_FAILURE;
635
5.60M
        }
636
72.1M
        pattern += pattern[1] + 1;
637
72.1M
    }
638
639
1.44G
#if USE_COMPUTED_GOTOS
640
1.44G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.44G
    {
647
648
1.44G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
552M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
552M
                   ptr, pattern[0]));
653
552M
            {
654
552M
                int i = pattern[0];
655
552M
                if (i & 1)
656
93.8M
                    state->lastindex = i/2 + 1;
657
552M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
544M
                    int j = state->lastmark + 1;
663
560M
                    while (j < i)
664
15.1M
                        state->mark[j++] = NULL;
665
544M
                    state->lastmark = i;
666
544M
                }
667
552M
                state->mark[i] = ptr;
668
552M
            }
669
552M
            pattern++;
670
552M
            DISPATCH;
671
672
552M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
150M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
150M
                   ptr, *pattern));
677
150M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
61.9M
                RETURN_FAILURE;
679
88.6M
            pattern++;
680
88.6M
            ptr++;
681
88.6M
            DISPATCH;
682
683
88.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
193M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
193M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
193M
            if (ctx->toplevel &&
698
59.2M
                ((state->match_all && ptr != state->end) ||
699
59.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
193M
            state->ptr = ptr;
704
193M
            RETURN_SUCCESS;
705
706
62.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
62.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
62.4M
            if (!SRE(at)(state, ptr, *pattern))
711
44.0M
                RETURN_FAILURE;
712
18.3M
            pattern++;
713
18.3M
            DISPATCH;
714
715
18.3M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
283M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
283M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
283M
            if (ptr >= end ||
749
282M
                !SRE(charset)(state, pattern + 1, *ptr))
750
54.3M
                RETURN_FAILURE;
751
229M
            pattern += pattern[0];
752
229M
            ptr++;
753
229M
            DISPATCH;
754
755
229M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
8.27M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
8.27M
                   pattern, ptr, pattern[0]));
758
8.27M
            if (ptr >= end ||
759
8.27M
                sre_lower_ascii(*ptr) != *pattern)
760
151k
                RETURN_FAILURE;
761
8.12M
            pattern++;
762
8.12M
            ptr++;
763
8.12M
            DISPATCH;
764
765
8.12M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
88.8M
        TARGET(SRE_OP_JUMP):
845
88.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
88.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
88.8M
                   ptr, pattern[0]));
850
88.8M
            pattern += pattern[0];
851
88.8M
            DISPATCH;
852
853
142M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
142M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
142M
            LASTMARK_SAVE();
858
142M
            if (state->repeat)
859
88.7M
                MARK_PUSH(ctx->lastmark);
860
344M
            for (; pattern[0]; pattern += pattern[0]) {
861
288M
                if (pattern[1] == SRE_OP_LITERAL &&
862
131M
                    (ptr >= end ||
863
131M
                     (SRE_CODE) *ptr != pattern[2]))
864
68.1M
                    continue;
865
220M
                if (pattern[1] == SRE_OP_IN &&
866
82.1M
                    (ptr >= end ||
867
82.1M
                     !SRE(charset)(state, pattern + 3,
868
82.1M
                                   (SRE_CODE) *ptr)))
869
50.9M
                    continue;
870
169M
                state->ptr = ptr;
871
169M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
169M
                if (ret) {
873
86.4M
                    if (state->repeat)
874
63.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
86.4M
                    RETURN_ON_ERROR(ret);
876
86.4M
                    RETURN_SUCCESS;
877
86.4M
                }
878
82.7M
                if (state->repeat)
879
25.4k
                    MARK_POP_KEEP(ctx->lastmark);
880
82.7M
                LASTMARK_RESTORE();
881
82.7M
            }
882
56.1M
            if (state->repeat)
883
25.0M
                MARK_POP_DISCARD(ctx->lastmark);
884
56.1M
            RETURN_FAILURE;
885
886
580M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
580M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
580M
                   pattern[1], pattern[2]));
898
899
580M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.27M
                RETURN_FAILURE; /* cannot match */
901
902
579M
            state->ptr = ptr;
903
904
579M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
579M
            RETURN_ON_ERROR(ret);
906
579M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
579M
            ctx->count = ret;
908
579M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
579M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
355M
                RETURN_FAILURE;
917
918
223M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
6.79M
                ptr == state->end &&
920
73.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
73.8k
            {
922
                /* tail is empty.  we're finished */
923
73.8k
                state->ptr = ptr;
924
73.8k
                RETURN_SUCCESS;
925
73.8k
            }
926
927
223M
            LASTMARK_SAVE();
928
223M
            if (state->repeat)
929
142M
                MARK_PUSH(ctx->lastmark);
930
931
223M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
54.4M
                ctx->u.chr = pattern[pattern[0]+1];
935
54.4M
                for (;;) {
936
132M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
92.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
78.1M
                        ptr--;
939
78.1M
                        ctx->count--;
940
78.1M
                    }
941
54.4M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
40.5M
                        break;
943
13.9M
                    state->ptr = ptr;
944
13.9M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
13.9M
                            pattern+pattern[0]);
946
13.9M
                    if (ret) {
947
13.9M
                        if (state->repeat)
948
12.5M
                            MARK_POP_DISCARD(ctx->lastmark);
949
13.9M
                        RETURN_ON_ERROR(ret);
950
13.9M
                        RETURN_SUCCESS;
951
13.9M
                    }
952
657
                    if (state->repeat)
953
657
                        MARK_POP_KEEP(ctx->lastmark);
954
657
                    LASTMARK_RESTORE();
955
956
657
                    ptr--;
957
657
                    ctx->count--;
958
657
                }
959
40.5M
                if (state->repeat)
960
39.2M
                    MARK_POP_DISCARD(ctx->lastmark);
961
169M
            } else {
962
                /* general case */
963
222M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
207M
                    state->ptr = ptr;
965
207M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
207M
                            pattern+pattern[0]);
967
207M
                    if (ret) {
968
153M
                        if (state->repeat)
969
89.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
153M
                        RETURN_ON_ERROR(ret);
971
153M
                        RETURN_SUCCESS;
972
153M
                    }
973
53.6M
                    if (state->repeat)
974
1.91M
                        MARK_POP_KEEP(ctx->lastmark);
975
53.6M
                    LASTMARK_RESTORE();
976
977
53.6M
                    ptr--;
978
53.6M
                    ctx->count--;
979
53.6M
                }
980
15.3M
                if (state->repeat)
981
1.05M
                    MARK_POP_DISCARD(ctx->lastmark);
982
15.3M
            }
983
55.8M
            RETURN_FAILURE;
984
985
3.84M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.84M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.84M
                   pattern[1], pattern[2]));
997
998
3.84M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.84M
            state->ptr = ptr;
1002
1003
3.84M
            if (pattern[1] == 0)
1004
3.84M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.84M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.84M
            } else {
1028
                /* general case */
1029
3.84M
                LASTMARK_SAVE();
1030
3.84M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
17.8M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
17.8M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
17.8M
                    state->ptr = ptr;
1036
17.8M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
17.8M
                            pattern+pattern[0]);
1038
17.8M
                    if (ret) {
1039
3.84M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.84M
                        RETURN_ON_ERROR(ret);
1042
3.84M
                        RETURN_SUCCESS;
1043
3.84M
                    }
1044
14.0M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
14.0M
                    LASTMARK_RESTORE();
1047
1048
14.0M
                    state->ptr = ptr;
1049
14.0M
                    ret = SRE(count)(state, pattern+3, 1);
1050
14.0M
                    RETURN_ON_ERROR(ret);
1051
14.0M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
14.0M
                    if (ret == 0)
1053
0
                        break;
1054
14.0M
                    assert(ret == 1);
1055
14.0M
                    ptr++;
1056
14.0M
                    ctx->count++;
1057
14.0M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
123M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
123M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
123M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
123M
            ctx->u.rep = repeat_pool_malloc(state);
1127
123M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
123M
            ctx->u.rep->count = -1;
1131
123M
            ctx->u.rep->pattern = pattern;
1132
123M
            ctx->u.rep->prev = state->repeat;
1133
123M
            ctx->u.rep->last_ptr = NULL;
1134
123M
            state->repeat = ctx->u.rep;
1135
1136
123M
            state->ptr = ptr;
1137
123M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
123M
            state->repeat = ctx->u.rep->prev;
1139
123M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
123M
            if (ret) {
1142
83.8M
                RETURN_ON_ERROR(ret);
1143
83.8M
                RETURN_SUCCESS;
1144
83.8M
            }
1145
39.7M
            RETURN_FAILURE;
1146
1147
233M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
233M
            ctx->u.rep = state->repeat;
1155
233M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
233M
            state->ptr = ptr;
1159
1160
233M
            ctx->count = ctx->u.rep->count+1;
1161
1162
233M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
233M
                   ptr, ctx->count));
1164
1165
233M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
233M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
218M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
218M
                ctx->u.rep->count = ctx->count;
1185
218M
                LASTMARK_SAVE();
1186
218M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
218M
                LAST_PTR_PUSH();
1189
218M
                ctx->u.rep->last_ptr = state->ptr;
1190
218M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
218M
                        ctx->u.rep->pattern+3);
1192
218M
                LAST_PTR_POP();
1193
218M
                if (ret) {
1194
109M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
109M
                    RETURN_ON_ERROR(ret);
1196
109M
                    RETURN_SUCCESS;
1197
109M
                }
1198
109M
                MARK_POP(ctx->lastmark);
1199
109M
                LASTMARK_RESTORE();
1200
109M
                ctx->u.rep->count = ctx->count-1;
1201
109M
                state->ptr = ptr;
1202
109M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
124M
            state->repeat = ctx->u.rep->prev;
1207
124M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
124M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
124M
            RETURN_ON_SUCCESS(ret);
1211
40.6M
            state->ptr = ptr;
1212
40.6M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
55.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
55.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
55.5M
                   ptr, pattern[1]));
1565
55.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
55.5M
            state->ptr = ptr - pattern[1];
1568
55.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
55.5M
            RETURN_ON_FAILURE(ret);
1570
48.5M
            pattern += pattern[0];
1571
48.5M
            DISPATCH;
1572
1573
48.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
28.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
28.4M
                   ptr, pattern[1]));
1578
28.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
28.4M
                state->ptr = ptr - pattern[1];
1580
28.4M
                LASTMARK_SAVE();
1581
28.4M
                if (state->repeat)
1582
28.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
56.8M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
56.8M
                if (ret) {
1586
19.0k
                    if (state->repeat)
1587
19.0k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
19.0k
                    RETURN_ON_ERROR(ret);
1589
19.0k
                    RETURN_FAILURE;
1590
19.0k
                }
1591
28.4M
                if (state->repeat)
1592
28.4M
                    MARK_POP(ctx->lastmark);
1593
28.4M
                LASTMARK_RESTORE();
1594
28.4M
            }
1595
28.4M
            pattern += pattern[0];
1596
28.4M
            DISPATCH;
1597
1598
28.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.45G
exit:
1620
1.45G
    ctx_pos = ctx->last_ctx_pos;
1621
1.45G
    jump = ctx->jump;
1622
1.45G
    DATA_POP_DISCARD(ctx);
1623
1.45G
    if (ctx_pos == -1) {
1624
491M
        state->sigcount = sigcount;
1625
491M
        return ret;
1626
491M
    }
1627
958M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
958M
    switch (jump) {
1630
218M
        case JUMP_MAX_UNTIL_2:
1631
218M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
218M
            goto jump_max_until_2;
1633
124M
        case JUMP_MAX_UNTIL_3:
1634
124M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
124M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
169M
        case JUMP_BRANCH:
1643
169M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
169M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
123M
        case JUMP_REPEAT:
1658
123M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
123M
            goto jump_repeat;
1660
13.9M
        case JUMP_REPEAT_ONE_1:
1661
13.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
13.9M
            goto jump_repeat_one_1;
1663
207M
        case JUMP_REPEAT_ONE_2:
1664
207M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
207M
            goto jump_repeat_one_2;
1666
17.8M
        case JUMP_MIN_REPEAT_ONE:
1667
17.8M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
17.8M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
55.5M
        case JUMP_ASSERT:
1673
55.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
55.5M
            goto jump_assert;
1675
28.4M
        case JUMP_ASSERT_NOT:
1676
28.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
28.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
958M
    }
1683
1684
0
    return ret; /* should never get here */
1685
958M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
160M
{
601
160M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
160M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
160M
    Py_ssize_t ret = 0;
604
160M
    int jump;
605
160M
    unsigned int sigcount = state->sigcount;
606
607
160M
    SRE(match_context)* ctx;
608
160M
    SRE(match_context)* nextctx;
609
160M
    INIT_TRACE(state);
610
611
160M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
160M
    DATA_ALLOC(SRE(match_context), ctx);
614
160M
    ctx->last_ctx_pos = -1;
615
160M
    ctx->jump = JUMP_NONE;
616
160M
    ctx->toplevel = toplevel;
617
160M
    ctx_pos = alloc_pos;
618
619
160M
#if USE_COMPUTED_GOTOS
620
160M
#include "sre_targets.h"
621
160M
#endif
622
623
406M
entrance:
624
625
406M
    ;  // Fashion statement.
626
406M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
406M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
36.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.42M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.42M
                   end - ptr, (size_t) pattern[3]));
634
5.42M
            RETURN_FAILURE;
635
5.42M
        }
636
31.2M
        pattern += pattern[1] + 1;
637
31.2M
    }
638
639
401M
#if USE_COMPUTED_GOTOS
640
401M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
401M
    {
647
648
401M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
167M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
167M
                   ptr, pattern[0]));
653
167M
            {
654
167M
                int i = pattern[0];
655
167M
                if (i & 1)
656
36.4M
                    state->lastindex = i/2 + 1;
657
167M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
163M
                    int j = state->lastmark + 1;
663
174M
                    while (j < i)
664
10.5M
                        state->mark[j++] = NULL;
665
163M
                    state->lastmark = i;
666
163M
                }
667
167M
                state->mark[i] = ptr;
668
167M
            }
669
167M
            pattern++;
670
167M
            DISPATCH;
671
672
167M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
76.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
76.2M
                   ptr, *pattern));
677
76.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
31.8M
                RETURN_FAILURE;
679
44.3M
            pattern++;
680
44.3M
            ptr++;
681
44.3M
            DISPATCH;
682
683
44.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
59.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
59.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
59.9M
            if (ctx->toplevel &&
698
24.0M
                ((state->match_all && ptr != state->end) ||
699
24.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
59.9M
            state->ptr = ptr;
704
59.9M
            RETURN_SUCCESS;
705
706
27.7M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
27.7M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
27.7M
            if (!SRE(at)(state, ptr, *pattern))
711
10.6M
                RETURN_FAILURE;
712
17.0M
            pattern++;
713
17.0M
            DISPATCH;
714
715
17.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
56.1M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
56.1M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
56.1M
            if (ptr >= end ||
749
55.5M
                !SRE(charset)(state, pattern + 1, *ptr))
750
9.50M
                RETURN_FAILURE;
751
46.6M
            pattern += pattern[0];
752
46.6M
            ptr++;
753
46.6M
            DISPATCH;
754
755
46.6M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
710k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
710k
                   pattern, ptr, pattern[0]));
758
710k
            if (ptr >= end ||
759
710k
                sre_lower_ascii(*ptr) != *pattern)
760
90.2k
                RETURN_FAILURE;
761
620k
            pattern++;
762
620k
            ptr++;
763
620k
            DISPATCH;
764
765
620k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
30.5M
        TARGET(SRE_OP_JUMP):
845
30.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
30.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
30.5M
                   ptr, pattern[0]));
850
30.5M
            pattern += pattern[0];
851
30.5M
            DISPATCH;
852
853
57.5M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
57.5M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
57.5M
            LASTMARK_SAVE();
858
57.5M
            if (state->repeat)
859
11.4M
                MARK_PUSH(ctx->lastmark);
860
164M
            for (; pattern[0]; pattern += pattern[0]) {
861
136M
                if (pattern[1] == SRE_OP_LITERAL &&
862
61.7M
                    (ptr >= end ||
863
61.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.4M
                    continue;
865
112M
                if (pattern[1] == SRE_OP_IN &&
866
12.0M
                    (ptr >= end ||
867
12.0M
                     !SRE(charset)(state, pattern + 3,
868
12.0M
                                   (SRE_CODE) *ptr)))
869
6.45M
                    continue;
870
105M
                state->ptr = ptr;
871
105M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
105M
                if (ret) {
873
29.0M
                    if (state->repeat)
874
11.2M
                        MARK_POP_DISCARD(ctx->lastmark);
875
29.0M
                    RETURN_ON_ERROR(ret);
876
29.0M
                    RETURN_SUCCESS;
877
29.0M
                }
878
76.5M
                if (state->repeat)
879
7.24k
                    MARK_POP_KEEP(ctx->lastmark);
880
76.5M
                LASTMARK_RESTORE();
881
76.5M
            }
882
28.4M
            if (state->repeat)
883
278k
                MARK_POP_DISCARD(ctx->lastmark);
884
28.4M
            RETURN_FAILURE;
885
886
167M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
167M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
167M
                   pattern[1], pattern[2]));
898
899
167M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.04M
                RETURN_FAILURE; /* cannot match */
901
902
166M
            state->ptr = ptr;
903
904
166M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
166M
            RETURN_ON_ERROR(ret);
906
166M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
166M
            ctx->count = ret;
908
166M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
166M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
119M
                RETURN_FAILURE;
917
918
47.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
612k
                ptr == state->end &&
920
50.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
50.9k
            {
922
                /* tail is empty.  we're finished */
923
50.9k
                state->ptr = ptr;
924
50.9k
                RETURN_SUCCESS;
925
50.9k
            }
926
927
47.3M
            LASTMARK_SAVE();
928
47.3M
            if (state->repeat)
929
24.9M
                MARK_PUSH(ctx->lastmark);
930
931
47.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.88M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.88M
                for (;;) {
936
15.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
12.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.0M
                        ptr--;
939
10.0M
                        ctx->count--;
940
10.0M
                    }
941
5.88M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
2.98M
                        break;
943
2.90M
                    state->ptr = ptr;
944
2.90M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.90M
                            pattern+pattern[0]);
946
2.90M
                    if (ret) {
947
2.90M
                        if (state->repeat)
948
1.50M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.90M
                        RETURN_ON_ERROR(ret);
950
2.90M
                        RETURN_SUCCESS;
951
2.90M
                    }
952
161
                    if (state->repeat)
953
161
                        MARK_POP_KEEP(ctx->lastmark);
954
161
                    LASTMARK_RESTORE();
955
956
161
                    ptr--;
957
161
                    ctx->count--;
958
161
                }
959
2.98M
                if (state->repeat)
960
1.66M
                    MARK_POP_DISCARD(ctx->lastmark);
961
41.4M
            } else {
962
                /* general case */
963
54.8M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
49.2M
                    state->ptr = ptr;
965
49.2M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
49.2M
                            pattern+pattern[0]);
967
49.2M
                    if (ret) {
968
35.8M
                        if (state->repeat)
969
21.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
35.8M
                        RETURN_ON_ERROR(ret);
971
35.8M
                        RETURN_SUCCESS;
972
35.8M
                    }
973
13.3M
                    if (state->repeat)
974
1.20M
                        MARK_POP_KEEP(ctx->lastmark);
975
13.3M
                    LASTMARK_RESTORE();
976
977
13.3M
                    ptr--;
978
13.3M
                    ctx->count--;
979
13.3M
                }
980
5.62M
                if (state->repeat)
981
670k
                    MARK_POP_DISCARD(ctx->lastmark);
982
5.62M
            }
983
8.60M
            RETURN_FAILURE;
984
985
3.29M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.29M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.29M
                   pattern[1], pattern[2]));
997
998
3.29M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.29M
            state->ptr = ptr;
1002
1003
3.29M
            if (pattern[1] == 0)
1004
3.29M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.29M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.29M
            } else {
1028
                /* general case */
1029
3.29M
                LASTMARK_SAVE();
1030
3.29M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
8.01M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
8.01M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
8.01M
                    state->ptr = ptr;
1036
8.01M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
8.01M
                            pattern+pattern[0]);
1038
8.01M
                    if (ret) {
1039
3.29M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.29M
                        RETURN_ON_ERROR(ret);
1042
3.29M
                        RETURN_SUCCESS;
1043
3.29M
                    }
1044
4.71M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
4.71M
                    LASTMARK_RESTORE();
1047
1048
4.71M
                    state->ptr = ptr;
1049
4.71M
                    ret = SRE(count)(state, pattern+3, 1);
1050
4.71M
                    RETURN_ON_ERROR(ret);
1051
4.71M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
4.71M
                    if (ret == 0)
1053
0
                        break;
1054
4.71M
                    assert(ret == 1);
1055
4.71M
                    ptr++;
1056
4.71M
                    ctx->count++;
1057
4.71M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
18.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
18.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
18.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
18.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
18.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
18.3M
            ctx->u.rep->count = -1;
1131
18.3M
            ctx->u.rep->pattern = pattern;
1132
18.3M
            ctx->u.rep->prev = state->repeat;
1133
18.3M
            ctx->u.rep->last_ptr = NULL;
1134
18.3M
            state->repeat = ctx->u.rep;
1135
1136
18.3M
            state->ptr = ptr;
1137
18.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
18.3M
            state->repeat = ctx->u.rep->prev;
1139
18.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
18.3M
            if (ret) {
1142
9.75M
                RETURN_ON_ERROR(ret);
1143
9.75M
                RETURN_SUCCESS;
1144
9.75M
            }
1145
8.58M
            RETURN_FAILURE;
1146
1147
42.2M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
42.2M
            ctx->u.rep = state->repeat;
1155
42.2M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
42.2M
            state->ptr = ptr;
1159
1160
42.2M
            ctx->count = ctx->u.rep->count+1;
1161
1162
42.2M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
42.2M
                   ptr, ctx->count));
1164
1165
42.2M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
42.2M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.11M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
35.1M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
35.1M
                ctx->u.rep->count = ctx->count;
1185
35.1M
                LASTMARK_SAVE();
1186
35.1M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
35.1M
                LAST_PTR_PUSH();
1189
35.1M
                ctx->u.rep->last_ptr = state->ptr;
1190
35.1M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
35.1M
                        ctx->u.rep->pattern+3);
1192
35.1M
                LAST_PTR_POP();
1193
35.1M
                if (ret) {
1194
23.2M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
23.2M
                    RETURN_ON_ERROR(ret);
1196
23.2M
                    RETURN_SUCCESS;
1197
23.2M
                }
1198
11.8M
                MARK_POP(ctx->lastmark);
1199
11.8M
                LASTMARK_RESTORE();
1200
11.8M
                ctx->u.rep->count = ctx->count-1;
1201
11.8M
                state->ptr = ptr;
1202
11.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
18.9M
            state->repeat = ctx->u.rep->prev;
1207
18.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
18.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
18.9M
            RETURN_ON_SUCCESS(ret);
1211
9.17M
            state->ptr = ptr;
1212
9.17M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.69M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.69M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.69M
                   ptr, pattern[1]));
1565
2.69M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.69M
            state->ptr = ptr - pattern[1];
1568
2.69M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.69M
            RETURN_ON_FAILURE(ret);
1570
2.55M
            pattern += pattern[0];
1571
2.55M
            DISPATCH;
1572
1573
5.10M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.10M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.10M
                   ptr, pattern[1]));
1578
5.10M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.10M
                state->ptr = ptr - pattern[1];
1580
5.10M
                LASTMARK_SAVE();
1581
5.10M
                if (state->repeat)
1582
5.10M
                    MARK_PUSH(ctx->lastmark);
1583
1584
10.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
10.2M
                if (ret) {
1586
1.40k
                    if (state->repeat)
1587
1.40k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.40k
                    RETURN_ON_ERROR(ret);
1589
1.40k
                    RETURN_FAILURE;
1590
1.40k
                }
1591
5.10M
                if (state->repeat)
1592
5.10M
                    MARK_POP(ctx->lastmark);
1593
5.10M
                LASTMARK_RESTORE();
1594
5.10M
            }
1595
5.10M
            pattern += pattern[0];
1596
5.10M
            DISPATCH;
1597
1598
5.10M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
406M
exit:
1620
406M
    ctx_pos = ctx->last_ctx_pos;
1621
406M
    jump = ctx->jump;
1622
406M
    DATA_POP_DISCARD(ctx);
1623
406M
    if (ctx_pos == -1) {
1624
160M
        state->sigcount = sigcount;
1625
160M
        return ret;
1626
160M
    }
1627
245M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
245M
    switch (jump) {
1630
35.1M
        case JUMP_MAX_UNTIL_2:
1631
35.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
35.1M
            goto jump_max_until_2;
1633
18.9M
        case JUMP_MAX_UNTIL_3:
1634
18.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
18.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
105M
        case JUMP_BRANCH:
1643
105M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
105M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
18.3M
        case JUMP_REPEAT:
1658
18.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
18.3M
            goto jump_repeat;
1660
2.90M
        case JUMP_REPEAT_ONE_1:
1661
2.90M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.90M
            goto jump_repeat_one_1;
1663
49.2M
        case JUMP_REPEAT_ONE_2:
1664
49.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
49.2M
            goto jump_repeat_one_2;
1666
8.01M
        case JUMP_MIN_REPEAT_ONE:
1667
8.01M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
8.01M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.69M
        case JUMP_ASSERT:
1673
2.69M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.69M
            goto jump_assert;
1675
5.10M
        case JUMP_ASSERT_NOT:
1676
5.10M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.10M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
245M
    }
1683
1684
0
    return ret; /* should never get here */
1685
245M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
241M
{
601
241M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
241M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
241M
    Py_ssize_t ret = 0;
604
241M
    int jump;
605
241M
    unsigned int sigcount = state->sigcount;
606
607
241M
    SRE(match_context)* ctx;
608
241M
    SRE(match_context)* nextctx;
609
241M
    INIT_TRACE(state);
610
611
241M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
241M
    DATA_ALLOC(SRE(match_context), ctx);
614
241M
    ctx->last_ctx_pos = -1;
615
241M
    ctx->jump = JUMP_NONE;
616
241M
    ctx->toplevel = toplevel;
617
241M
    ctx_pos = alloc_pos;
618
619
241M
#if USE_COMPUTED_GOTOS
620
241M
#include "sre_targets.h"
621
241M
#endif
622
623
494M
entrance:
624
625
494M
    ;  // Fashion statement.
626
494M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
494M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
21.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
180k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
180k
                   end - ptr, (size_t) pattern[3]));
634
180k
            RETURN_FAILURE;
635
180k
        }
636
20.8M
        pattern += pattern[1] + 1;
637
20.8M
    }
638
639
494M
#if USE_COMPUTED_GOTOS
640
494M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
494M
    {
647
648
494M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
233M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
233M
                   ptr, pattern[0]));
653
233M
            {
654
233M
                int i = pattern[0];
655
233M
                if (i & 1)
656
25.8M
                    state->lastindex = i/2 + 1;
657
233M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
232M
                    int j = state->lastmark + 1;
663
234M
                    while (j < i)
664
2.24M
                        state->mark[j++] = NULL;
665
232M
                    state->lastmark = i;
666
232M
                }
667
233M
                state->mark[i] = ptr;
668
233M
            }
669
233M
            pattern++;
670
233M
            DISPATCH;
671
672
233M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
36.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
36.8M
                   ptr, *pattern));
677
36.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
15.8M
                RETURN_FAILURE;
679
21.0M
            pattern++;
680
21.0M
            ptr++;
681
21.0M
            DISPATCH;
682
683
21.0M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
71.0M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
71.0M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
71.0M
            if (ctx->toplevel &&
698
15.9M
                ((state->match_all && ptr != state->end) ||
699
15.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
71.0M
            state->ptr = ptr;
704
71.0M
            RETURN_SUCCESS;
705
706
16.8M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
16.8M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
16.8M
            if (!SRE(at)(state, ptr, *pattern))
711
15.6M
                RETURN_FAILURE;
712
1.18M
            pattern++;
713
1.18M
            DISPATCH;
714
715
1.18M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
107M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
107M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
107M
            if (ptr >= end ||
749
107M
                !SRE(charset)(state, pattern + 1, *ptr))
750
23.4M
                RETURN_FAILURE;
751
84.0M
            pattern += pattern[0];
752
84.0M
            ptr++;
753
84.0M
            DISPATCH;
754
755
84.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.28M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.28M
                   pattern, ptr, pattern[0]));
758
5.28M
            if (ptr >= end ||
759
5.28M
                sre_lower_ascii(*ptr) != *pattern)
760
40.5k
                RETURN_FAILURE;
761
5.24M
            pattern++;
762
5.24M
            ptr++;
763
5.24M
            DISPATCH;
764
765
5.24M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
19.9M
        TARGET(SRE_OP_JUMP):
845
19.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
19.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
19.9M
                   ptr, pattern[0]));
850
19.9M
            pattern += pattern[0];
851
19.9M
            DISPATCH;
852
853
28.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
28.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
28.8M
            LASTMARK_SAVE();
858
28.8M
            if (state->repeat)
859
25.2M
                MARK_PUSH(ctx->lastmark);
860
60.6M
            for (; pattern[0]; pattern += pattern[0]) {
861
51.3M
                if (pattern[1] == SRE_OP_LITERAL &&
862
22.2M
                    (ptr >= end ||
863
22.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
11.4M
                    continue;
865
39.9M
                if (pattern[1] == SRE_OP_IN &&
866
22.8M
                    (ptr >= end ||
867
22.8M
                     !SRE(charset)(state, pattern + 3,
868
22.8M
                                   (SRE_CODE) *ptr)))
869
15.0M
                    continue;
870
24.8M
                state->ptr = ptr;
871
24.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
24.8M
                if (ret) {
873
19.4M
                    if (state->repeat)
874
18.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.4M
                    RETURN_ON_ERROR(ret);
876
19.4M
                    RETURN_SUCCESS;
877
19.4M
                }
878
5.35M
                if (state->repeat)
879
3.26k
                    MARK_POP_KEEP(ctx->lastmark);
880
5.35M
                LASTMARK_RESTORE();
881
5.35M
            }
882
9.34M
            if (state->repeat)
883
7.07M
                MARK_POP_DISCARD(ctx->lastmark);
884
9.34M
            RETURN_FAILURE;
885
886
235M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
235M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
235M
                   pattern[1], pattern[2]));
898
899
235M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
214k
                RETURN_FAILURE; /* cannot match */
901
902
235M
            state->ptr = ptr;
903
904
235M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
235M
            RETURN_ON_ERROR(ret);
906
235M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
235M
            ctx->count = ret;
908
235M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
235M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
175M
                RETURN_FAILURE;
917
918
60.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.11M
                ptr == state->end &&
920
18.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.7k
            {
922
                /* tail is empty.  we're finished */
923
18.7k
                state->ptr = ptr;
924
18.7k
                RETURN_SUCCESS;
925
18.7k
            }
926
927
60.0M
            LASTMARK_SAVE();
928
60.0M
            if (state->repeat)
929
30.8M
                MARK_PUSH(ctx->lastmark);
930
931
60.0M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
10.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
10.7M
                for (;;) {
936
23.2M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
16.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
12.4M
                        ptr--;
939
12.4M
                        ctx->count--;
940
12.4M
                    }
941
10.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
6.90M
                        break;
943
3.88M
                    state->ptr = ptr;
944
3.88M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.88M
                            pattern+pattern[0]);
946
3.88M
                    if (ret) {
947
3.88M
                        if (state->repeat)
948
3.85M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.88M
                        RETURN_ON_ERROR(ret);
950
3.88M
                        RETURN_SUCCESS;
951
3.88M
                    }
952
220
                    if (state->repeat)
953
220
                        MARK_POP_KEEP(ctx->lastmark);
954
220
                    LASTMARK_RESTORE();
955
956
220
                    ptr--;
957
220
                    ctx->count--;
958
220
                }
959
6.90M
                if (state->repeat)
960
6.90M
                    MARK_POP_DISCARD(ctx->lastmark);
961
49.2M
            } else {
962
                /* general case */
963
69.8M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
62.9M
                    state->ptr = ptr;
965
62.9M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
62.9M
                            pattern+pattern[0]);
967
62.9M
                    if (ret) {
968
42.2M
                        if (state->repeat)
969
19.8M
                            MARK_POP_DISCARD(ctx->lastmark);
970
42.2M
                        RETURN_ON_ERROR(ret);
971
42.2M
                        RETURN_SUCCESS;
972
42.2M
                    }
973
20.6M
                    if (state->repeat)
974
541k
                        MARK_POP_KEEP(ctx->lastmark);
975
20.6M
                    LASTMARK_RESTORE();
976
977
20.6M
                    ptr--;
978
20.6M
                    ctx->count--;
979
20.6M
                }
980
6.98M
                if (state->repeat)
981
286k
                    MARK_POP_DISCARD(ctx->lastmark);
982
6.98M
            }
983
13.8M
            RETURN_FAILURE;
984
985
531k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
531k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
531k
                   pattern[1], pattern[2]));
997
998
531k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
531k
            state->ptr = ptr;
1002
1003
531k
            if (pattern[1] == 0)
1004
531k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
531k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
531k
            } else {
1028
                /* general case */
1029
531k
                LASTMARK_SAVE();
1030
531k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
7.20M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
7.20M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
7.20M
                    state->ptr = ptr;
1036
7.20M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
7.20M
                            pattern+pattern[0]);
1038
7.20M
                    if (ret) {
1039
531k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
531k
                        RETURN_ON_ERROR(ret);
1042
531k
                        RETURN_SUCCESS;
1043
531k
                    }
1044
6.66M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
6.66M
                    LASTMARK_RESTORE();
1047
1048
6.66M
                    state->ptr = ptr;
1049
6.66M
                    ret = SRE(count)(state, pattern+3, 1);
1050
6.66M
                    RETURN_ON_ERROR(ret);
1051
6.66M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
6.66M
                    if (ret == 0)
1053
0
                        break;
1054
6.66M
                    assert(ret == 1);
1055
6.66M
                    ptr++;
1056
6.66M
                    ctx->count++;
1057
6.66M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
34.2M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
34.2M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
34.2M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
34.2M
            ctx->u.rep = repeat_pool_malloc(state);
1127
34.2M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
34.2M
            ctx->u.rep->count = -1;
1131
34.2M
            ctx->u.rep->pattern = pattern;
1132
34.2M
            ctx->u.rep->prev = state->repeat;
1133
34.2M
            ctx->u.rep->last_ptr = NULL;
1134
34.2M
            state->repeat = ctx->u.rep;
1135
1136
34.2M
            state->ptr = ptr;
1137
34.2M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
34.2M
            state->repeat = ctx->u.rep->prev;
1139
34.2M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
34.2M
            if (ret) {
1142
20.1M
                RETURN_ON_ERROR(ret);
1143
20.1M
                RETURN_SUCCESS;
1144
20.1M
            }
1145
14.1M
            RETURN_FAILURE;
1146
1147
63.5M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
63.5M
            ctx->u.rep = state->repeat;
1155
63.5M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
63.5M
            state->ptr = ptr;
1159
1160
63.5M
            ctx->count = ctx->u.rep->count+1;
1161
1162
63.5M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
63.5M
                   ptr, ctx->count));
1164
1165
63.5M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
63.5M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.88M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
60.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
60.6M
                ctx->u.rep->count = ctx->count;
1185
60.6M
                LASTMARK_SAVE();
1186
60.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
60.6M
                LAST_PTR_PUSH();
1189
60.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
60.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
60.6M
                        ctx->u.rep->pattern+3);
1192
60.6M
                LAST_PTR_POP();
1193
60.6M
                if (ret) {
1194
29.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
29.0M
                    RETURN_ON_ERROR(ret);
1196
29.0M
                    RETURN_SUCCESS;
1197
29.0M
                }
1198
31.6M
                MARK_POP(ctx->lastmark);
1199
31.6M
                LASTMARK_RESTORE();
1200
31.6M
                ctx->u.rep->count = ctx->count-1;
1201
31.6M
                state->ptr = ptr;
1202
31.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
34.5M
            state->repeat = ctx->u.rep->prev;
1207
34.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
34.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
34.5M
            RETURN_ON_SUCCESS(ret);
1211
14.4M
            state->ptr = ptr;
1212
14.4M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
15.7M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
15.7M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
15.7M
                   ptr, pattern[1]));
1565
15.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
15.7M
            state->ptr = ptr - pattern[1];
1568
15.7M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
15.7M
            RETURN_ON_FAILURE(ret);
1570
10.1M
            pattern += pattern[0];
1571
10.1M
            DISPATCH;
1572
1573
10.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
8.95M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
8.95M
                   ptr, pattern[1]));
1578
8.95M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
8.95M
                state->ptr = ptr - pattern[1];
1580
8.95M
                LASTMARK_SAVE();
1581
8.95M
                if (state->repeat)
1582
8.95M
                    MARK_PUSH(ctx->lastmark);
1583
1584
17.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
17.9M
                if (ret) {
1586
3.01k
                    if (state->repeat)
1587
3.01k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.01k
                    RETURN_ON_ERROR(ret);
1589
3.01k
                    RETURN_FAILURE;
1590
3.01k
                }
1591
8.95M
                if (state->repeat)
1592
8.95M
                    MARK_POP(ctx->lastmark);
1593
8.95M
                LASTMARK_RESTORE();
1594
8.95M
            }
1595
8.95M
            pattern += pattern[0];
1596
8.95M
            DISPATCH;
1597
1598
8.95M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
494M
exit:
1620
494M
    ctx_pos = ctx->last_ctx_pos;
1621
494M
    jump = ctx->jump;
1622
494M
    DATA_POP_DISCARD(ctx);
1623
494M
    if (ctx_pos == -1) {
1624
241M
        state->sigcount = sigcount;
1625
241M
        return ret;
1626
241M
    }
1627
253M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
253M
    switch (jump) {
1630
60.6M
        case JUMP_MAX_UNTIL_2:
1631
60.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
60.6M
            goto jump_max_until_2;
1633
34.5M
        case JUMP_MAX_UNTIL_3:
1634
34.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
34.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
24.8M
        case JUMP_BRANCH:
1643
24.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
24.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
34.2M
        case JUMP_REPEAT:
1658
34.2M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
34.2M
            goto jump_repeat;
1660
3.88M
        case JUMP_REPEAT_ONE_1:
1661
3.88M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.88M
            goto jump_repeat_one_1;
1663
62.9M
        case JUMP_REPEAT_ONE_2:
1664
62.9M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
62.9M
            goto jump_repeat_one_2;
1666
7.20M
        case JUMP_MIN_REPEAT_ONE:
1667
7.20M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
7.20M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
15.7M
        case JUMP_ASSERT:
1673
15.7M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
15.7M
            goto jump_assert;
1675
8.95M
        case JUMP_ASSERT_NOT:
1676
8.95M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
8.95M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
253M
    }
1683
1684
0
    return ret; /* should never get here */
1685
253M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
89.1M
{
601
89.1M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
89.1M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
89.1M
    Py_ssize_t ret = 0;
604
89.1M
    int jump;
605
89.1M
    unsigned int sigcount = state->sigcount;
606
607
89.1M
    SRE(match_context)* ctx;
608
89.1M
    SRE(match_context)* nextctx;
609
89.1M
    INIT_TRACE(state);
610
611
89.1M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
89.1M
    DATA_ALLOC(SRE(match_context), ctx);
614
89.1M
    ctx->last_ctx_pos = -1;
615
89.1M
    ctx->jump = JUMP_NONE;
616
89.1M
    ctx->toplevel = toplevel;
617
89.1M
    ctx_pos = alloc_pos;
618
619
89.1M
#if USE_COMPUTED_GOTOS
620
89.1M
#include "sre_targets.h"
621
89.1M
#endif
622
623
549M
entrance:
624
625
549M
    ;  // Fashion statement.
626
549M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
549M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
20.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.28k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.28k
                   end - ptr, (size_t) pattern[3]));
634
3.28k
            RETURN_FAILURE;
635
3.28k
        }
636
20.0M
        pattern += pattern[1] + 1;
637
20.0M
    }
638
639
549M
#if USE_COMPUTED_GOTOS
640
549M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
549M
    {
647
648
549M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
151M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
151M
                   ptr, pattern[0]));
653
151M
            {
654
151M
                int i = pattern[0];
655
151M
                if (i & 1)
656
31.6M
                    state->lastindex = i/2 + 1;
657
151M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
148M
                    int j = state->lastmark + 1;
663
151M
                    while (j < i)
664
2.31M
                        state->mark[j++] = NULL;
665
148M
                    state->lastmark = i;
666
148M
                }
667
151M
                state->mark[i] = ptr;
668
151M
            }
669
151M
            pattern++;
670
151M
            DISPATCH;
671
672
151M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal character */
674
            /* <LITERAL> <code> */
675
37.5M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
37.5M
                   ptr, *pattern));
677
37.5M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
14.2M
                RETURN_FAILURE;
679
23.3M
            pattern++;
680
23.3M
            ptr++;
681
23.3M
            DISPATCH;
682
683
23.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
62.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
62.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
62.2M
            if (ctx->toplevel &&
698
19.2M
                ((state->match_all && ptr != state->end) ||
699
19.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
62.2M
            state->ptr = ptr;
704
62.2M
            RETURN_SUCCESS;
705
706
17.8M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
17.8M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
17.8M
            if (!SRE(at)(state, ptr, *pattern))
711
17.7M
                RETURN_FAILURE;
712
44.3k
            pattern++;
713
44.3k
            DISPATCH;
714
715
44.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match character in given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
119M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
119M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
119M
            if (ptr >= end ||
749
119M
                !SRE(charset)(state, pattern + 1, *ptr))
750
21.4M
                RETURN_FAILURE;
751
98.4M
            pattern += pattern[0];
752
98.4M
            ptr++;
753
98.4M
            DISPATCH;
754
755
98.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.27M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.27M
                   pattern, ptr, pattern[0]));
758
2.27M
            if (ptr >= end ||
759
2.27M
                sre_lower_ascii(*ptr) != *pattern)
760
20.7k
                RETURN_FAILURE;
761
2.25M
            pattern++;
762
2.25M
            ptr++;
763
2.25M
            DISPATCH;
764
765
2.25M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
38.3M
        TARGET(SRE_OP_JUMP):
845
38.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
38.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
38.3M
                   ptr, pattern[0]));
850
38.3M
            pattern += pattern[0];
851
38.3M
            DISPATCH;
852
853
56.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
56.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
56.2M
            LASTMARK_SAVE();
858
56.2M
            if (state->repeat)
859
52.0M
                MARK_PUSH(ctx->lastmark);
860
118M
            for (; pattern[0]; pattern += pattern[0]) {
861
100M
                if (pattern[1] == SRE_OP_LITERAL &&
862
47.1M
                    (ptr >= end ||
863
47.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
32.3M
                    continue;
865
68.1M
                if (pattern[1] == SRE_OP_IN &&
866
47.1M
                    (ptr >= end ||
867
47.1M
                     !SRE(charset)(state, pattern + 3,
868
47.1M
                                   (SRE_CODE) *ptr)))
869
29.3M
                    continue;
870
38.7M
                state->ptr = ptr;
871
38.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
38.7M
                if (ret) {
873
37.9M
                    if (state->repeat)
874
34.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
37.9M
                    RETURN_ON_ERROR(ret);
876
37.9M
                    RETURN_SUCCESS;
877
37.9M
                }
878
883k
                if (state->repeat)
879
14.9k
                    MARK_POP_KEEP(ctx->lastmark);
880
883k
                LASTMARK_RESTORE();
881
883k
            }
882
18.3M
            if (state->repeat)
883
17.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
18.3M
            RETURN_FAILURE;
885
886
177M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
177M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
177M
                   pattern[1], pattern[2]));
898
899
177M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
20.4k
                RETURN_FAILURE; /* cannot match */
901
902
177M
            state->ptr = ptr;
903
904
177M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
177M
            RETURN_ON_ERROR(ret);
906
177M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
177M
            ctx->count = ret;
908
177M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
177M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
61.0M
                RETURN_FAILURE;
917
918
116M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.06M
                ptr == state->end &&
920
4.14k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
4.14k
            {
922
                /* tail is empty.  we're finished */
923
4.14k
                state->ptr = ptr;
924
4.14k
                RETURN_SUCCESS;
925
4.14k
            }
926
927
116M
            LASTMARK_SAVE();
928
116M
            if (state->repeat)
929
86.4M
                MARK_PUSH(ctx->lastmark);
930
931
116M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
37.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
37.7M
                for (;;) {
936
93.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
62.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
55.6M
                        ptr--;
939
55.6M
                        ctx->count--;
940
55.6M
                    }
941
37.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
30.6M
                        break;
943
7.14M
                    state->ptr = ptr;
944
7.14M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.14M
                            pattern+pattern[0]);
946
7.14M
                    if (ret) {
947
7.14M
                        if (state->repeat)
948
7.14M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.14M
                        RETURN_ON_ERROR(ret);
950
7.14M
                        RETURN_SUCCESS;
951
7.14M
                    }
952
276
                    if (state->repeat)
953
276
                        MARK_POP_KEEP(ctx->lastmark);
954
276
                    LASTMARK_RESTORE();
955
956
276
                    ptr--;
957
276
                    ctx->count--;
958
276
                }
959
30.6M
                if (state->repeat)
960
30.6M
                    MARK_POP_DISCARD(ctx->lastmark);
961
78.4M
            } else {
962
                /* general case */
963
98.0M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
95.2M
                    state->ptr = ptr;
965
95.2M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
95.2M
                            pattern+pattern[0]);
967
95.2M
                    if (ret) {
968
75.6M
                        if (state->repeat)
969
48.6M
                            MARK_POP_DISCARD(ctx->lastmark);
970
75.6M
                        RETURN_ON_ERROR(ret);
971
75.6M
                        RETURN_SUCCESS;
972
75.6M
                    }
973
19.5M
                    if (state->repeat)
974
175k
                        MARK_POP_KEEP(ctx->lastmark);
975
19.5M
                    LASTMARK_RESTORE();
976
977
19.5M
                    ptr--;
978
19.5M
                    ctx->count--;
979
19.5M
                }
980
2.72M
                if (state->repeat)
981
95.0k
                    MARK_POP_DISCARD(ctx->lastmark);
982
2.72M
            }
983
33.3M
            RETURN_FAILURE;
984
985
18.9k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
18.9k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
18.9k
                   pattern[1], pattern[2]));
997
998
18.9k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
18.9k
            state->ptr = ptr;
1002
1003
18.9k
            if (pattern[1] == 0)
1004
18.9k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
18.9k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
18.9k
            } else {
1028
                /* general case */
1029
18.9k
                LASTMARK_SAVE();
1030
18.9k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
2.64M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
2.64M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
2.64M
                    state->ptr = ptr;
1036
2.64M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
2.64M
                            pattern+pattern[0]);
1038
2.64M
                    if (ret) {
1039
18.9k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
18.9k
                        RETURN_ON_ERROR(ret);
1042
18.9k
                        RETURN_SUCCESS;
1043
18.9k
                    }
1044
2.62M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
2.62M
                    LASTMARK_RESTORE();
1047
1048
2.62M
                    state->ptr = ptr;
1049
2.62M
                    ret = SRE(count)(state, pattern+3, 1);
1050
2.62M
                    RETURN_ON_ERROR(ret);
1051
2.62M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
2.62M
                    if (ret == 0)
1053
0
                        break;
1054
2.62M
                    assert(ret == 1);
1055
2.62M
                    ptr++;
1056
2.62M
                    ctx->count++;
1057
2.62M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
70.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               item <UNTIL> tail */
1122
70.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
70.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
70.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
70.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
70.9M
            ctx->u.rep->count = -1;
1131
70.9M
            ctx->u.rep->pattern = pattern;
1132
70.9M
            ctx->u.rep->prev = state->repeat;
1133
70.9M
            ctx->u.rep->last_ptr = NULL;
1134
70.9M
            state->repeat = ctx->u.rep;
1135
1136
70.9M
            state->ptr = ptr;
1137
70.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
70.9M
            state->repeat = ctx->u.rep->prev;
1139
70.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
70.9M
            if (ret) {
1142
53.9M
                RETURN_ON_ERROR(ret);
1143
53.9M
                RETURN_SUCCESS;
1144
53.9M
            }
1145
17.0M
            RETURN_FAILURE;
1146
1147
127M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
127M
            ctx->u.rep = state->repeat;
1155
127M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
127M
            state->ptr = ptr;
1159
1160
127M
            ctx->count = ctx->u.rep->count+1;
1161
1162
127M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
127M
                   ptr, ctx->count));
1164
1165
127M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
127M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.02M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
122M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
122M
                ctx->u.rep->count = ctx->count;
1185
122M
                LASTMARK_SAVE();
1186
122M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
122M
                LAST_PTR_PUSH();
1189
122M
                ctx->u.rep->last_ptr = state->ptr;
1190
122M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
122M
                        ctx->u.rep->pattern+3);
1192
122M
                LAST_PTR_POP();
1193
122M
                if (ret) {
1194
56.7M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
56.7M
                    RETURN_ON_ERROR(ret);
1196
56.7M
                    RETURN_SUCCESS;
1197
56.7M
                }
1198
66.0M
                MARK_POP(ctx->lastmark);
1199
66.0M
                LASTMARK_RESTORE();
1200
66.0M
                ctx->u.rep->count = ctx->count-1;
1201
66.0M
                state->ptr = ptr;
1202
66.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
71.0M
            state->repeat = ctx->u.rep->prev;
1207
71.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
71.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
71.0M
            RETURN_ON_SUCCESS(ret);
1211
17.0M
            state->ptr = ptr;
1212
17.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximin matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
37.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
37.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
37.0M
                   ptr, pattern[1]));
1565
37.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
37.0M
            state->ptr = ptr - pattern[1];
1568
37.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
37.0M
            RETURN_ON_FAILURE(ret);
1570
35.8M
            pattern += pattern[0];
1571
35.8M
            DISPATCH;
1572
1573
35.8M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
14.3M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
14.3M
                   ptr, pattern[1]));
1578
14.3M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
14.3M
                state->ptr = ptr - pattern[1];
1580
14.3M
                LASTMARK_SAVE();
1581
14.3M
                if (state->repeat)
1582
14.3M
                    MARK_PUSH(ctx->lastmark);
1583
1584
28.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
28.7M
                if (ret) {
1586
14.6k
                    if (state->repeat)
1587
14.6k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.6k
                    RETURN_ON_ERROR(ret);
1589
14.6k
                    RETURN_FAILURE;
1590
14.6k
                }
1591
14.3M
                if (state->repeat)
1592
14.3M
                    MARK_POP(ctx->lastmark);
1593
14.3M
                LASTMARK_RESTORE();
1594
14.3M
            }
1595
14.3M
            pattern += pattern[0];
1596
14.3M
            DISPATCH;
1597
1598
14.3M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
549M
exit:
1620
549M
    ctx_pos = ctx->last_ctx_pos;
1621
549M
    jump = ctx->jump;
1622
549M
    DATA_POP_DISCARD(ctx);
1623
549M
    if (ctx_pos == -1) {
1624
89.1M
        state->sigcount = sigcount;
1625
89.1M
        return ret;
1626
89.1M
    }
1627
460M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
460M
    switch (jump) {
1630
122M
        case JUMP_MAX_UNTIL_2:
1631
122M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
122M
            goto jump_max_until_2;
1633
71.0M
        case JUMP_MAX_UNTIL_3:
1634
71.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
71.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
38.7M
        case JUMP_BRANCH:
1643
38.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
38.7M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
70.9M
        case JUMP_REPEAT:
1658
70.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
70.9M
            goto jump_repeat;
1660
7.14M
        case JUMP_REPEAT_ONE_1:
1661
7.14M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.14M
            goto jump_repeat_one_1;
1663
95.2M
        case JUMP_REPEAT_ONE_2:
1664
95.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
95.2M
            goto jump_repeat_one_2;
1666
2.64M
        case JUMP_MIN_REPEAT_ONE:
1667
2.64M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
2.64M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
37.0M
        case JUMP_ASSERT:
1673
37.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
37.0M
            goto jump_assert;
1675
14.3M
        case JUMP_ASSERT_NOT:
1676
14.3M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
14.3M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
460M
    }
1683
1684
0
    return ret; /* should never get here */
1685
460M
}
1686
1687
/* need to reset capturing groups between two SRE(match) callings in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
323M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
99.6M
{
1694
99.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
99.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
99.6M
    Py_ssize_t status = 0;
1697
99.6M
    Py_ssize_t prefix_len = 0;
1698
99.6M
    Py_ssize_t prefix_skip = 0;
1699
99.6M
    SRE_CODE* prefix = NULL;
1700
99.6M
    SRE_CODE* charset = NULL;
1701
99.6M
    SRE_CODE* overlap = NULL;
1702
99.6M
    int flags = 0;
1703
99.6M
    INIT_TRACE(state);
1704
1705
99.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
99.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
99.6M
        flags = pattern[2];
1713
1714
99.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.25M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.25M
                   end - ptr, (size_t) pattern[3]));
1717
4.25M
            return 0;
1718
4.25M
        }
1719
95.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.61M
            end -= pattern[3] - 1;
1723
8.61M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.61M
        }
1726
1727
95.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.61M
            prefix_len = pattern[5];
1731
8.61M
            prefix_skip = pattern[6];
1732
8.61M
            prefix = pattern + 7;
1733
8.61M
            overlap = prefix + prefix_len - 1;
1734
86.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
77.7M
            charset = pattern + 5;
1738
1739
95.4M
        pattern += 1 + pattern[1];
1740
95.4M
    }
1741
1742
95.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
95.4M
           prefix, prefix_len, prefix_skip));
1744
95.4M
    TRACE(("charset = %p\n", charset));
1745
1746
95.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.53M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.02M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.02M
#endif
1753
4.02M
        end = (SRE_CHAR *)state->end;
1754
4.02M
        state->must_advance = 0;
1755
8.00M
        while (ptr < end) {
1756
101M
            while (*ptr != c) {
1757
94.1M
                if (++ptr >= end)
1758
968k
                    return 0;
1759
94.1M
            }
1760
6.99M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.99M
            state->start = ptr;
1762
6.99M
            state->ptr = ptr + prefix_skip;
1763
6.99M
            if (flags & SRE_INFO_LITERAL)
1764
3.39k
                return 1; /* we got all of it */
1765
6.99M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.99M
            if (status != 0)
1767
6.52M
                return status;
1768
464k
            ++ptr;
1769
464k
            RESET_CAPTURE_GROUP();
1770
464k
        }
1771
39.9k
        return 0;
1772
4.02M
    }
1773
1774
87.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
1.08M
        Py_ssize_t i = 0;
1778
1779
1.08M
        end = (SRE_CHAR *)state->end;
1780
1.08M
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
2.55M
        for (i = 0; i < prefix_len; i++)
1784
1.70M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
850k
#endif
1787
1.80M
        while (ptr < end) {
1788
1.80M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
7.71M
            while (*ptr++ != c) {
1790
5.91M
                if (ptr >= end)
1791
304
                    return 0;
1792
5.91M
            }
1793
1.79M
            if (ptr >= end)
1794
54
                return 0;
1795
1796
1.79M
            i = 1;
1797
1.79M
            state->must_advance = 0;
1798
1.80M
            do {
1799
1.80M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.69M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.69M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.69M
                    state->start = ptr - (prefix_len - 1);
1808
1.69M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.69M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.69M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.69M
                    if (status != 0)
1813
1.07M
                        return status;
1814
                    /* close but no cigar -- try again */
1815
616k
                    if (++ptr >= end)
1816
38
                        return 0;
1817
616k
                    RESET_CAPTURE_GROUP();
1818
616k
                }
1819
721k
                i = overlap[i];
1820
721k
            } while (i != 0);
1821
1.79M
        }
1822
0
        return 0;
1823
1.08M
    }
1824
1825
86.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
77.7M
        end = (SRE_CHAR *)state->end;
1828
77.7M
        state->must_advance = 0;
1829
80.1M
        for (;;) {
1830
347M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
266M
                ptr++;
1832
80.1M
            if (ptr >= end)
1833
3.91M
                return 0;
1834
76.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
76.2M
            state->start = ptr;
1836
76.2M
            state->ptr = ptr;
1837
76.2M
            status = SRE(match)(state, pattern, 0);
1838
76.2M
            if (status != 0)
1839
73.8M
                break;
1840
2.38M
            ptr++;
1841
2.38M
            RESET_CAPTURE_GROUP();
1842
2.38M
        }
1843
77.7M
    } else {
1844
        /* general case */
1845
9.05M
        assert(ptr <= end);
1846
9.05M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
9.05M
        state->start = state->ptr = ptr;
1848
9.05M
        status = SRE(match)(state, pattern, 1);
1849
9.05M
        state->must_advance = 0;
1850
9.05M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.08M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
60
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.08M
        {
1854
4.08M
            state->start = state->ptr = ptr = end;
1855
4.08M
            return 0;
1856
4.08M
        }
1857
325M
        while (status == 0 && ptr < end) {
1858
320M
            ptr++;
1859
320M
            RESET_CAPTURE_GROUP();
1860
320M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
320M
            state->start = state->ptr = ptr;
1862
320M
            status = SRE(match)(state, pattern, 0);
1863
320M
        }
1864
4.96M
    }
1865
1866
78.7M
    return status;
1867
86.8M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
45.2M
{
1694
45.2M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
45.2M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
45.2M
    Py_ssize_t status = 0;
1697
45.2M
    Py_ssize_t prefix_len = 0;
1698
45.2M
    Py_ssize_t prefix_skip = 0;
1699
45.2M
    SRE_CODE* prefix = NULL;
1700
45.2M
    SRE_CODE* charset = NULL;
1701
45.2M
    SRE_CODE* overlap = NULL;
1702
45.2M
    int flags = 0;
1703
45.2M
    INIT_TRACE(state);
1704
1705
45.2M
    if (ptr > end)
1706
0
        return 0;
1707
1708
45.2M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
45.2M
        flags = pattern[2];
1713
1714
45.2M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.11M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.11M
                   end - ptr, (size_t) pattern[3]));
1717
4.11M
            return 0;
1718
4.11M
        }
1719
41.1M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.78M
            end -= pattern[3] - 1;
1723
2.78M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.78M
        }
1726
1727
41.1M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.78M
            prefix_len = pattern[5];
1731
2.78M
            prefix_skip = pattern[6];
1732
2.78M
            prefix = pattern + 7;
1733
2.78M
            overlap = prefix + prefix_len - 1;
1734
38.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
31.3M
            charset = pattern + 5;
1738
1739
41.1M
        pattern += 1 + pattern[1];
1740
41.1M
    }
1741
1742
41.1M
    TRACE(("prefix = %p %zd %zd\n",
1743
41.1M
           prefix, prefix_len, prefix_skip));
1744
41.1M
    TRACE(("charset = %p\n", charset));
1745
1746
41.1M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.75M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.75M
#if SIZEOF_SRE_CHAR < 4
1750
2.75M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.75M
#endif
1753
2.75M
        end = (SRE_CHAR *)state->end;
1754
2.75M
        state->must_advance = 0;
1755
2.97M
        while (ptr < end) {
1756
27.0M
            while (*ptr != c) {
1757
24.9M
                if (++ptr >= end)
1758
902k
                    return 0;
1759
24.9M
            }
1760
2.03M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.03M
            state->start = ptr;
1762
2.03M
            state->ptr = ptr + prefix_skip;
1763
2.03M
            if (flags & SRE_INFO_LITERAL)
1764
255
                return 1; /* we got all of it */
1765
2.03M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.03M
            if (status != 0)
1767
1.81M
                return status;
1768
221k
            ++ptr;
1769
221k
            RESET_CAPTURE_GROUP();
1770
221k
        }
1771
36.1k
        return 0;
1772
2.75M
    }
1773
1774
38.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
32.9k
        Py_ssize_t i = 0;
1778
1779
32.9k
        end = (SRE_CHAR *)state->end;
1780
32.9k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
32.9k
#if SIZEOF_SRE_CHAR < 4
1783
98.9k
        for (i = 0; i < prefix_len; i++)
1784
65.9k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
32.9k
#endif
1787
184k
        while (ptr < end) {
1788
184k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.45M
            while (*ptr++ != c) {
1790
1.27M
                if (ptr >= end)
1791
55
                    return 0;
1792
1.27M
            }
1793
183k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
183k
            i = 1;
1797
183k
            state->must_advance = 0;
1798
184k
            do {
1799
184k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
145k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
145k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
145k
                    state->start = ptr - (prefix_len - 1);
1808
145k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
145k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
145k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
145k
                    if (status != 0)
1813
32.8k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
112k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
112k
                    RESET_CAPTURE_GROUP();
1818
112k
                }
1819
151k
                i = overlap[i];
1820
151k
            } while (i != 0);
1821
183k
        }
1822
0
        return 0;
1823
32.9k
    }
1824
1825
38.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
31.3M
        end = (SRE_CHAR *)state->end;
1828
31.3M
        state->must_advance = 0;
1829
32.8M
        for (;;) {
1830
85.7M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
52.9M
                ptr++;
1832
32.8M
            if (ptr >= end)
1833
2.72M
                return 0;
1834
30.1M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
30.1M
            state->start = ptr;
1836
30.1M
            state->ptr = ptr;
1837
30.1M
            status = SRE(match)(state, pattern, 0);
1838
30.1M
            if (status != 0)
1839
28.6M
                break;
1840
1.47M
            ptr++;
1841
1.47M
            RESET_CAPTURE_GROUP();
1842
1.47M
        }
1843
31.3M
    } else {
1844
        /* general case */
1845
6.97M
        assert(ptr <= end);
1846
6.97M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
6.97M
        state->start = state->ptr = ptr;
1848
6.97M
        status = SRE(match)(state, pattern, 1);
1849
6.97M
        state->must_advance = 0;
1850
6.97M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.43M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
16
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
3.43M
        {
1854
3.43M
            state->start = state->ptr = ptr = end;
1855
3.43M
            return 0;
1856
3.43M
        }
1857
88.4M
        while (status == 0 && ptr < end) {
1858
84.8M
            ptr++;
1859
84.8M
            RESET_CAPTURE_GROUP();
1860
84.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
84.8M
            state->start = state->ptr = ptr;
1862
84.8M
            status = SRE(match)(state, pattern, 0);
1863
84.8M
        }
1864
3.54M
    }
1865
1866
32.1M
    return status;
1867
38.3M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
47.1M
{
1694
47.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
47.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
47.1M
    Py_ssize_t status = 0;
1697
47.1M
    Py_ssize_t prefix_len = 0;
1698
47.1M
    Py_ssize_t prefix_skip = 0;
1699
47.1M
    SRE_CODE* prefix = NULL;
1700
47.1M
    SRE_CODE* charset = NULL;
1701
47.1M
    SRE_CODE* overlap = NULL;
1702
47.1M
    int flags = 0;
1703
47.1M
    INIT_TRACE(state);
1704
1705
47.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
47.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
47.1M
        flags = pattern[2];
1713
1714
47.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
133k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
133k
                   end - ptr, (size_t) pattern[3]));
1717
133k
            return 0;
1718
133k
        }
1719
47.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.08M
            end -= pattern[3] - 1;
1723
2.08M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.08M
        }
1726
1727
47.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.08M
            prefix_len = pattern[5];
1731
2.08M
            prefix_skip = pattern[6];
1732
2.08M
            prefix = pattern + 7;
1733
2.08M
            overlap = prefix + prefix_len - 1;
1734
44.9M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
43.0M
            charset = pattern + 5;
1738
1739
47.0M
        pattern += 1 + pattern[1];
1740
47.0M
    }
1741
1742
47.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
47.0M
           prefix, prefix_len, prefix_skip));
1744
47.0M
    TRACE(("charset = %p\n", charset));
1745
1746
47.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.26M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.26M
#if SIZEOF_SRE_CHAR < 4
1750
1.26M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.26M
#endif
1753
1.26M
        end = (SRE_CHAR *)state->end;
1754
1.26M
        state->must_advance = 0;
1755
1.36M
        while (ptr < end) {
1756
44.9M
            while (*ptr != c) {
1757
43.6M
                if (++ptr >= end)
1758
62.6k
                    return 0;
1759
43.6M
            }
1760
1.29M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.29M
            state->start = ptr;
1762
1.29M
            state->ptr = ptr + prefix_skip;
1763
1.29M
            if (flags & SRE_INFO_LITERAL)
1764
1.84k
                return 1; /* we got all of it */
1765
1.29M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.29M
            if (status != 0)
1767
1.19M
                return status;
1768
99.6k
            ++ptr;
1769
99.6k
            RESET_CAPTURE_GROUP();
1770
99.6k
        }
1771
3.11k
        return 0;
1772
1.26M
    }
1773
1774
45.7M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
817k
        Py_ssize_t i = 0;
1778
1779
817k
        end = (SRE_CHAR *)state->end;
1780
817k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
817k
#if SIZEOF_SRE_CHAR < 4
1783
2.45M
        for (i = 0; i < prefix_len; i++)
1784
1.63M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
817k
#endif
1787
1.12M
        while (ptr < end) {
1788
1.12M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.20M
            while (*ptr++ != c) {
1790
2.08M
                if (ptr >= end)
1791
113
                    return 0;
1792
2.08M
            }
1793
1.12M
            if (ptr >= end)
1794
12
                return 0;
1795
1796
1.12M
            i = 1;
1797
1.12M
            state->must_advance = 0;
1798
1.12M
            do {
1799
1.12M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.08M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.08M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.08M
                    state->start = ptr - (prefix_len - 1);
1808
1.08M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.08M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.08M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.08M
                    if (status != 0)
1813
817k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
265k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
265k
                    RESET_CAPTURE_GROUP();
1818
265k
                }
1819
305k
                i = overlap[i];
1820
305k
            } while (i != 0);
1821
1.12M
        }
1822
0
        return 0;
1823
817k
    }
1824
1825
44.9M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
43.0M
        end = (SRE_CHAR *)state->end;
1828
43.0M
        state->must_advance = 0;
1829
43.4M
        for (;;) {
1830
192M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
148M
                ptr++;
1832
43.4M
            if (ptr >= end)
1833
1.14M
                return 0;
1834
42.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
42.3M
            state->start = ptr;
1836
42.3M
            state->ptr = ptr;
1837
42.3M
            status = SRE(match)(state, pattern, 0);
1838
42.3M
            if (status != 0)
1839
41.9M
                break;
1840
421k
            ptr++;
1841
421k
            RESET_CAPTURE_GROUP();
1842
421k
        }
1843
43.0M
    } else {
1844
        /* general case */
1845
1.88M
        assert(ptr <= end);
1846
1.88M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.88M
        state->start = state->ptr = ptr;
1848
1.88M
        status = SRE(match)(state, pattern, 1);
1849
1.88M
        state->must_advance = 0;
1850
1.88M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
629k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
629k
        {
1854
629k
            state->start = state->ptr = ptr = end;
1855
629k
            return 0;
1856
629k
        }
1857
175M
        while (status == 0 && ptr < end) {
1858
174M
            ptr++;
1859
174M
            RESET_CAPTURE_GROUP();
1860
174M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
174M
            state->start = state->ptr = ptr;
1862
174M
            status = SRE(match)(state, pattern, 0);
1863
174M
        }
1864
1.25M
    }
1865
1866
43.1M
    return status;
1867
44.9M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.25M
{
1694
7.25M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.25M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.25M
    Py_ssize_t status = 0;
1697
7.25M
    Py_ssize_t prefix_len = 0;
1698
7.25M
    Py_ssize_t prefix_skip = 0;
1699
7.25M
    SRE_CODE* prefix = NULL;
1700
7.25M
    SRE_CODE* charset = NULL;
1701
7.25M
    SRE_CODE* overlap = NULL;
1702
7.25M
    int flags = 0;
1703
7.25M
    INIT_TRACE(state);
1704
1705
7.25M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.25M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.25M
        flags = pattern[2];
1713
1714
7.25M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.47k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.47k
                   end - ptr, (size_t) pattern[3]));
1717
6.47k
            return 0;
1718
6.47k
        }
1719
7.25M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.74M
            end -= pattern[3] - 1;
1723
3.74M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.74M
        }
1726
1727
7.25M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.74M
            prefix_len = pattern[5];
1731
3.74M
            prefix_skip = pattern[6];
1732
3.74M
            prefix = pattern + 7;
1733
3.74M
            overlap = prefix + prefix_len - 1;
1734
3.74M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.31M
            charset = pattern + 5;
1738
1739
7.25M
        pattern += 1 + pattern[1];
1740
7.25M
    }
1741
1742
7.25M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.25M
           prefix, prefix_len, prefix_skip));
1744
7.25M
    TRACE(("charset = %p\n", charset));
1745
1746
7.25M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.51M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.51M
        end = (SRE_CHAR *)state->end;
1754
3.51M
        state->must_advance = 0;
1755
3.66M
        while (ptr < end) {
1756
29.2M
            while (*ptr != c) {
1757
25.5M
                if (++ptr >= end)
1758
3.38k
                    return 0;
1759
25.5M
            }
1760
3.65M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.65M
            state->start = ptr;
1762
3.65M
            state->ptr = ptr + prefix_skip;
1763
3.65M
            if (flags & SRE_INFO_LITERAL)
1764
1.29k
                return 1; /* we got all of it */
1765
3.65M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.65M
            if (status != 0)
1767
3.51M
                return status;
1768
143k
            ++ptr;
1769
143k
            RESET_CAPTURE_GROUP();
1770
143k
        }
1771
700
        return 0;
1772
3.51M
    }
1773
1774
3.73M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
229k
        Py_ssize_t i = 0;
1778
1779
229k
        end = (SRE_CHAR *)state->end;
1780
229k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
493k
        while (ptr < end) {
1788
493k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.05M
            while (*ptr++ != c) {
1790
2.56M
                if (ptr >= end)
1791
136
                    return 0;
1792
2.56M
            }
1793
493k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
493k
            i = 1;
1797
493k
            state->must_advance = 0;
1798
494k
            do {
1799
494k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
467k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
467k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
467k
                    state->start = ptr - (prefix_len - 1);
1808
467k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
467k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
467k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
467k
                    if (status != 0)
1813
229k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
238k
                    if (++ptr >= end)
1816
8
                        return 0;
1817
238k
                    RESET_CAPTURE_GROUP();
1818
238k
                }
1819
265k
                i = overlap[i];
1820
265k
            } while (i != 0);
1821
493k
        }
1822
0
        return 0;
1823
229k
    }
1824
1825
3.50M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.31M
        end = (SRE_CHAR *)state->end;
1828
3.31M
        state->must_advance = 0;
1829
3.80M
        for (;;) {
1830
69.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
65.2M
                ptr++;
1832
3.80M
            if (ptr >= end)
1833
47.5k
                return 0;
1834
3.75M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.75M
            state->start = ptr;
1836
3.75M
            state->ptr = ptr;
1837
3.75M
            status = SRE(match)(state, pattern, 0);
1838
3.75M
            if (status != 0)
1839
3.26M
                break;
1840
490k
            ptr++;
1841
490k
            RESET_CAPTURE_GROUP();
1842
490k
        }
1843
3.31M
    } else {
1844
        /* general case */
1845
189k
        assert(ptr <= end);
1846
189k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
189k
        state->start = state->ptr = ptr;
1848
189k
        status = SRE(match)(state, pattern, 1);
1849
189k
        state->must_advance = 0;
1850
189k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
21.2k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
21.2k
        {
1854
21.2k
            state->start = state->ptr = ptr = end;
1855
21.2k
            return 0;
1856
21.2k
        }
1857
61.1M
        while (status == 0 && ptr < end) {
1858
61.0M
            ptr++;
1859
61.0M
            RESET_CAPTURE_GROUP();
1860
61.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
61.0M
            state->start = state->ptr = ptr;
1862
61.0M
            status = SRE(match)(state, pattern, 0);
1863
61.0M
        }
1864
168k
    }
1865
1866
3.43M
    return status;
1867
3.50M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/