Coverage Report

Created: 2025-08-29 06:15

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.5M
{
18
    /* check if pointer is at given position */
19
20
14.5M
    Py_ssize_t thisp, thatp;
21
22
14.5M
    switch (at) {
23
24
6.88M
    case SRE_AT_BEGINNING:
25
6.88M
    case SRE_AT_BEGINNING_STRING:
26
6.88M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.84M
    case SRE_AT_END:
33
4.84M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.84M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.84M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.80M
    case SRE_AT_END_STRING:
42
2.80M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.5M
    }
87
88
0
    return 0;
89
14.5M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
13.0M
{
18
    /* check if pointer is at given position */
19
20
13.0M
    Py_ssize_t thisp, thatp;
21
22
13.0M
    switch (at) {
23
24
6.85M
    case SRE_AT_BEGINNING:
25
6.85M
    case SRE_AT_BEGINNING_STRING:
26
6.85M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.27M
    case SRE_AT_END:
33
4.27M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.27M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.27M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.87M
    case SRE_AT_END_STRING:
42
1.87M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.0M
    }
87
88
0
    return 0;
89
13.0M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
936k
{
18
    /* check if pointer is at given position */
19
20
936k
    Py_ssize_t thisp, thatp;
21
22
936k
    switch (at) {
23
24
29.5k
    case SRE_AT_BEGINNING:
25
29.5k
    case SRE_AT_BEGINNING_STRING:
26
29.5k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
482k
    case SRE_AT_END:
33
482k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
482k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
482k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
424k
    case SRE_AT_END_STRING:
42
424k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
936k
    }
87
88
0
    return 0;
89
936k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
588k
{
18
    /* check if pointer is at given position */
19
20
588k
    Py_ssize_t thisp, thatp;
21
22
588k
    switch (at) {
23
24
4.89k
    case SRE_AT_BEGINNING:
25
4.89k
    case SRE_AT_BEGINNING_STRING:
26
4.89k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
85.8k
    case SRE_AT_END:
33
85.8k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
85.8k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
85.8k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
497k
    case SRE_AT_END_STRING:
42
497k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
588k
    }
87
88
0
    return 0;
89
588k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.70G
{
94
    /* check if character is a member of the given set */
95
96
1.70G
    int ok = 1;
97
98
3.78G
    for (;;) {
99
3.78G
        switch (*set++) {
100
101
1.11G
        case SRE_OP_FAILURE:
102
1.11G
            return !ok;
103
104
1.05G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.05G
            if (ch == set[0])
107
5.00M
                return ok;
108
1.04G
            set++;
109
1.04G
            break;
110
111
11.7M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.7M
            if (sre_category(set[0], (int) ch))
114
8.00M
                return ok;
115
3.76M
            set++;
116
3.76M
            break;
117
118
910M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
910M
            if (ch < 256 &&
121
910M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
391M
                return ok;
123
519M
            set += 256/SRE_CODE_BITS;
124
519M
            break;
125
126
323M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
323M
            if (set[0] <= ch && ch <= set[1])
129
193M
                return ok;
130
130M
            set += 2;
131
130M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
378M
        case SRE_OP_NEGATE:
148
378M
            ok = !ok;
149
378M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.78G
        }
175
3.78G
    }
176
1.70G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
325M
{
94
    /* check if character is a member of the given set */
95
96
325M
    int ok = 1;
97
98
667M
    for (;;) {
99
667M
        switch (*set++) {
100
101
176M
        case SRE_OP_FAILURE:
102
176M
            return !ok;
103
104
203M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
203M
            if (ch == set[0])
107
2.54M
                return ok;
108
201M
            set++;
109
201M
            break;
110
111
10.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.9M
            if (sre_category(set[0], (int) ch))
114
7.18M
                return ok;
115
3.75M
            set++;
116
3.75M
            break;
117
118
81.5M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
81.5M
            if (ch < 256 &&
121
81.5M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
42.0M
                return ok;
123
39.5M
            set += 256/SRE_CODE_BITS;
124
39.5M
            break;
125
126
159M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
159M
            if (set[0] <= ch && ch <= set[1])
129
96.8M
                return ok;
130
62.9M
            set += 2;
131
62.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
35.0M
        case SRE_OP_NEGATE:
148
35.0M
            ok = !ok;
149
35.0M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
667M
        }
175
667M
    }
176
325M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
775M
{
94
    /* check if character is a member of the given set */
95
96
775M
    int ok = 1;
97
98
1.80G
    for (;;) {
99
1.80G
        switch (*set++) {
100
101
543M
        case SRE_OP_FAILURE:
102
543M
            return !ok;
103
104
563M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
563M
            if (ch == set[0])
107
1.43M
                return ok;
108
562M
            set++;
109
562M
            break;
110
111
177k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
177k
            if (sre_category(set[0], (int) ch))
114
170k
                return ok;
115
7.31k
            set++;
116
7.31k
            break;
117
118
377M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
377M
            if (ch < 256 &&
121
377M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
143M
                return ok;
123
233M
            set += 256/SRE_CODE_BITS;
124
233M
            break;
125
126
145M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
145M
            if (set[0] <= ch && ch <= set[1])
129
87.1M
                return ok;
130
58.2M
            set += 2;
131
58.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
170M
        case SRE_OP_NEGATE:
148
170M
            ok = !ok;
149
170M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.80G
        }
175
1.80G
    }
176
775M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
608M
{
94
    /* check if character is a member of the given set */
95
96
608M
    int ok = 1;
97
98
1.31G
    for (;;) {
99
1.31G
        switch (*set++) {
100
101
390M
        case SRE_OP_FAILURE:
102
390M
            return !ok;
103
104
283M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
283M
            if (ch == set[0])
107
1.03M
                return ok;
108
282M
            set++;
109
282M
            break;
110
111
651k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
651k
            if (sre_category(set[0], (int) ch))
114
650k
                return ok;
115
899
            set++;
116
899
            break;
117
118
451M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
451M
            if (ch < 256 &&
121
451M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
206M
                return ok;
123
245M
            set += 256/SRE_CODE_BITS;
124
245M
            break;
125
126
18.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
18.6M
            if (set[0] <= ch && ch <= set[1])
129
9.71M
                return ok;
130
8.94M
            set += 2;
131
8.94M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
172M
        case SRE_OP_NEGATE:
148
172M
            ok = !ok;
149
172M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.31G
        }
175
1.31G
    }
176
608M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
669M
{
195
669M
    SRE_CODE chr;
196
669M
    SRE_CHAR c;
197
669M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
669M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
669M
    Py_ssize_t i;
200
669M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
669M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
16.5M
        end = ptr + maxcount;
205
206
669M
    switch (pattern[0]) {
207
208
588M
    case SRE_OP_IN:
209
        /* repeated set */
210
588M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
958M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
369M
            ptr++;
213
588M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
72.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
72.5M
        chr = pattern[1];
232
72.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
72.5M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
69.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
69.6M
        else
238
69.6M
#endif
239
76.9M
        while (ptr < end && *ptr == c)
240
4.41M
            ptr++;
241
72.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
7.85M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
7.85M
        chr = pattern[1];
270
7.85M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
7.85M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
3.39M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.39M
        else
276
3.39M
#endif
277
42.3M
        while (ptr < end && *ptr != c)
278
34.5M
            ptr++;
279
7.85M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
669M
    }
319
320
669M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
669M
           ptr - (SRE_CHAR*) state->ptr));
322
669M
    return ptr - (SRE_CHAR*) state->ptr;
323
669M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
159M
{
195
159M
    SRE_CODE chr;
196
159M
    SRE_CHAR c;
197
159M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
159M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
159M
    Py_ssize_t i;
200
159M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
159M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.15M
        end = ptr + maxcount;
205
206
159M
    switch (pattern[0]) {
207
208
96.9M
    case SRE_OP_IN:
209
        /* repeated set */
210
96.9M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
203M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
106M
            ptr++;
213
96.9M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
62.7M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
62.7M
        chr = pattern[1];
232
62.7M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
62.7M
        c = (SRE_CHAR) chr;
234
62.7M
#if SIZEOF_SRE_CHAR < 4
235
62.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
62.7M
        else
238
62.7M
#endif
239
64.8M
        while (ptr < end && *ptr == c)
240
2.05M
            ptr++;
241
62.7M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
197k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
197k
        chr = pattern[1];
270
197k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
197k
        c = (SRE_CHAR) chr;
272
197k
#if SIZEOF_SRE_CHAR < 4
273
197k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
197k
        else
276
197k
#endif
277
6.66M
        while (ptr < end && *ptr != c)
278
6.47M
            ptr++;
279
197k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
159M
    }
319
320
159M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
159M
           ptr - (SRE_CHAR*) state->ptr));
322
159M
    return ptr - (SRE_CHAR*) state->ptr;
323
159M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
285M
{
195
285M
    SRE_CODE chr;
196
285M
    SRE_CHAR c;
197
285M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
285M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
285M
    Py_ssize_t i;
200
285M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
285M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
6.38M
        end = ptr + maxcount;
205
206
285M
    switch (pattern[0]) {
207
208
275M
    case SRE_OP_IN:
209
        /* repeated set */
210
275M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
405M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
130M
            ptr++;
213
275M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.89M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.89M
        chr = pattern[1];
232
6.89M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.89M
        c = (SRE_CHAR) chr;
234
6.89M
#if SIZEOF_SRE_CHAR < 4
235
6.89M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.89M
        else
238
6.89M
#endif
239
9.04M
        while (ptr < end && *ptr == c)
240
2.15M
            ptr++;
241
6.89M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.19M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.19M
        chr = pattern[1];
270
3.19M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.19M
        c = (SRE_CHAR) chr;
272
3.19M
#if SIZEOF_SRE_CHAR < 4
273
3.19M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.19M
        else
276
3.19M
#endif
277
10.0M
        while (ptr < end && *ptr != c)
278
6.88M
            ptr++;
279
3.19M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
285M
    }
319
320
285M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
285M
           ptr - (SRE_CHAR*) state->ptr));
322
285M
    return ptr - (SRE_CHAR*) state->ptr;
323
285M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
223M
{
195
223M
    SRE_CODE chr;
196
223M
    SRE_CHAR c;
197
223M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
223M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
223M
    Py_ssize_t i;
200
223M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
223M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.96M
        end = ptr + maxcount;
205
206
223M
    switch (pattern[0]) {
207
208
216M
    case SRE_OP_IN:
209
        /* repeated set */
210
216M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
349M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
132M
            ptr++;
213
216M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.84M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.84M
        chr = pattern[1];
232
2.84M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.84M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
3.04M
        while (ptr < end && *ptr == c)
240
198k
            ptr++;
241
2.84M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
4.46M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
4.46M
        chr = pattern[1];
270
4.46M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
4.46M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
25.6M
        while (ptr < end && *ptr != c)
278
21.1M
            ptr++;
279
4.46M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
223M
    }
319
320
223M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
223M
           ptr - (SRE_CHAR*) state->ptr));
322
223M
    return ptr - (SRE_CHAR*) state->ptr;
323
223M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
787M
    do { \
355
787M
        ctx->lastmark = state->lastmark; \
356
787M
        ctx->lastindex = state->lastindex; \
357
787M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
259M
    do { \
360
259M
        state->lastmark = ctx->lastmark; \
361
259M
        state->lastindex = ctx->lastindex; \
362
259M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
266M
    do { \
366
266M
        TRACE(("push last_ptr: %zd", \
367
266M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
266M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
266M
    } while (0)
370
#define LAST_PTR_POP()  \
371
266M
    do { \
372
266M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
266M
        TRACE(("pop last_ptr: %zd", \
374
266M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
266M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
619M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
977M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.50G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
145M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
97.1M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.59G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.59G
do { \
390
1.59G
    alloc_pos = state->data_stack_base; \
391
1.59G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.59G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.59G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
168M
        int j = data_stack_grow(state, sizeof(type)); \
395
168M
        if (j < 0) return j; \
396
168M
        if (ctx_pos != -1) \
397
168M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
168M
    } \
399
1.59G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.59G
    state->data_stack_base += sizeof(type); \
401
1.59G
} while (0)
402
403
1.76G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.76G
do { \
405
1.76G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.76G
    ptr = (type*)(state->data_stack+pos); \
407
1.76G
} while (0)
408
409
665M
#define DATA_STACK_PUSH(state, data, size) \
410
665M
do { \
411
665M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
665M
           data, state->data_stack_base, size)); \
413
665M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
87.3k
        int j = data_stack_grow(state, size); \
415
87.3k
        if (j < 0) return j; \
416
87.3k
        if (ctx_pos != -1) \
417
87.3k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
87.3k
    } \
419
665M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
665M
    state->data_stack_base += size; \
421
665M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
401M
#define DATA_STACK_POP(state, data, size, discard) \
427
401M
do { \
428
401M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
401M
           data, state->data_stack_base-size, size)); \
430
401M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
401M
    if (discard) \
432
401M
        state->data_stack_base -= size; \
433
401M
} while (0)
434
435
1.86G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.86G
do { \
437
1.86G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.86G
           state->data_stack_base-size, size)); \
439
1.86G
    state->data_stack_base -= size; \
440
1.86G
} while(0)
441
442
#define DATA_PUSH(x) \
443
266M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
266M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.59G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.59G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.76G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
657M
    do if (lastmark >= 0) { \
473
398M
        MARK_TRACE("push", (lastmark)); \
474
398M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
398M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
657M
    } while (0)
477
#define MARK_POP(lastmark) \
478
178M
    do if (lastmark >= 0) { \
479
132M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
132M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
132M
        MARK_TRACE("pop", (lastmark)); \
482
178M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.35M
    do if (lastmark >= 0) { \
485
2.35M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.35M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.35M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.35M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
478M
    do if (lastmark >= 0) { \
491
265M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
265M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
265M
        MARK_TRACE("pop discard", (lastmark)); \
494
478M
    } while (0)
495
496
504M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
266M
#define JUMP_MAX_UNTIL_2     2
499
145M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
144M
#define JUMP_REPEAT          7
504
18.7M
#define JUMP_REPEAT_ONE_1    8
505
194M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
179M
#define JUMP_BRANCH          11
508
97.1M
#define JUMP_ASSERT          12
509
46.2M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.09G
    ctx->pattern = pattern; \
516
1.09G
    ctx->ptr = ptr; \
517
1.09G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.09G
    nextctx->pattern = nextpattern; \
519
1.09G
    nextctx->toplevel = toplevel_; \
520
1.09G
    nextctx->jump = jumpvalue; \
521
1.09G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.09G
    pattern = nextpattern; \
523
1.09G
    ctx_pos = alloc_pos; \
524
1.09G
    ctx = nextctx; \
525
1.09G
    goto entrance; \
526
1.09G
    jumplabel: \
527
1.09G
    pattern = ctx->pattern; \
528
1.09G
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
949M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
143M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.79G
    do {                                                           \
553
2.79G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.79G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.79G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.89G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.79G
        do {                               \
588
2.79G
            MAYBE_CHECK_SIGNALS;           \
589
2.79G
            goto *sre_targets[*pattern++]; \
590
2.79G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
504M
{
601
504M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
504M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
504M
    Py_ssize_t ret = 0;
604
504M
    int jump;
605
504M
    unsigned int sigcount = state->sigcount;
606
607
504M
    SRE(match_context)* ctx;
608
504M
    SRE(match_context)* nextctx;
609
504M
    INIT_TRACE(state);
610
611
504M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
504M
    DATA_ALLOC(SRE(match_context), ctx);
614
504M
    ctx->last_ctx_pos = -1;
615
504M
    ctx->jump = JUMP_NONE;
616
504M
    ctx->toplevel = toplevel;
617
504M
    ctx_pos = alloc_pos;
618
619
504M
#if USE_COMPUTED_GOTOS
620
504M
#include "sre_targets.h"
621
504M
#endif
622
623
1.59G
entrance:
624
625
1.59G
    ;  // Fashion statement.
626
1.59G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.59G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
85.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.21M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.21M
                   end - ptr, (size_t) pattern[3]));
634
6.21M
            RETURN_FAILURE;
635
6.21M
        }
636
79.7M
        pattern += pattern[1] + 1;
637
79.7M
    }
638
639
1.59G
#if USE_COMPUTED_GOTOS
640
1.59G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.59G
    {
647
648
1.59G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
542M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
542M
                   ptr, pattern[0]));
653
542M
            {
654
542M
                int i = pattern[0];
655
542M
                if (i & 1)
656
84.0M
                    state->lastindex = i/2 + 1;
657
542M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
534M
                    int j = state->lastmark + 1;
663
542M
                    while (j < i)
664
8.00M
                        state->mark[j++] = NULL;
665
534M
                    state->lastmark = i;
666
534M
                }
667
542M
                state->mark[i] = ptr;
668
542M
            }
669
542M
            pattern++;
670
542M
            DISPATCH;
671
672
542M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
165M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
165M
                   ptr, *pattern));
677
165M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
58.4M
                RETURN_FAILURE;
679
106M
            pattern++;
680
106M
            ptr++;
681
106M
            DISPATCH;
682
683
106M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
242M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
242M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
242M
            if (ctx->toplevel &&
698
242M
                ((state->match_all && ptr != state->end) ||
699
69.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
242M
            state->ptr = ptr;
704
242M
            RETURN_SUCCESS;
705
706
14.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.5M
            if (!SRE(at)(state, ptr, *pattern))
711
4.06M
                RETURN_FAILURE;
712
10.4M
            pattern++;
713
10.4M
            DISPATCH;
714
715
10.4M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
299M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
299M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
299M
            if (ptr >= end ||
749
299M
                !SRE(charset)(state, pattern + 1, *ptr))
750
7.68M
                RETURN_FAILURE;
751
291M
            pattern += pattern[0];
752
291M
            ptr++;
753
291M
            DISPATCH;
754
755
291M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.88M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.88M
                   pattern, ptr, pattern[0]));
758
5.88M
            if (ptr >= end ||
759
5.88M
                sre_lower_ascii(*ptr) != *pattern)
760
346k
                RETURN_FAILURE;
761
5.54M
            pattern++;
762
5.54M
            ptr++;
763
5.54M
            DISPATCH;
764
765
5.54M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
105M
        TARGET(SRE_OP_JUMP):
845
105M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
105M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
105M
                   ptr, pattern[0]));
850
105M
            pattern += pattern[0];
851
105M
            DISPATCH;
852
853
178M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
178M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
178M
            LASTMARK_SAVE();
858
178M
            if (state->repeat)
859
129M
                MARK_PUSH(ctx->lastmark);
860
438M
            for (; pattern[0]; pattern += pattern[0]) {
861
361M
                if (pattern[1] == SRE_OP_LITERAL &&
862
361M
                    (ptr >= end ||
863
168M
                     (SRE_CODE) *ptr != pattern[2]))
864
89.6M
                    continue;
865
271M
                if (pattern[1] == SRE_OP_IN &&
866
271M
                    (ptr >= end ||
867
121M
                     !SRE(charset)(state, pattern + 3,
868
121M
                                   (SRE_CODE) *ptr)))
869
92.6M
                    continue;
870
179M
                state->ptr = ptr;
871
179M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
179M
                if (ret) {
873
102M
                    if (state->repeat)
874
83.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
102M
                    RETURN_ON_ERROR(ret);
876
102M
                    RETURN_SUCCESS;
877
102M
                }
878
77.0M
                if (state->repeat)
879
27.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
77.0M
                LASTMARK_RESTORE();
881
77.0M
            }
882
76.4M
            if (state->repeat)
883
46.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
76.4M
            RETURN_FAILURE;
885
886
670M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
670M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
670M
                   pattern[1], pattern[2]));
898
899
670M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.14M
                RETURN_FAILURE; /* cannot match */
901
902
669M
            state->ptr = ptr;
903
904
669M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
669M
            RETURN_ON_ERROR(ret);
906
669M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
669M
            ctx->count = ret;
908
669M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
669M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
373M
                RETURN_FAILURE;
917
918
295M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
295M
                ptr == state->end &&
920
295M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
73.3k
            {
922
                /* tail is empty.  we're finished */
923
73.3k
                state->ptr = ptr;
924
73.3k
                RETURN_SUCCESS;
925
73.3k
            }
926
927
295M
            LASTMARK_SAVE();
928
295M
            if (state->repeat)
929
214M
                MARK_PUSH(ctx->lastmark);
930
931
295M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
103M
                ctx->u.chr = pattern[pattern[0]+1];
935
103M
                for (;;) {
936
229M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
229M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
126M
                        ptr--;
939
126M
                        ctx->count--;
940
126M
                    }
941
103M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
84.6M
                        break;
943
18.7M
                    state->ptr = ptr;
944
18.7M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
18.7M
                            pattern+pattern[0]);
946
18.7M
                    if (ret) {
947
18.7M
                        if (state->repeat)
948
17.0M
                            MARK_POP_DISCARD(ctx->lastmark);
949
18.7M
                        RETURN_ON_ERROR(ret);
950
18.7M
                        RETURN_SUCCESS;
951
18.7M
                    }
952
552
                    if (state->repeat)
953
552
                        MARK_POP_KEEP(ctx->lastmark);
954
552
                    LASTMARK_RESTORE();
955
956
552
                    ptr--;
957
552
                    ctx->count--;
958
552
                }
959
84.6M
                if (state->repeat)
960
83.2M
                    MARK_POP_DISCARD(ctx->lastmark);
961
191M
            } else {
962
                /* general case */
963
195M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
194M
                    state->ptr = ptr;
965
194M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
194M
                            pattern+pattern[0]);
967
194M
                    if (ret) {
968
190M
                        if (state->repeat)
969
113M
                            MARK_POP_DISCARD(ctx->lastmark);
970
190M
                        RETURN_ON_ERROR(ret);
971
190M
                        RETURN_SUCCESS;
972
190M
                    }
973
3.93M
                    if (state->repeat)
974
2.32M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.93M
                    LASTMARK_RESTORE();
976
977
3.93M
                    ptr--;
978
3.93M
                    ctx->count--;
979
3.93M
                }
980
1.41M
                if (state->repeat)
981
1.21M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.41M
            }
983
86.0M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
144M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
144M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
144M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
144M
            ctx->u.rep = repeat_pool_malloc(state);
1127
144M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
144M
            ctx->u.rep->count = -1;
1131
144M
            ctx->u.rep->pattern = pattern;
1132
144M
            ctx->u.rep->prev = state->repeat;
1133
144M
            ctx->u.rep->last_ptr = NULL;
1134
144M
            state->repeat = ctx->u.rep;
1135
1136
144M
            state->ptr = ptr;
1137
144M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
144M
            state->repeat = ctx->u.rep->prev;
1139
144M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
144M
            if (ret) {
1142
144M
                RETURN_ON_ERROR(ret);
1143
144M
                RETURN_SUCCESS;
1144
144M
            }
1145
104k
            RETURN_FAILURE;
1146
1147
280M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
280M
            ctx->u.rep = state->repeat;
1155
280M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
280M
            state->ptr = ptr;
1159
1160
280M
            ctx->count = ctx->u.rep->count+1;
1161
1162
280M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
280M
                   ptr, ctx->count));
1164
1165
280M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
280M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
280M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
280M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
266M
                ctx->u.rep->count = ctx->count;
1185
266M
                LASTMARK_SAVE();
1186
266M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
266M
                LAST_PTR_PUSH();
1189
266M
                ctx->u.rep->last_ptr = state->ptr;
1190
266M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
266M
                        ctx->u.rep->pattern+3);
1192
266M
                LAST_PTR_POP();
1193
266M
                if (ret) {
1194
134M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
134M
                    RETURN_ON_ERROR(ret);
1196
134M
                    RETURN_SUCCESS;
1197
134M
                }
1198
132M
                MARK_POP(ctx->lastmark);
1199
132M
                LASTMARK_RESTORE();
1200
132M
                ctx->u.rep->count = ctx->count-1;
1201
132M
                state->ptr = ptr;
1202
132M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
145M
            state->repeat = ctx->u.rep->prev;
1207
145M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
145M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
145M
            RETURN_ON_SUCCESS(ret);
1211
1.27M
            state->ptr = ptr;
1212
1.27M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
97.1M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
97.1M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
97.1M
                   ptr, pattern[1]));
1565
97.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
97.1M
            state->ptr = ptr - pattern[1];
1568
97.1M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
97.1M
            RETURN_ON_FAILURE(ret);
1570
92.9M
            pattern += pattern[0];
1571
92.9M
            DISPATCH;
1572
1573
92.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
46.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
46.2M
                   ptr, pattern[1]));
1578
46.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
46.2M
                state->ptr = ptr - pattern[1];
1580
46.2M
                LASTMARK_SAVE();
1581
46.2M
                if (state->repeat)
1582
46.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
92.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
92.4M
                if (ret) {
1586
21.7k
                    if (state->repeat)
1587
21.7k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
21.7k
                    RETURN_ON_ERROR(ret);
1589
21.7k
                    RETURN_FAILURE;
1590
21.7k
                }
1591
46.1M
                if (state->repeat)
1592
46.1M
                    MARK_POP(ctx->lastmark);
1593
46.1M
                LASTMARK_RESTORE();
1594
46.1M
            }
1595
46.1M
            pattern += pattern[0];
1596
46.1M
            DISPATCH;
1597
1598
46.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.59G
exit:
1620
1.59G
    ctx_pos = ctx->last_ctx_pos;
1621
1.59G
    jump = ctx->jump;
1622
1.59G
    DATA_POP_DISCARD(ctx);
1623
1.59G
    if (ctx_pos == -1) {
1624
504M
        state->sigcount = sigcount;
1625
504M
        return ret;
1626
504M
    }
1627
1.09G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.09G
    switch (jump) {
1630
266M
        case JUMP_MAX_UNTIL_2:
1631
266M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
266M
            goto jump_max_until_2;
1633
145M
        case JUMP_MAX_UNTIL_3:
1634
145M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
145M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
179M
        case JUMP_BRANCH:
1643
179M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
179M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
144M
        case JUMP_REPEAT:
1658
144M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
144M
            goto jump_repeat;
1660
18.7M
        case JUMP_REPEAT_ONE_1:
1661
18.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
18.7M
            goto jump_repeat_one_1;
1663
194M
        case JUMP_REPEAT_ONE_2:
1664
194M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
194M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
97.1M
        case JUMP_ASSERT:
1673
97.1M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
97.1M
            goto jump_assert;
1675
46.2M
        case JUMP_ASSERT_NOT:
1676
46.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
46.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.09G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.09G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
157M
{
601
157M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
157M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
157M
    Py_ssize_t ret = 0;
604
157M
    int jump;
605
157M
    unsigned int sigcount = state->sigcount;
606
607
157M
    SRE(match_context)* ctx;
608
157M
    SRE(match_context)* nextctx;
609
157M
    INIT_TRACE(state);
610
611
157M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
157M
    DATA_ALLOC(SRE(match_context), ctx);
614
157M
    ctx->last_ctx_pos = -1;
615
157M
    ctx->jump = JUMP_NONE;
616
157M
    ctx->toplevel = toplevel;
617
157M
    ctx_pos = alloc_pos;
618
619
157M
#if USE_COMPUTED_GOTOS
620
157M
#include "sre_targets.h"
621
157M
#endif
622
623
315M
entrance:
624
625
315M
    ;  // Fashion statement.
626
315M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
315M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.21M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.21M
                   end - ptr, (size_t) pattern[3]));
634
6.21M
            RETURN_FAILURE;
635
6.21M
        }
636
23.6M
        pattern += pattern[1] + 1;
637
23.6M
    }
638
639
309M
#if USE_COMPUTED_GOTOS
640
309M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
309M
    {
647
648
309M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
137M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
137M
                   ptr, pattern[0]));
653
137M
            {
654
137M
                int i = pattern[0];
655
137M
                if (i & 1)
656
20.8M
                    state->lastindex = i/2 + 1;
657
137M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
133M
                    int j = state->lastmark + 1;
663
137M
                    while (j < i)
664
3.75M
                        state->mark[j++] = NULL;
665
133M
                    state->lastmark = i;
666
133M
                }
667
137M
                state->mark[i] = ptr;
668
137M
            }
669
137M
            pattern++;
670
137M
            DISPATCH;
671
672
137M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
52.1M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
52.1M
                   ptr, *pattern));
677
52.1M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
16.9M
                RETURN_FAILURE;
679
35.1M
            pattern++;
680
35.1M
            ptr++;
681
35.1M
            DISPATCH;
682
683
35.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
49.7M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
49.7M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
49.7M
            if (ctx->toplevel &&
698
49.7M
                ((state->match_all && ptr != state->end) ||
699
16.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
49.7M
            state->ptr = ptr;
704
49.7M
            RETURN_SUCCESS;
705
706
13.0M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.0M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.0M
            if (!SRE(at)(state, ptr, *pattern))
711
2.57M
                RETURN_FAILURE;
712
10.4M
            pattern++;
713
10.4M
            DISPATCH;
714
715
10.4M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
35.7M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
35.7M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
35.7M
            if (ptr >= end ||
749
35.7M
                !SRE(charset)(state, pattern + 1, *ptr))
750
395k
                RETURN_FAILURE;
751
35.3M
            pattern += pattern[0];
752
35.3M
            ptr++;
753
35.3M
            DISPATCH;
754
755
35.3M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
988k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
988k
                   pattern, ptr, pattern[0]));
758
988k
            if (ptr >= end ||
759
988k
                sre_lower_ascii(*ptr) != *pattern)
760
169k
                RETURN_FAILURE;
761
819k
            pattern++;
762
819k
            ptr++;
763
819k
            DISPATCH;
764
765
819k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.3M
        TARGET(SRE_OP_JUMP):
845
25.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.3M
                   ptr, pattern[0]));
850
25.3M
            pattern += pattern[0];
851
25.3M
            DISPATCH;
852
853
51.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
51.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
51.6M
            LASTMARK_SAVE();
858
51.6M
            if (state->repeat)
859
9.75M
                MARK_PUSH(ctx->lastmark);
860
156M
            for (; pattern[0]; pattern += pattern[0]) {
861
128M
                if (pattern[1] == SRE_OP_LITERAL &&
862
128M
                    (ptr >= end ||
863
55.6M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.6M
                    continue;
865
102M
                if (pattern[1] == SRE_OP_IN &&
866
102M
                    (ptr >= end ||
867
10.8M
                     !SRE(charset)(state, pattern + 3,
868
10.8M
                                   (SRE_CODE) *ptr)))
869
6.27M
                    continue;
870
95.9M
                state->ptr = ptr;
871
95.9M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
95.9M
                if (ret) {
873
23.5M
                    if (state->repeat)
874
9.45M
                        MARK_POP_DISCARD(ctx->lastmark);
875
23.5M
                    RETURN_ON_ERROR(ret);
876
23.5M
                    RETURN_SUCCESS;
877
23.5M
                }
878
72.4M
                if (state->repeat)
879
6.19k
                    MARK_POP_KEEP(ctx->lastmark);
880
72.4M
                LASTMARK_RESTORE();
881
72.4M
            }
882
28.1M
            if (state->repeat)
883
295k
                MARK_POP_DISCARD(ctx->lastmark);
884
28.1M
            RETURN_FAILURE;
885
886
160M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
160M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
160M
                   pattern[1], pattern[2]));
898
899
160M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
956k
                RETURN_FAILURE; /* cannot match */
901
902
159M
            state->ptr = ptr;
903
904
159M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
159M
            RETURN_ON_ERROR(ret);
906
159M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
159M
            ctx->count = ret;
908
159M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
159M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
132M
                RETURN_FAILURE;
917
918
27.5M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
27.5M
                ptr == state->end &&
920
27.5M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
53.4k
            {
922
                /* tail is empty.  we're finished */
923
53.4k
                state->ptr = ptr;
924
53.4k
                RETURN_SUCCESS;
925
53.4k
            }
926
927
27.5M
            LASTMARK_SAVE();
928
27.5M
            if (state->repeat)
929
12.4M
                MARK_PUSH(ctx->lastmark);
930
931
27.5M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.17M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.17M
                for (;;) {
936
17.0M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
17.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
11.8M
                        ptr--;
939
11.8M
                        ctx->count--;
940
11.8M
                    }
941
5.17M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.00M
                        break;
943
2.17M
                    state->ptr = ptr;
944
2.17M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.17M
                            pattern+pattern[0]);
946
2.17M
                    if (ret) {
947
2.17M
                        if (state->repeat)
948
467k
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.17M
                        RETURN_ON_ERROR(ret);
950
2.17M
                        RETURN_SUCCESS;
951
2.17M
                    }
952
109
                    if (state->repeat)
953
109
                        MARK_POP_KEEP(ctx->lastmark);
954
109
                    LASTMARK_RESTORE();
955
956
109
                    ptr--;
957
109
                    ctx->count--;
958
109
                }
959
3.00M
                if (state->repeat)
960
1.63M
                    MARK_POP_DISCARD(ctx->lastmark);
961
22.3M
            } else {
962
                /* general case */
963
24.3M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
23.4M
                    state->ptr = ptr;
965
23.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
23.4M
                            pattern+pattern[0]);
967
23.4M
                    if (ret) {
968
21.4M
                        if (state->repeat)
969
9.64M
                            MARK_POP_DISCARD(ctx->lastmark);
970
21.4M
                        RETURN_ON_ERROR(ret);
971
21.4M
                        RETURN_SUCCESS;
972
21.4M
                    }
973
1.97M
                    if (state->repeat)
974
1.19M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.97M
                    LASTMARK_RESTORE();
976
977
1.97M
                    ptr--;
978
1.97M
                    ctx->count--;
979
1.97M
                }
980
844k
                if (state->repeat)
981
645k
                    MARK_POP_DISCARD(ctx->lastmark);
982
844k
            }
983
3.84M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
5.92M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
5.92M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
5.92M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
5.92M
            ctx->u.rep = repeat_pool_malloc(state);
1127
5.92M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
5.92M
            ctx->u.rep->count = -1;
1131
5.92M
            ctx->u.rep->pattern = pattern;
1132
5.92M
            ctx->u.rep->prev = state->repeat;
1133
5.92M
            ctx->u.rep->last_ptr = NULL;
1134
5.92M
            state->repeat = ctx->u.rep;
1135
1136
5.92M
            state->ptr = ptr;
1137
5.92M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
5.92M
            state->repeat = ctx->u.rep->prev;
1139
5.92M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
5.92M
            if (ret) {
1142
5.82M
                RETURN_ON_ERROR(ret);
1143
5.82M
                RETURN_SUCCESS;
1144
5.82M
            }
1145
103k
            RETURN_FAILURE;
1146
1147
20.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
20.6M
            ctx->u.rep = state->repeat;
1155
20.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
20.6M
            state->ptr = ptr;
1159
1160
20.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
20.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
20.6M
                   ptr, ctx->count));
1164
1165
20.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
20.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
20.6M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
20.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
16.7M
                ctx->u.rep->count = ctx->count;
1185
16.7M
                LASTMARK_SAVE();
1186
16.7M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
16.7M
                LAST_PTR_PUSH();
1189
16.7M
                ctx->u.rep->last_ptr = state->ptr;
1190
16.7M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
16.7M
                        ctx->u.rep->pattern+3);
1192
16.7M
                LAST_PTR_POP();
1193
16.7M
                if (ret) {
1194
14.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
14.1M
                    RETURN_ON_ERROR(ret);
1196
14.1M
                    RETURN_SUCCESS;
1197
14.1M
                }
1198
2.60M
                MARK_POP(ctx->lastmark);
1199
2.60M
                LASTMARK_RESTORE();
1200
2.60M
                ctx->u.rep->count = ctx->count-1;
1201
2.60M
                state->ptr = ptr;
1202
2.60M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.52M
            state->repeat = ctx->u.rep->prev;
1207
6.52M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.52M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.52M
            RETURN_ON_SUCCESS(ret);
1211
705k
            state->ptr = ptr;
1212
705k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
1.97M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
1.97M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
1.97M
                   ptr, pattern[1]));
1565
1.97M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
1.97M
            state->ptr = ptr - pattern[1];
1568
1.97M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
1.97M
            RETURN_ON_FAILURE(ret);
1570
1.91M
            pattern += pattern[0];
1571
1.91M
            DISPATCH;
1572
1573
4.73M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
4.73M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
4.73M
                   ptr, pattern[1]));
1578
4.73M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
4.73M
                state->ptr = ptr - pattern[1];
1580
4.73M
                LASTMARK_SAVE();
1581
4.73M
                if (state->repeat)
1582
4.73M
                    MARK_PUSH(ctx->lastmark);
1583
1584
9.46M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
9.46M
                if (ret) {
1586
1.20k
                    if (state->repeat)
1587
1.20k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.20k
                    RETURN_ON_ERROR(ret);
1589
1.20k
                    RETURN_FAILURE;
1590
1.20k
                }
1591
4.72M
                if (state->repeat)
1592
4.72M
                    MARK_POP(ctx->lastmark);
1593
4.72M
                LASTMARK_RESTORE();
1594
4.72M
            }
1595
4.72M
            pattern += pattern[0];
1596
4.72M
            DISPATCH;
1597
1598
4.72M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
315M
exit:
1620
315M
    ctx_pos = ctx->last_ctx_pos;
1621
315M
    jump = ctx->jump;
1622
315M
    DATA_POP_DISCARD(ctx);
1623
315M
    if (ctx_pos == -1) {
1624
157M
        state->sigcount = sigcount;
1625
157M
        return ret;
1626
157M
    }
1627
157M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
157M
    switch (jump) {
1630
16.7M
        case JUMP_MAX_UNTIL_2:
1631
16.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
16.7M
            goto jump_max_until_2;
1633
6.52M
        case JUMP_MAX_UNTIL_3:
1634
6.52M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.52M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
95.9M
        case JUMP_BRANCH:
1643
95.9M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
95.9M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
5.92M
        case JUMP_REPEAT:
1658
5.92M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
5.92M
            goto jump_repeat;
1660
2.17M
        case JUMP_REPEAT_ONE_1:
1661
2.17M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.17M
            goto jump_repeat_one_1;
1663
23.4M
        case JUMP_REPEAT_ONE_2:
1664
23.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
23.4M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
1.97M
        case JUMP_ASSERT:
1673
1.97M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
1.97M
            goto jump_assert;
1675
4.73M
        case JUMP_ASSERT_NOT:
1676
4.73M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
4.73M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
157M
    }
1683
1684
0
    return ret; /* should never get here */
1685
157M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
237M
{
601
237M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
237M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
237M
    Py_ssize_t ret = 0;
604
237M
    int jump;
605
237M
    unsigned int sigcount = state->sigcount;
606
607
237M
    SRE(match_context)* ctx;
608
237M
    SRE(match_context)* nextctx;
609
237M
    INIT_TRACE(state);
610
611
237M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
237M
    DATA_ALLOC(SRE(match_context), ctx);
614
237M
    ctx->last_ctx_pos = -1;
615
237M
    ctx->jump = JUMP_NONE;
616
237M
    ctx->toplevel = toplevel;
617
237M
    ctx_pos = alloc_pos;
618
619
237M
#if USE_COMPUTED_GOTOS
620
237M
#include "sre_targets.h"
621
237M
#endif
622
623
660M
entrance:
624
625
660M
    ;  // Fashion statement.
626
660M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
660M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
706
            TRACE(("reject (got %tu chars, need %zu)\n",
633
706
                   end - ptr, (size_t) pattern[3]));
634
706
            RETURN_FAILURE;
635
706
        }
636
29.6M
        pattern += pattern[1] + 1;
637
29.6M
    }
638
639
660M
#if USE_COMPUTED_GOTOS
640
660M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
660M
    {
647
648
660M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
238M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
238M
                   ptr, pattern[0]));
653
238M
            {
654
238M
                int i = pattern[0];
655
238M
                if (i & 1)
656
28.3M
                    state->lastindex = i/2 + 1;
657
238M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
236M
                    int j = state->lastmark + 1;
663
237M
                    while (j < i)
664
1.74M
                        state->mark[j++] = NULL;
665
236M
                    state->lastmark = i;
666
236M
                }
667
238M
                state->mark[i] = ptr;
668
238M
            }
669
238M
            pattern++;
670
238M
            DISPATCH;
671
672
238M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
59.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
59.4M
                   ptr, *pattern));
677
59.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
22.2M
                RETURN_FAILURE;
679
37.2M
            pattern++;
680
37.2M
            ptr++;
681
37.2M
            DISPATCH;
682
683
37.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
107M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
107M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
107M
            if (ctx->toplevel &&
698
107M
                ((state->match_all && ptr != state->end) ||
699
26.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
107M
            state->ptr = ptr;
704
107M
            RETURN_SUCCESS;
705
706
936k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
936k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
936k
            if (!SRE(at)(state, ptr, *pattern))
711
905k
                RETURN_FAILURE;
712
30.6k
            pattern++;
713
30.6k
            DISPATCH;
714
715
30.6k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
136M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
136M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
136M
            if (ptr >= end ||
749
136M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.85M
                RETURN_FAILURE;
751
131M
            pattern += pattern[0];
752
131M
            ptr++;
753
131M
            DISPATCH;
754
755
131M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.95M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.95M
                   pattern, ptr, pattern[0]));
758
3.95M
            if (ptr >= end ||
759
3.95M
                sre_lower_ascii(*ptr) != *pattern)
760
157k
                RETURN_FAILURE;
761
3.79M
            pattern++;
762
3.79M
            ptr++;
763
3.79M
            DISPATCH;
764
765
3.79M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
36.5M
        TARGET(SRE_OP_JUMP):
845
36.5M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
36.5M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
36.5M
                   ptr, pattern[0]));
850
36.5M
            pattern += pattern[0];
851
36.5M
            DISPATCH;
852
853
58.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
58.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
58.0M
            LASTMARK_SAVE();
858
58.0M
            if (state->repeat)
859
54.5M
                MARK_PUSH(ctx->lastmark);
860
130M
            for (; pattern[0]; pattern += pattern[0]) {
861
108M
                if (pattern[1] == SRE_OP_LITERAL &&
862
108M
                    (ptr >= end ||
863
52.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.8M
                    continue;
865
81.8M
                if (pattern[1] == SRE_OP_IN &&
866
81.8M
                    (ptr >= end ||
867
50.8M
                     !SRE(charset)(state, pattern + 3,
868
50.8M
                                   (SRE_CODE) *ptr)))
869
42.3M
                    continue;
870
39.5M
                state->ptr = ptr;
871
39.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
39.5M
                if (ret) {
873
36.0M
                    if (state->repeat)
874
33.9M
                        MARK_POP_DISCARD(ctx->lastmark);
875
36.0M
                    RETURN_ON_ERROR(ret);
876
36.0M
                    RETURN_SUCCESS;
877
36.0M
                }
878
3.43M
                if (state->repeat)
879
5.94k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.43M
                LASTMARK_RESTORE();
881
3.43M
            }
882
22.0M
            if (state->repeat)
883
20.5M
                MARK_POP_DISCARD(ctx->lastmark);
884
22.0M
            RETURN_FAILURE;
885
886
285M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
285M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
285M
                   pattern[1], pattern[2]));
898
899
285M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
159k
                RETURN_FAILURE; /* cannot match */
901
902
285M
            state->ptr = ptr;
903
904
285M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
285M
            RETURN_ON_ERROR(ret);
906
285M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
285M
            ctx->count = ret;
908
285M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
285M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
166M
                RETURN_FAILURE;
917
918
119M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
119M
                ptr == state->end &&
920
119M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
16.3k
            {
922
                /* tail is empty.  we're finished */
923
16.3k
                state->ptr = ptr;
924
16.3k
                RETURN_SUCCESS;
925
16.3k
            }
926
927
119M
            LASTMARK_SAVE();
928
119M
            if (state->repeat)
929
86.4M
                MARK_PUSH(ctx->lastmark);
930
931
119M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
41.7M
                ctx->u.chr = pattern[pattern[0]+1];
935
41.7M
                for (;;) {
936
81.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
81.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
40.1M
                        ptr--;
939
40.1M
                        ctx->count--;
940
40.1M
                    }
941
41.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
34.8M
                        break;
943
6.96M
                    state->ptr = ptr;
944
6.96M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.96M
                            pattern+pattern[0]);
946
6.96M
                    if (ret) {
947
6.95M
                        if (state->repeat)
948
6.93M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.95M
                        RETURN_ON_ERROR(ret);
950
6.95M
                        RETURN_SUCCESS;
951
6.95M
                    }
952
216
                    if (state->repeat)
953
216
                        MARK_POP_KEEP(ctx->lastmark);
954
216
                    LASTMARK_RESTORE();
955
956
216
                    ptr--;
957
216
                    ctx->count--;
958
216
                }
959
34.8M
                if (state->repeat)
960
34.7M
                    MARK_POP_DISCARD(ctx->lastmark);
961
77.8M
            } else {
962
                /* general case */
963
78.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
78.4M
                    state->ptr = ptr;
965
78.4M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
78.4M
                            pattern+pattern[0]);
967
78.4M
                    if (ret) {
968
77.3M
                        if (state->repeat)
969
44.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
77.3M
                        RETURN_ON_ERROR(ret);
971
77.3M
                        RETURN_SUCCESS;
972
77.3M
                    }
973
1.13M
                    if (state->repeat)
974
963k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.13M
                    LASTMARK_RESTORE();
976
977
1.13M
                    ptr--;
978
1.13M
                    ctx->count--;
979
1.13M
                }
980
483k
                if (state->repeat)
981
481k
                    MARK_POP_DISCARD(ctx->lastmark);
982
483k
            }
983
35.2M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
61.0M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
61.0M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
61.0M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
61.0M
            ctx->u.rep = repeat_pool_malloc(state);
1127
61.0M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
61.0M
            ctx->u.rep->count = -1;
1131
61.0M
            ctx->u.rep->pattern = pattern;
1132
61.0M
            ctx->u.rep->prev = state->repeat;
1133
61.0M
            ctx->u.rep->last_ptr = NULL;
1134
61.0M
            state->repeat = ctx->u.rep;
1135
1136
61.0M
            state->ptr = ptr;
1137
61.0M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
61.0M
            state->repeat = ctx->u.rep->prev;
1139
61.0M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
61.0M
            if (ret) {
1142
61.0M
                RETURN_ON_ERROR(ret);
1143
61.0M
                RETURN_SUCCESS;
1144
61.0M
            }
1145
929
            RETURN_FAILURE;
1146
1147
115M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
115M
            ctx->u.rep = state->repeat;
1155
115M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
115M
            state->ptr = ptr;
1159
1160
115M
            ctx->count = ctx->u.rep->count+1;
1161
1162
115M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
115M
                   ptr, ctx->count));
1164
1165
115M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
115M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
115M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
115M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
111M
                ctx->u.rep->count = ctx->count;
1185
111M
                LASTMARK_SAVE();
1186
111M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
111M
                LAST_PTR_PUSH();
1189
111M
                ctx->u.rep->last_ptr = state->ptr;
1190
111M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
111M
                        ctx->u.rep->pattern+3);
1192
111M
                LAST_PTR_POP();
1193
111M
                if (ret) {
1194
53.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
53.9M
                    RETURN_ON_ERROR(ret);
1196
53.9M
                    RETURN_SUCCESS;
1197
53.9M
                }
1198
57.3M
                MARK_POP(ctx->lastmark);
1199
57.3M
                LASTMARK_RESTORE();
1200
57.3M
                ctx->u.rep->count = ctx->count-1;
1201
57.3M
                state->ptr = ptr;
1202
57.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
61.5M
            state->repeat = ctx->u.rep->prev;
1207
61.5M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
61.5M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
61.5M
            RETURN_ON_SUCCESS(ret);
1211
482k
            state->ptr = ptr;
1212
482k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
42.6M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
42.6M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
42.6M
                   ptr, pattern[1]));
1565
42.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
42.6M
            state->ptr = ptr - pattern[1];
1568
42.6M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
42.6M
            RETURN_ON_FAILURE(ret);
1570
39.0M
            pattern += pattern[0];
1571
39.0M
            DISPATCH;
1572
1573
39.0M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.2M
                   ptr, pattern[1]));
1578
22.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.2M
                state->ptr = ptr - pattern[1];
1580
22.2M
                LASTMARK_SAVE();
1581
22.2M
                if (state->repeat)
1582
22.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
44.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
44.5M
                if (ret) {
1586
5.71k
                    if (state->repeat)
1587
5.71k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.71k
                    RETURN_ON_ERROR(ret);
1589
5.71k
                    RETURN_FAILURE;
1590
5.71k
                }
1591
22.2M
                if (state->repeat)
1592
22.2M
                    MARK_POP(ctx->lastmark);
1593
22.2M
                LASTMARK_RESTORE();
1594
22.2M
            }
1595
22.2M
            pattern += pattern[0];
1596
22.2M
            DISPATCH;
1597
1598
22.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
660M
exit:
1620
660M
    ctx_pos = ctx->last_ctx_pos;
1621
660M
    jump = ctx->jump;
1622
660M
    DATA_POP_DISCARD(ctx);
1623
660M
    if (ctx_pos == -1) {
1624
237M
        state->sigcount = sigcount;
1625
237M
        return ret;
1626
237M
    }
1627
423M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
423M
    switch (jump) {
1630
111M
        case JUMP_MAX_UNTIL_2:
1631
111M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
111M
            goto jump_max_until_2;
1633
61.5M
        case JUMP_MAX_UNTIL_3:
1634
61.5M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
61.5M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
39.5M
        case JUMP_BRANCH:
1643
39.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
39.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
61.0M
        case JUMP_REPEAT:
1658
61.0M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
61.0M
            goto jump_repeat;
1660
6.96M
        case JUMP_REPEAT_ONE_1:
1661
6.96M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.96M
            goto jump_repeat_one_1;
1663
78.4M
        case JUMP_REPEAT_ONE_2:
1664
78.4M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
78.4M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
42.6M
        case JUMP_ASSERT:
1673
42.6M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
42.6M
            goto jump_assert;
1675
22.2M
        case JUMP_ASSERT_NOT:
1676
22.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
423M
    }
1683
1684
0
    return ret; /* should never get here */
1685
423M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
109M
{
601
109M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
109M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
109M
    Py_ssize_t ret = 0;
604
109M
    int jump;
605
109M
    unsigned int sigcount = state->sigcount;
606
607
109M
    SRE(match_context)* ctx;
608
109M
    SRE(match_context)* nextctx;
609
109M
    INIT_TRACE(state);
610
611
109M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
109M
    DATA_ALLOC(SRE(match_context), ctx);
614
109M
    ctx->last_ctx_pos = -1;
615
109M
    ctx->jump = JUMP_NONE;
616
109M
    ctx->toplevel = toplevel;
617
109M
    ctx_pos = alloc_pos;
618
619
109M
#if USE_COMPUTED_GOTOS
620
109M
#include "sre_targets.h"
621
109M
#endif
622
623
621M
entrance:
624
625
621M
    ;  // Empty statement: a label cannot directly precede a declaration (before C23).
626
621M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
621M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
26.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
340
            TRACE(("reject (got %tu chars, need %zu)\n",
633
340
                   end - ptr, (size_t) pattern[3]));
634
340
            RETURN_FAILURE;
635
340
        }
636
26.5M
        pattern += pattern[1] + 1;
637
26.5M
    }
638
639
621M
#if USE_COMPUTED_GOTOS
640
621M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
621M
    {
647
648
621M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
167M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
167M
                   ptr, pattern[0]));
653
167M
            {
654
167M
                int i = pattern[0];
655
167M
                if (i & 1)
656
34.8M
                    state->lastindex = i/2 + 1;
657
167M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
164M
                    int j = state->lastmark + 1;
663
166M
                    while (j < i)
664
2.49M
                        state->mark[j++] = NULL;
665
164M
                    state->lastmark = i;
666
164M
                }
667
167M
                state->mark[i] = ptr;
668
167M
            }
669
167M
            pattern++;
670
167M
            DISPATCH;
671
672
167M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
53.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.4M
                   ptr, *pattern));
677
53.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
19.2M
                RETURN_FAILURE;
679
34.2M
            pattern++;
680
34.2M
            ptr++;
681
34.2M
            DISPATCH;
682
683
34.2M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
85.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
85.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
85.1M
            if (ctx->toplevel &&
698
85.1M
                ((state->match_all && ptr != state->end) ||
699
26.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
85.1M
            state->ptr = ptr;
704
85.1M
            RETURN_SUCCESS;
705
706
588k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
588k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
588k
            if (!SRE(at)(state, ptr, *pattern))
711
583k
                RETURN_FAILURE;
712
5.27k
            pattern++;
713
5.27k
            DISPATCH;
714
715
5.27k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
126M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
126M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
126M
            if (ptr >= end ||
749
126M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.43M
                RETURN_FAILURE;
751
124M
            pattern += pattern[0];
752
124M
            ptr++;
753
124M
            DISPATCH;
754
755
124M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
941k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
941k
                   pattern, ptr, pattern[0]));
758
941k
            if (ptr >= end ||
759
941k
                sre_lower_ascii(*ptr) != *pattern)
760
18.8k
                RETURN_FAILURE;
761
922k
            pattern++;
762
922k
            ptr++;
763
922k
            DISPATCH;
764
765
922k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
43.1M
        TARGET(SRE_OP_JUMP):
845
43.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
43.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
43.1M
                   ptr, pattern[0]));
850
43.1M
            pattern += pattern[0];
851
43.1M
            DISPATCH;
852
853
68.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
68.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
68.9M
            LASTMARK_SAVE();
858
68.9M
            if (state->repeat)
859
65.2M
                MARK_PUSH(ctx->lastmark);
860
150M
            for (; pattern[0]; pattern += pattern[0]) {
861
124M
                if (pattern[1] == SRE_OP_LITERAL &&
862
124M
                    (ptr >= end ||
863
60.4M
                     (SRE_CODE) *ptr != pattern[2]))
864
36.2M
                    continue;
865
87.8M
                if (pattern[1] == SRE_OP_IN &&
866
87.8M
                    (ptr >= end ||
867
60.1M
                     !SRE(charset)(state, pattern + 3,
868
60.1M
                                   (SRE_CODE) *ptr)))
869
44.0M
                    continue;
870
43.8M
                state->ptr = ptr;
871
43.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
43.8M
                if (ret) {
873
42.6M
                    if (state->repeat)
874
39.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
42.6M
                    RETURN_ON_ERROR(ret);
876
42.6M
                    RETURN_SUCCESS;
877
42.6M
                }
878
1.21M
                if (state->repeat)
879
15.0k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.21M
                LASTMARK_RESTORE();
881
1.21M
            }
882
26.3M
            if (state->repeat)
883
25.4M
                MARK_POP_DISCARD(ctx->lastmark);
884
26.3M
            RETURN_FAILURE;
885
886
223M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
223M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
223M
                   pattern[1], pattern[2]));
898
899
223M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
25.6k
                RETURN_FAILURE; /* cannot match */
901
902
223M
            state->ptr = ptr;
903
904
223M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
223M
            RETURN_ON_ERROR(ret);
906
223M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
223M
            ctx->count = ret;
908
223M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
223M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
75.4M
                RETURN_FAILURE;
917
918
148M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
148M
                ptr == state->end &&
920
148M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.55k
            {
922
                /* tail is empty.  we're finished */
923
3.55k
                state->ptr = ptr;
924
3.55k
                RETURN_SUCCESS;
925
3.55k
            }
926
927
148M
            LASTMARK_SAVE();
928
148M
            if (state->repeat)
929
115M
                MARK_PUSH(ctx->lastmark);
930
931
148M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
56.4M
                ctx->u.chr = pattern[pattern[0]+1];
935
56.4M
                for (;;) {
936
130M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
130M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
74.1M
                        ptr--;
939
74.1M
                        ctx->count--;
940
74.1M
                    }
941
56.4M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
46.8M
                        break;
943
9.62M
                    state->ptr = ptr;
944
9.62M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
9.62M
                            pattern+pattern[0]);
946
9.62M
                    if (ret) {
947
9.62M
                        if (state->repeat)
948
9.62M
                            MARK_POP_DISCARD(ctx->lastmark);
949
9.62M
                        RETURN_ON_ERROR(ret);
950
9.62M
                        RETURN_SUCCESS;
951
9.62M
                    }
952
227
                    if (state->repeat)
953
227
                        MARK_POP_KEEP(ctx->lastmark);
954
227
                    LASTMARK_RESTORE();
955
956
227
                    ptr--;
957
227
                    ctx->count--;
958
227
                }
959
46.8M
                if (state->repeat)
960
46.8M
                    MARK_POP_DISCARD(ctx->lastmark);
961
91.6M
            } else {
962
                /* general case */
963
92.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
92.3M
                    state->ptr = ptr;
965
92.3M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
92.3M
                            pattern+pattern[0]);
967
92.3M
                    if (ret) {
968
91.5M
                        if (state->repeat)
969
59.2M
                            MARK_POP_DISCARD(ctx->lastmark);
970
91.5M
                        RETURN_ON_ERROR(ret);
971
91.5M
                        RETURN_SUCCESS;
972
91.5M
                    }
973
820k
                    if (state->repeat)
974
170k
                        MARK_POP_KEEP(ctx->lastmark);
975
820k
                    LASTMARK_RESTORE();
976
977
820k
                    ptr--;
978
820k
                    ctx->count--;
979
820k
                }
980
86.1k
                if (state->repeat)
981
85.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
86.1k
            }
983
46.9M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
77.6M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
77.6M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
77.6M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
77.6M
            ctx->u.rep = repeat_pool_malloc(state);
1127
77.6M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
77.6M
            ctx->u.rep->count = -1;
1131
77.6M
            ctx->u.rep->pattern = pattern;
1132
77.6M
            ctx->u.rep->prev = state->repeat;
1133
77.6M
            ctx->u.rep->last_ptr = NULL;
1134
77.6M
            state->repeat = ctx->u.rep;
1135
1136
77.6M
            state->ptr = ptr;
1137
77.6M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
77.6M
            state->repeat = ctx->u.rep->prev;
1139
77.6M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
77.6M
            if (ret) {
1142
77.6M
                RETURN_ON_ERROR(ret);
1143
77.6M
                RETURN_SUCCESS;
1144
77.6M
            }
1145
775
            RETURN_FAILURE;
1146
1147
144M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
144M
            ctx->u.rep = state->repeat;
1155
144M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
144M
            state->ptr = ptr;
1159
1160
144M
            ctx->count = ctx->u.rep->count+1;
1161
1162
144M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
144M
                   ptr, ctx->count));
1164
1165
144M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
144M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
144M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
144M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
138M
                ctx->u.rep->count = ctx->count;
1185
138M
                LASTMARK_SAVE();
1186
138M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
138M
                LAST_PTR_PUSH();
1189
138M
                ctx->u.rep->last_ptr = state->ptr;
1190
138M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
138M
                        ctx->u.rep->pattern+3);
1192
138M
                LAST_PTR_POP();
1193
138M
                if (ret) {
1194
66.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
66.4M
                    RETURN_ON_ERROR(ret);
1196
66.4M
                    RETURN_SUCCESS;
1197
66.4M
                }
1198
72.4M
                MARK_POP(ctx->lastmark);
1199
72.4M
                LASTMARK_RESTORE();
1200
72.4M
                ctx->u.rep->count = ctx->count-1;
1201
72.4M
                state->ptr = ptr;
1202
72.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
77.7M
            state->repeat = ctx->u.rep->prev;
1207
77.7M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
77.7M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
77.7M
            RETURN_ON_SUCCESS(ret);
1211
85.9k
            state->ptr = ptr;
1212
85.9k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
52.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
52.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
52.5M
                   ptr, pattern[1]));
1565
52.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
52.5M
            state->ptr = ptr - pattern[1];
1568
52.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
52.5M
            RETURN_ON_FAILURE(ret);
1570
52.0M
            pattern += pattern[0];
1571
52.0M
            DISPATCH;
1572
1573
52.0M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
19.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
19.1M
                   ptr, pattern[1]));
1578
19.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
19.1M
                state->ptr = ptr - pattern[1];
1580
19.1M
                LASTMARK_SAVE();
1581
19.1M
                if (state->repeat)
1582
19.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
38.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
38.3M
                if (ret) {
1586
14.8k
                    if (state->repeat)
1587
14.8k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
14.8k
                    RETURN_ON_ERROR(ret);
1589
14.8k
                    RETURN_FAILURE;
1590
14.8k
                }
1591
19.1M
                if (state->repeat)
1592
19.1M
                    MARK_POP(ctx->lastmark);
1593
19.1M
                LASTMARK_RESTORE();
1594
19.1M
            }
1595
19.1M
            pattern += pattern[0];
1596
19.1M
            DISPATCH;
1597
1598
19.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
621M
exit:
1620
621M
    ctx_pos = ctx->last_ctx_pos;
1621
621M
    jump = ctx->jump;
1622
621M
    DATA_POP_DISCARD(ctx);
1623
621M
    if (ctx_pos == -1) {
1624
109M
        state->sigcount = sigcount;
1625
109M
        return ret;
1626
109M
    }
1627
511M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
511M
    switch (jump) {
1630
138M
        case JUMP_MAX_UNTIL_2:
1631
138M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
138M
            goto jump_max_until_2;
1633
77.7M
        case JUMP_MAX_UNTIL_3:
1634
77.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
77.7M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
43.8M
        case JUMP_BRANCH:
1643
43.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
43.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
77.6M
        case JUMP_REPEAT:
1658
77.6M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
77.6M
            goto jump_repeat;
1660
9.62M
        case JUMP_REPEAT_ONE_1:
1661
9.62M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
9.62M
            goto jump_repeat_one_1;
1663
92.3M
        case JUMP_REPEAT_ONE_2:
1664
92.3M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
92.3M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
52.5M
        case JUMP_ASSERT:
1673
52.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
52.5M
            goto jump_assert;
1675
19.1M
        case JUMP_ASSERT_NOT:
1676
19.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
19.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
511M
    }
1683
1684
0
    return ret; /* should never get here */
1685
511M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
337M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
87.3M
{
1694
87.3M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
87.3M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
87.3M
    Py_ssize_t status = 0;
1697
87.3M
    Py_ssize_t prefix_len = 0;
1698
87.3M
    Py_ssize_t prefix_skip = 0;
1699
87.3M
    SRE_CODE* prefix = NULL;
1700
87.3M
    SRE_CODE* charset = NULL;
1701
87.3M
    SRE_CODE* overlap = NULL;
1702
87.3M
    int flags = 0;
1703
87.3M
    INIT_TRACE(state);
1704
1705
87.3M
    if (ptr > end)
1706
0
        return 0;
1707
1708
87.3M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
87.3M
        flags = pattern[2];
1713
1714
87.3M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.73M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.73M
                   end - ptr, (size_t) pattern[3]));
1717
1.73M
            return 0;
1718
1.73M
        }
1719
85.6M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
8.22M
            end -= pattern[3] - 1;
1723
8.22M
            if (end <= ptr)
1724
0
                end = ptr;
1725
8.22M
        }
1726
1727
85.6M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
8.23M
            prefix_len = pattern[5];
1731
8.23M
            prefix_skip = pattern[6];
1732
8.23M
            prefix = pattern + 7;
1733
8.23M
            overlap = prefix + prefix_len - 1;
1734
77.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
73.1M
            charset = pattern + 5;
1738
1739
85.6M
        pattern += 1 + pattern[1];
1740
85.6M
    }
1741
1742
85.6M
    TRACE(("prefix = %p %zd %zd\n",
1743
85.6M
           prefix, prefix_len, prefix_skip));
1744
85.6M
    TRACE(("charset = %p\n", charset));
1745
1746
85.6M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
7.72M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.84M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.84M
#endif
1753
4.84M
        end = (SRE_CHAR *)state->end;
1754
4.84M
        state->must_advance = 0;
1755
8.46M
        while (ptr < end) {
1756
106M
            while (*ptr != c) {
1757
98.1M
                if (++ptr >= end)
1758
570k
                    return 0;
1759
98.1M
            }
1760
7.88M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.88M
            state->start = ptr;
1762
7.88M
            state->ptr = ptr + prefix_skip;
1763
7.88M
            if (flags & SRE_INFO_LITERAL)
1764
4.50k
                return 1; /* we got all of it */
1765
7.87M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.87M
            if (status != 0)
1767
7.14M
                return status;
1768
734k
            ++ptr;
1769
734k
            RESET_CAPTURE_GROUP();
1770
734k
        }
1771
10.5k
        return 0;
1772
4.84M
    }
1773
1774
77.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
500k
        Py_ssize_t i = 0;
1778
1779
500k
        end = (SRE_CHAR *)state->end;
1780
500k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.48M
        for (i = 0; i < prefix_len; i++)
1784
992k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
496k
#endif
1787
1.37M
        while (ptr < end) {
1788
1.37M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.37M
            while (*ptr++ != c) {
1790
7.00M
                if (ptr >= end)
1791
298
                    return 0;
1792
7.00M
            }
1793
1.37M
            if (ptr >= end)
1794
49
                return 0;
1795
1796
1.37M
            i = 1;
1797
1.37M
            state->must_advance = 0;
1798
1.37M
            do {
1799
1.37M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.21M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.21M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.21M
                    state->start = ptr - (prefix_len - 1);
1808
1.21M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.21M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.21M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.21M
                    if (status != 0)
1813
500k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
713k
                    if (++ptr >= end)
1816
25
                        return 0;
1817
713k
                    RESET_CAPTURE_GROUP();
1818
713k
                }
1819
872k
                i = overlap[i];
1820
872k
            } while (i != 0);
1821
1.37M
        }
1822
0
        return 0;
1823
500k
    }
1824
1825
77.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
73.1M
        end = (SRE_CHAR *)state->end;
1828
73.1M
        state->must_advance = 0;
1829
75.9M
        for (;;) {
1830
338M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
262M
                ptr++;
1832
75.9M
            if (ptr >= end)
1833
3.69M
                return 0;
1834
72.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
72.2M
            state->start = ptr;
1836
72.2M
            state->ptr = ptr;
1837
72.2M
            status = SRE(match)(state, pattern, 0);
1838
72.2M
            if (status != 0)
1839
69.4M
                break;
1840
2.79M
            ptr++;
1841
2.79M
            RESET_CAPTURE_GROUP();
1842
2.79M
        }
1843
73.1M
    } else {
1844
        /* general case */
1845
4.22M
        assert(ptr <= end);
1846
4.22M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.22M
        state->start = state->ptr = ptr;
1848
4.22M
        status = SRE(match)(state, pattern, 1);
1849
4.22M
        state->must_advance = 0;
1850
4.22M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.22M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
337M
        while (status == 0 && ptr < end) {
1858
332M
            ptr++;
1859
332M
            RESET_CAPTURE_GROUP();
1860
332M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
332M
            state->start = state->ptr = ptr;
1862
332M
            status = SRE(match)(state, pattern, 0);
1863
332M
        }
1864
4.22M
    }
1865
1866
73.6M
    return status;
1867
77.3M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
36.6M
{
1694
36.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
36.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
36.6M
    Py_ssize_t status = 0;
1697
36.6M
    Py_ssize_t prefix_len = 0;
1698
36.6M
    Py_ssize_t prefix_skip = 0;
1699
36.6M
    SRE_CODE* prefix = NULL;
1700
36.6M
    SRE_CODE* charset = NULL;
1701
36.6M
    SRE_CODE* overlap = NULL;
1702
36.6M
    int flags = 0;
1703
36.6M
    INIT_TRACE(state);
1704
1705
36.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
36.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
36.6M
        flags = pattern[2];
1713
1714
36.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.61M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.61M
                   end - ptr, (size_t) pattern[3]));
1717
1.61M
            return 0;
1718
1.61M
        }
1719
35.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.75M
            end -= pattern[3] - 1;
1723
2.75M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.75M
        }
1726
1727
35.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.75M
            prefix_len = pattern[5];
1731
2.75M
            prefix_skip = pattern[6];
1732
2.75M
            prefix = pattern + 7;
1733
2.75M
            overlap = prefix + prefix_len - 1;
1734
32.3M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
29.1M
            charset = pattern + 5;
1738
1739
35.0M
        pattern += 1 + pattern[1];
1740
35.0M
    }
1741
1742
35.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
35.0M
           prefix, prefix_len, prefix_skip));
1744
35.0M
    TRACE(("charset = %p\n", charset));
1745
1746
35.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.74M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.74M
#if SIZEOF_SRE_CHAR < 4
1750
2.74M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.74M
#endif
1753
2.74M
        end = (SRE_CHAR *)state->end;
1754
2.74M
        state->must_advance = 0;
1755
2.92M
        while (ptr < end) {
1756
32.2M
            while (*ptr != c) {
1757
29.8M
                if (++ptr >= end)
1758
503k
                    return 0;
1759
29.8M
            }
1760
2.41M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.41M
            state->start = ptr;
1762
2.41M
            state->ptr = ptr + prefix_skip;
1763
2.41M
            if (flags & SRE_INFO_LITERAL)
1764
343
                return 1; /* we got all of it */
1765
2.41M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.41M
            if (status != 0)
1767
2.22M
                return status;
1768
182k
            ++ptr;
1769
182k
            RESET_CAPTURE_GROUP();
1770
182k
        }
1771
8.11k
        return 0;
1772
2.74M
    }
1773
1774
32.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
11.7k
        Py_ssize_t i = 0;
1778
1779
11.7k
        end = (SRE_CHAR *)state->end;
1780
11.7k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
11.7k
#if SIZEOF_SRE_CHAR < 4
1783
35.3k
        for (i = 0; i < prefix_len; i++)
1784
23.5k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
11.7k
#endif
1787
277k
        while (ptr < end) {
1788
277k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.16M
            while (*ptr++ != c) {
1790
1.88M
                if (ptr >= end)
1791
63
                    return 0;
1792
1.88M
            }
1793
277k
            if (ptr >= end)
1794
23
                return 0;
1795
1796
277k
            i = 1;
1797
277k
            state->must_advance = 0;
1798
277k
            do {
1799
277k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
206k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
206k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
206k
                    state->start = ptr - (prefix_len - 1);
1808
206k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
206k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
206k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
206k
                    if (status != 0)
1813
11.6k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
194k
                    if (++ptr >= end)
1816
9
                        return 0;
1817
194k
                    RESET_CAPTURE_GROUP();
1818
194k
                }
1819
265k
                i = overlap[i];
1820
265k
            } while (i != 0);
1821
277k
        }
1822
0
        return 0;
1823
11.7k
    }
1824
1825
32.3M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
29.1M
        end = (SRE_CHAR *)state->end;
1828
29.1M
        state->must_advance = 0;
1829
31.0M
        for (;;) {
1830
81.6M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
50.6M
                ptr++;
1832
31.0M
            if (ptr >= end)
1833
2.61M
                return 0;
1834
28.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
28.3M
            state->start = ptr;
1836
28.3M
            state->ptr = ptr;
1837
28.3M
            status = SRE(match)(state, pattern, 0);
1838
28.3M
            if (status != 0)
1839
26.5M
                break;
1840
1.87M
            ptr++;
1841
1.87M
            RESET_CAPTURE_GROUP();
1842
1.87M
        }
1843
29.1M
    } else {
1844
        /* general case */
1845
3.16M
        assert(ptr <= end);
1846
3.16M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.16M
        state->start = state->ptr = ptr;
1848
3.16M
        status = SRE(match)(state, pattern, 1);
1849
3.16M
        state->must_advance = 0;
1850
3.16M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.16M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
97.0M
        while (status == 0 && ptr < end) {
1858
93.8M
            ptr++;
1859
93.8M
            RESET_CAPTURE_GROUP();
1860
93.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
93.8M
            state->start = state->ptr = ptr;
1862
93.8M
            status = SRE(match)(state, pattern, 0);
1863
93.8M
        }
1864
3.16M
    }
1865
1866
29.6M
    return status;
1867
32.3M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
43.5M
{
1694
43.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
43.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
43.5M
    Py_ssize_t status = 0;
1697
43.5M
    Py_ssize_t prefix_len = 0;
1698
43.5M
    Py_ssize_t prefix_skip = 0;
1699
43.5M
    SRE_CODE* prefix = NULL;
1700
43.5M
    SRE_CODE* charset = NULL;
1701
43.5M
    SRE_CODE* overlap = NULL;
1702
43.5M
    int flags = 0;
1703
43.5M
    INIT_TRACE(state);
1704
1705
43.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
43.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
43.5M
        flags = pattern[2];
1713
1714
43.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
112k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
112k
                   end - ptr, (size_t) pattern[3]));
1717
112k
            return 0;
1718
112k
        }
1719
43.4M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.58M
            end -= pattern[3] - 1;
1723
2.58M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.58M
        }
1726
1727
43.4M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.59M
            prefix_len = pattern[5];
1731
2.59M
            prefix_skip = pattern[6];
1732
2.59M
            prefix = pattern + 7;
1733
2.59M
            overlap = prefix + prefix_len - 1;
1734
40.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
40.0M
            charset = pattern + 5;
1738
1739
43.4M
        pattern += 1 + pattern[1];
1740
43.4M
    }
1741
1742
43.4M
    TRACE(("prefix = %p %zd %zd\n",
1743
43.4M
           prefix, prefix_len, prefix_skip));
1744
43.4M
    TRACE(("charset = %p\n", charset));
1745
1746
43.4M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.10M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.10M
#if SIZEOF_SRE_CHAR < 4
1750
2.10M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.10M
#endif
1753
2.10M
        end = (SRE_CHAR *)state->end;
1754
2.10M
        state->must_advance = 0;
1755
2.31M
        while (ptr < end) {
1756
49.7M
            while (*ptr != c) {
1757
47.5M
                if (++ptr >= end)
1758
62.8k
                    return 0;
1759
47.5M
            }
1760
2.24M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.24M
            state->start = ptr;
1762
2.24M
            state->ptr = ptr + prefix_skip;
1763
2.24M
            if (flags & SRE_INFO_LITERAL)
1764
1.28k
                return 1; /* we got all of it */
1765
2.24M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.24M
            if (status != 0)
1767
2.04M
                return status;
1768
206k
            ++ptr;
1769
206k
            RESET_CAPTURE_GROUP();
1770
206k
        }
1771
1.45k
        return 0;
1772
2.10M
    }
1773
1774
41.3M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
484k
        Py_ssize_t i = 0;
1778
1779
484k
        end = (SRE_CHAR *)state->end;
1780
484k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
484k
#if SIZEOF_SRE_CHAR < 4
1783
1.45M
        for (i = 0; i < prefix_len; i++)
1784
968k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
484k
#endif
1787
891k
        while (ptr < end) {
1788
891k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.93M
            while (*ptr++ != c) {
1790
3.04M
                if (ptr >= end)
1791
115
                    return 0;
1792
3.04M
            }
1793
891k
            if (ptr >= end)
1794
14
                return 0;
1795
1796
891k
            i = 1;
1797
891k
            state->must_advance = 0;
1798
891k
            do {
1799
891k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
811k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
811k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
811k
                    state->start = ptr - (prefix_len - 1);
1808
811k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
811k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
811k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
811k
                    if (status != 0)
1813
484k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
327k
                    if (++ptr >= end)
1816
11
                        return 0;
1817
327k
                    RESET_CAPTURE_GROUP();
1818
327k
                }
1819
407k
                i = overlap[i];
1820
407k
            } while (i != 0);
1821
891k
        }
1822
0
        return 0;
1823
484k
    }
1824
1825
40.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
40.0M
        end = (SRE_CHAR *)state->end;
1828
40.0M
        state->must_advance = 0;
1829
40.4M
        for (;;) {
1830
183M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
142M
                ptr++;
1832
40.4M
            if (ptr >= end)
1833
1.02M
                return 0;
1834
39.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
39.4M
            state->start = ptr;
1836
39.4M
            state->ptr = ptr;
1837
39.4M
            status = SRE(match)(state, pattern, 0);
1838
39.4M
            if (status != 0)
1839
39.0M
                break;
1840
423k
            ptr++;
1841
423k
            RESET_CAPTURE_GROUP();
1842
423k
        }
1843
40.0M
    } else {
1844
        /* general case */
1845
860k
        assert(ptr <= end);
1846
860k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
860k
        state->start = state->ptr = ptr;
1848
860k
        status = SRE(match)(state, pattern, 1);
1849
860k
        state->must_advance = 0;
1850
860k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
860k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
165M
        while (status == 0 && ptr < end) {
1858
164M
            ptr++;
1859
164M
            RESET_CAPTURE_GROUP();
1860
164M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
164M
            state->start = state->ptr = ptr;
1862
164M
            status = SRE(match)(state, pattern, 0);
1863
164M
        }
1864
860k
    }
1865
1866
39.8M
    return status;
1867
40.8M
}
sre.c:sre_ucs4_search
Line
Count
Source
/* Body of SRE(search) as instantiated under the name sre_ucs4_search.
 * The signature is not part of this listing; `state` and `pattern` are
 * its parameters.
 *
 * Scans forward from state->start towards state->end for the first
 * position at which SRE(match) returns a non-zero status.  If the
 * pattern starts with an INFO block, its minimum length, literal prefix
 * or leading charset is used to skip positions that cannot match.
 *
 * Returns 0 when no position matches; otherwise the non-zero status
 * from SRE(match), or 1 directly when the pattern is a pure literal
 * (SRE_INFO_LITERAL).  On the prefix and charset paths state->start and
 * state->ptr are set before each match attempt.
 */
1693
7.07M
{
1694
7.07M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.07M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.07M
    Py_ssize_t status = 0;
1697
7.07M
    Py_ssize_t prefix_len = 0;
1698
7.07M
    Py_ssize_t prefix_skip = 0;
1699
7.07M
    SRE_CODE* prefix = NULL;
1700
7.07M
    SRE_CODE* charset = NULL;
1701
7.07M
    SRE_CODE* overlap = NULL;
1702
7.07M
    int flags = 0;
1703
7.07M
    INIT_TRACE(state);
1704
1705
/* Start position already beyond the end: nothing to search. */
7.07M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.07M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.07M
        flags = pattern[2];
1713
1714
/* pattern[3] is the minimum match length (<3=min> in the layout above):
   reject immediately if fewer characters than that remain. */
7.07M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.58k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.58k
                   end - ptr, (size_t) pattern[3]));
1717
6.58k
            return 0;
1718
6.58k
        }
1719
7.07M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.88M
            end -= pattern[3] - 1;
1723
2.88M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.88M
        }
1726
1727
7.07M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.88M
            prefix_len = pattern[5];
1731
2.88M
            prefix_skip = pattern[6];
1732
2.88M
            prefix = pattern + 7;
1733
/* The overlap data follows the prefix data.  The pointer is biased by
   -1, so overlap[1] is the first overlap entry; the scan below only
   indexes it with i >= 1. */
2.88M
            overlap = prefix + prefix_len - 1;
1734
4.18M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.98M
            charset = pattern + 5;
1738
1739
/* Step over the whole INFO block (pattern[1] is its skip count) so that
   `pattern` now points at the code following it. */
7.07M
        pattern += 1 + pattern[1];
1740
7.07M
    }
1741
1742
7.07M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.07M
           prefix, prefix_len, prefix_skip));
1744
7.07M
    TRACE(("charset = %p\n", charset));
1745
1746
7.07M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.88M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
/* NOTE(review): the width check below has no hit counts in this
   listing, presumably because SIZEOF_SRE_CHAR is 4 for this
   instantiation and the block is compiled out -- confirm. */
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
/* Drop the min-length adjustment made to `end` above: this scan runs up
   to the real end of the string. */
2.88M
        end = (SRE_CHAR *)state->end;
1754
2.88M
        state->must_advance = 0;
1755
3.22M
        while (ptr < end) {
1756
/* Advance to the next occurrence of c; fail if we reach the end first. */
23.9M
            while (*ptr != c) {
1757
20.7M
                if (++ptr >= end)
1758
4.00k
                    return 0;
1759
20.7M
            }
1760
3.22M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.22M
            state->start = ptr;
1762
/* Matching resumes prefix_skip characters past the candidate start. */
3.22M
            state->ptr = ptr + prefix_skip;
1763
3.22M
            if (flags & SRE_INFO_LITERAL)
1764
2.87k
                return 1; /* we got all of it */
1765
/* The pattern pointer is advanced by 2*prefix_skip code words --
   presumably two words per already-matched literal; TODO confirm
   against the pattern compiler. */
3.22M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.22M
            if (status != 0)
1767
2.87M
                return status;
1768
/* No match at this candidate: retry from the next character. */
345k
            ++ptr;
1769
345k
            RESET_CAPTURE_GROUP();
1770
345k
        }
1771
960
        return 0;
1772
2.88M
    }
1773
1774
4.19M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
4.69k
        Py_ssize_t i = 0;
1778
1779
/* As in the single-character case, scan up to the real end of the
   string rather than the min-length-adjusted one. */
4.69k
        end = (SRE_CHAR *)state->end;
1780
4.69k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
203k
        while (ptr < end) {
1788
203k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
/* Find the first prefix character.  Note the post-increment: when this
   loop exits, ptr is one past the character that matched prefix[0]. */
2.27M
            while (*ptr++ != c) {
1790
2.07M
                if (ptr >= end)
1791
120
                    return 0;
1792
2.07M
            }
1793
/* prefix[0] matched on the last character: the rest of the prefix
   (prefix_len > 1) cannot fit. */
202k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
/* i is the number of prefix characters matched so far; ptr points at
   the character to compare against prefix[i]. */
202k
            i = 1;
1797
202k
            state->must_advance = 0;
1798
203k
            do {
1799
203k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
196k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
196k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
/* ptr is on the last prefix character here, so the match starts
   prefix_len - 1 characters earlier; state->ptr is placed prefix_skip
   characters after that start. */
196k
                    state->start = ptr - (prefix_len - 1);
1808
196k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
196k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
196k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
196k
                    if (status != 0)
1813
4.56k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
191k
                    if (++ptr >= end)
1816
5
                        return 0;
1817
191k
                    RESET_CAPTURE_GROUP();
1818
191k
                }
1819
/* Mismatch, or a full prefix whose match attempt failed: fall back to
   the matched length recorded in the overlap table.  A value of 0 ends
   the do-loop and the outer loop rescans for prefix[0]. */
199k
                i = overlap[i];
1820
199k
            } while (i != 0);
1821
202k
        }
1822
0
        return 0;
1823
4.69k
    }
1824
1825
4.18M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.98M
        end = (SRE_CHAR *)state->end;
1828
3.98M
        state->must_advance = 0;
1829
4.47M
        for (;;) {
1830
/* Skip characters that SRE(charset) rejects for the leading set. */
73.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
68.7M
                ptr++;
1832
4.47M
            if (ptr >= end)
1833
58.3k
                return 0;
1834
4.42M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.42M
            state->start = ptr;
1836
4.42M
            state->ptr = ptr;
1837
4.42M
            status = SRE(match)(state, pattern, 0);
1838
/* Any non-zero status leaves the loop and is returned by the final
   `return status` below. */
4.42M
            if (status != 0)
1839
3.92M
                break;
1840
497k
            ptr++;
1841
497k
            RESET_CAPTURE_GROUP();
1842
497k
        }
1843
3.98M
    } else {
1844
        /* general case */
1845
203k
        assert(ptr <= end);
1846
203k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
203k
        state->start = state->ptr = ptr;
1848
/* NOTE(review): this first attempt passes 1 as the last argument to
   SRE(match), while the retries in the loop below pass 0; the meaning
   of that argument is not visible in this listing. */
203k
        status = SRE(match)(state, pattern, 1);
1849
203k
        state->must_advance = 0;
/* The pattern begins with a beginning-of-string assertion and failed at
   the initial position: stop here instead of retrying later positions
   (presumably none of them can satisfy the anchor).  The state is moved
   to the end and failure is reported. */
1850
203k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
203k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
/* Retry at each following position.  ptr is incremented before the
   attempt, so the last attempt is made with ptr == end. */
1857
75.0M
        while (status == 0 && ptr < end) {
1858
74.8M
            ptr++;
1859
74.8M
            RESET_CAPTURE_GROUP();
1860
74.8M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
74.8M
            state->start = state->ptr = ptr;
1862
74.8M
            status = SRE(match)(state, pattern, 0);
1863
74.8M
        }
1864
203k
    }
1865
1866
4.12M
    return status;
1867
4.18M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/