Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
13.2M
{
18
    /* check if pointer is at given position */
19
20
13.2M
    Py_ssize_t thisp, thatp;
21
22
13.2M
    switch (at) {
23
24
5.94M
    case SRE_AT_BEGINNING:
25
5.94M
    case SRE_AT_BEGINNING_STRING:
26
5.94M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.61M
    case SRE_AT_END:
33
4.61M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.61M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.61M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.73M
    case SRE_AT_END_STRING:
42
2.73M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
13.2M
    }
87
88
0
    return 0;
89
13.2M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
11.6M
{
18
    /* check if pointer is at given position */
19
20
11.6M
    Py_ssize_t thisp, thatp;
21
22
11.6M
    switch (at) {
23
24
5.90M
    case SRE_AT_BEGINNING:
25
5.90M
    case SRE_AT_BEGINNING_STRING:
26
5.90M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
3.90M
    case SRE_AT_END:
33
3.90M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
3.90M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
3.90M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.83M
    case SRE_AT_END_STRING:
42
1.83M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
11.6M
    }
87
88
0
    return 0;
89
11.6M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
1.12M
{
18
    /* check if pointer is at given position */
19
20
1.12M
    Py_ssize_t thisp, thatp;
21
22
1.12M
    switch (at) {
23
24
33.8k
    case SRE_AT_BEGINNING:
25
33.8k
    case SRE_AT_BEGINNING_STRING:
26
33.8k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
621k
    case SRE_AT_END:
33
621k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
621k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
621k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
469k
    case SRE_AT_END_STRING:
42
469k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.12M
    }
87
88
0
    return 0;
89
1.12M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
517k
{
18
    /* check if pointer is at given position */
19
20
517k
    Py_ssize_t thisp, thatp;
21
22
517k
    switch (at) {
23
24
5.29k
    case SRE_AT_BEGINNING:
25
5.29k
    case SRE_AT_BEGINNING_STRING:
26
5.29k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.6k
    case SRE_AT_END:
33
80.6k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
80.6k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.6k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
431k
    case SRE_AT_END_STRING:
42
431k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
517k
    }
87
88
0
    return 0;
89
517k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.89G
{
94
    /* check if character is a member of the given set */
95
96
1.89G
    int ok = 1;
97
98
4.38G
    for (;;) {
99
4.38G
        switch (*set++) {
100
101
1.26G
        case SRE_OP_FAILURE:
102
1.26G
            return !ok;
103
104
1.36G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.36G
            if (ch == set[0])
107
6.44M
                return ok;
108
1.35G
            set++;
109
1.35G
            break;
110
111
10.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.9M
            if (sre_category(set[0], (int) ch))
114
7.54M
                return ok;
115
3.44M
            set++;
116
3.44M
            break;
117
118
938M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
938M
            if (ch < 256 &&
121
938M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
425M
                return ok;
123
513M
            set += 256/SRE_CODE_BITS;
124
513M
            break;
125
126
332M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
332M
            if (set[0] <= ch && ch <= set[1])
129
197M
                return ok;
130
135M
            set += 2;
131
135M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
478M
        case SRE_OP_NEGATE:
148
478M
            ok = !ok;
149
478M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
4.38G
        }
175
4.38G
    }
176
1.89G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
370M
{
94
    /* check if character is a member of the given set */
95
96
370M
    int ok = 1;
97
98
833M
    for (;;) {
99
833M
        switch (*set++) {
100
101
234M
        case SRE_OP_FAILURE:
102
234M
            return !ok;
103
104
327M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
327M
            if (ch == set[0])
107
2.59M
                return ok;
108
324M
            set++;
109
324M
            break;
110
111
9.85M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
9.85M
            if (sre_category(set[0], (int) ch))
114
6.47M
                return ok;
115
3.38M
            set++;
116
3.38M
            break;
117
118
71.0M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
71.0M
            if (ch < 256 &&
121
71.0M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
32.9M
                return ok;
123
38.0M
            set += 256/SRE_CODE_BITS;
124
38.0M
            break;
125
126
154M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
154M
            if (set[0] <= ch && ch <= set[1])
129
94.3M
                return ok;
130
59.6M
            set += 2;
131
59.6M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
36.3M
        case SRE_OP_NEGATE:
148
36.3M
            ok = !ok;
149
36.3M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
833M
        }
175
833M
    }
176
370M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
806M
{
94
    /* check if character is a member of the given set */
95
96
806M
    int ok = 1;
97
98
1.97G
    for (;;) {
99
1.97G
        switch (*set++) {
100
101
591M
        case SRE_OP_FAILURE:
102
591M
            return !ok;
103
104
735M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
735M
            if (ch == set[0])
107
1.56M
                return ok;
108
734M
            set++;
109
734M
            break;
110
111
168k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
168k
            if (sre_category(set[0], (int) ch))
114
147k
                return ok;
115
21.0k
            set++;
116
21.0k
            break;
117
118
314M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
314M
            if (ch < 256 &&
121
314M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
121M
                return ok;
123
193M
            set += 256/SRE_CODE_BITS;
124
193M
            break;
125
126
154M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
154M
            if (set[0] <= ch && ch <= set[1])
129
91.6M
                return ok;
130
62.7M
            set += 2;
131
62.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
175M
        case SRE_OP_NEGATE:
148
175M
            ok = !ok;
149
175M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.97G
        }
175
1.97G
    }
176
806M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
721M
{
94
    /* check if character is a member of the given set */
95
96
721M
    int ok = 1;
97
98
1.57G
    for (;;) {
99
1.57G
        switch (*set++) {
100
101
436M
        case SRE_OP_FAILURE:
102
436M
            return !ok;
103
104
298M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
298M
            if (ch == set[0])
107
2.28M
                return ok;
108
296M
            set++;
109
296M
            break;
110
111
966k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
966k
            if (sre_category(set[0], (int) ch))
114
926k
                return ok;
115
39.7k
            set++;
116
39.7k
            break;
117
118
552M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
552M
            if (ch < 256 &&
121
552M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
270M
                return ok;
123
281M
            set += 256/SRE_CODE_BITS;
124
281M
            break;
125
126
24.6M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
24.6M
            if (set[0] <= ch && ch <= set[1])
129
11.2M
                return ok;
130
13.3M
            set += 2;
131
13.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
266M
        case SRE_OP_NEGATE:
148
266M
            ok = !ok;
149
266M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.57G
        }
175
1.57G
    }
176
721M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
720M
{
195
720M
    SRE_CODE chr;
196
720M
    SRE_CHAR c;
197
720M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
720M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
720M
    Py_ssize_t i;
200
720M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
720M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
10.4M
        end = ptr + maxcount;
205
206
720M
    switch (pattern[0]) {
207
208
663M
    case SRE_OP_IN:
209
        /* repeated set */
210
663M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
1.05G
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
394M
            ptr++;
213
663M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
52.5M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
52.5M
        chr = pattern[1];
232
52.5M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
52.5M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
50.4M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
50.4M
        else
238
50.4M
#endif
239
56.4M
        while (ptr < end && *ptr == c)
240
3.92M
            ptr++;
241
52.5M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
4.35M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
4.35M
        chr = pattern[1];
270
4.35M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
4.35M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
1.05M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
1.05M
        else
276
1.05M
#endif
277
40.1M
        while (ptr < end && *ptr != c)
278
35.7M
            ptr++;
279
4.35M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
720M
    }
319
320
720M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
720M
           ptr - (SRE_CHAR*) state->ptr));
322
720M
    return ptr - (SRE_CHAR*) state->ptr;
323
720M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
201M
{
195
201M
    SRE_CODE chr;
196
201M
    SRE_CHAR c;
197
201M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
201M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
201M
    Py_ssize_t i;
200
201M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
201M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
1.48M
        end = ptr + maxcount;
205
206
201M
    switch (pattern[0]) {
207
208
156M
    case SRE_OP_IN:
209
        /* repeated set */
210
156M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
257M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
101M
            ptr++;
213
156M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
44.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
44.6M
        chr = pattern[1];
232
44.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
44.6M
        c = (SRE_CHAR) chr;
234
44.6M
#if SIZEOF_SRE_CHAR < 4
235
44.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
44.6M
        else
238
44.6M
#endif
239
46.4M
        while (ptr < end && *ptr == c)
240
1.77M
            ptr++;
241
44.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
202k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
202k
        chr = pattern[1];
270
202k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
202k
        c = (SRE_CHAR) chr;
272
202k
#if SIZEOF_SRE_CHAR < 4
273
202k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
202k
        else
276
202k
#endif
277
368k
        while (ptr < end && *ptr != c)
278
165k
            ptr++;
279
202k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
201M
    }
319
320
201M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
201M
           ptr - (SRE_CHAR*) state->ptr));
322
201M
    return ptr - (SRE_CHAR*) state->ptr;
323
201M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
312M
{
195
312M
    SRE_CODE chr;
196
312M
    SRE_CHAR c;
197
312M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
312M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
312M
    Py_ssize_t i;
200
312M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
312M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.77M
        end = ptr + maxcount;
205
206
312M
    switch (pattern[0]) {
207
208
305M
    case SRE_OP_IN:
209
        /* repeated set */
210
305M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
438M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
132M
            ptr++;
213
305M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
5.83M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
5.83M
        chr = pattern[1];
232
5.83M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
5.83M
        c = (SRE_CHAR) chr;
234
5.83M
#if SIZEOF_SRE_CHAR < 4
235
5.83M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
5.83M
        else
238
5.83M
#endif
239
7.70M
        while (ptr < end && *ptr == c)
240
1.86M
            ptr++;
241
5.83M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
848k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
848k
        chr = pattern[1];
270
848k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
848k
        c = (SRE_CHAR) chr;
272
848k
#if SIZEOF_SRE_CHAR < 4
273
848k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
848k
        else
276
848k
#endif
277
22.8M
        while (ptr < end && *ptr != c)
278
21.9M
            ptr++;
279
848k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
312M
    }
319
320
312M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
312M
           ptr - (SRE_CHAR*) state->ptr));
322
312M
    return ptr - (SRE_CHAR*) state->ptr;
323
312M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
206M
{
195
206M
    SRE_CODE chr;
196
206M
    SRE_CHAR c;
197
206M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
206M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
206M
    Py_ssize_t i;
200
206M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
206M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.15M
        end = ptr + maxcount;
205
206
206M
    switch (pattern[0]) {
207
208
201M
    case SRE_OP_IN:
209
        /* repeated set */
210
201M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
362M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
160M
            ptr++;
213
201M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.07M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.07M
        chr = pattern[1];
232
2.07M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.07M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
2.35M
        while (ptr < end && *ptr == c)
240
277k
            ptr++;
241
2.07M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
3.30M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
3.30M
        chr = pattern[1];
270
3.30M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
3.30M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
16.9M
        while (ptr < end && *ptr != c)
278
13.6M
            ptr++;
279
3.30M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
206M
    }
319
320
206M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
206M
           ptr - (SRE_CHAR*) state->ptr));
322
206M
    return ptr - (SRE_CHAR*) state->ptr;
323
206M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     /* snapshot state->lastmark and state->lastindex into the current context (ctx) */ \
354
756M
    do { \
355
756M
        ctx->lastmark = state->lastmark; \
356
756M
        ctx->lastindex = state->lastindex; \
357
756M
    } while (0)  /* counterpart: LASTMARK_RESTORE() */
358
#define LASTMARK_RESTORE()  /* write the values saved in ctx by LASTMARK_SAVE() back into state */ \
359
278M
    do { \
360
278M
        state->lastmark = ctx->lastmark; \
361
278M
        state->lastindex = ctx->lastindex; \
362
278M
    } while (0)  /* only lastmark/lastindex are restored here; state->mark[] contents are not touched */
363
364
#define LAST_PTR_PUSH()     \
365
317M
    do { \
366
317M
        TRACE(("push last_ptr: %zd", \
367
317M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
317M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
317M
    } while (0)
370
#define LAST_PTR_POP()  \
371
317M
    do { \
372
317M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
317M
        TRACE(("pop last_ptr: %zd", \
374
317M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
317M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
772M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
1.10G
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.66G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
181M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
122M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.87G
#define DATA_STACK_ALLOC(state, type, ptr) /* reserve sizeof(type) bytes on state->data_stack and point ptr at them; uses the expanding function's locals alloc_pos, ctx and ctx_pos */ \
389
1.87G
do { \
390
1.87G
    alloc_pos = state->data_stack_base; /* offset of the new object; left in alloc_pos for the caller */ \
391
1.87G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.87G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.87G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { /* not enough room left */ \
394
176M
        int j = data_stack_grow(state, sizeof(type)); \
395
176M
        if (j < 0) return j; /* returns from the ENCLOSING function with the error code */ \
396
176M
        if (ctx_pos != -1) /* -1 means no context has been allocated yet */ \
397
176M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); /* re-derive ctx from its offset; presumably data_stack_grow may move the buffer -- TODO confirm */ \
398
176M
    } \
399
1.87G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.87G
    state->data_stack_base += sizeof(type); \
401
1.87G
} while (0)
402
403
1.94G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.94G
do { \
405
1.94G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.94G
    ptr = (type*)(state->data_stack+pos); \
407
1.94G
} while (0)
408
409
686M
#define DATA_STACK_PUSH(state, data, size) /* append a copy of size bytes at data to state->data_stack; uses the expanding function's locals ctx and ctx_pos */ \
410
686M
do { \
411
686M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
686M
           data, state->data_stack_base, size)); \
413
686M
    if (size > state->data_stack_size - state->data_stack_base) { /* not enough room left */ \
414
90.8k
        int j = data_stack_grow(state, size); \
415
90.8k
        if (j < 0) return j; /* returns from the ENCLOSING function with the error code */ \
416
90.8k
        if (ctx_pos != -1) \
417
90.8k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); /* re-derive ctx from its offset after growing */ \
418
90.8k
    } \
419
686M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
686M
    state->data_stack_base += size; \
421
686M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
470M
#define DATA_STACK_POP(state, data, size, discard) /* copy the top size bytes of state->data_stack into data; shrink the stack only when discard is nonzero */ \
427
470M
do { \
428
470M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
470M
           data, state->data_stack_base-size, size)); \
430
470M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
470M
    if (discard) /* discard == 0 leaves the bytes on the stack (used by MARK_POP_KEEP) */ \
432
470M
        state->data_stack_base -= size; \
433
470M
} while (0)
434
435
2.09G
#define DATA_STACK_POP_DISCARD(state, size) \
436
2.09G
do { \
437
2.09G
    TRACE(("discard data from %zd (%zd)\n", \
438
2.09G
           state->data_stack_base-size, size)); \
439
2.09G
    state->data_stack_base -= size; \
440
2.09G
} while(0)
441
442
#define DATA_PUSH(x) \
443
317M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
317M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.87G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.87G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.94G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) /* push state->mark[0..lastmark] onto the data stack; does nothing when lastmark < 0 */ \
472
631M
    do if (lastmark >= 0) { \
473
369M
        MARK_TRACE("push", (lastmark)); \
474
369M
        size_t _marks_size = (lastmark+1) * sizeof(void*); /* lastmark is an index, so lastmark+1 entries are copied */ \
475
369M
        DATA_STACK_PUSH(state, state->mark, _marks_size); /* may return from the enclosing function if the stack cannot grow */ \
476
631M
    } while (0)
477
#define MARK_POP(lastmark) \
478
211M
    do if (lastmark >= 0) { \
479
149M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
149M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
149M
        MARK_TRACE("pop", (lastmark)); \
482
211M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
2.59M
    do if (lastmark >= 0) { \
485
2.59M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.59M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.59M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.59M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
419M
    do if (lastmark >= 0) { \
491
219M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
219M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
219M
        MARK_TRACE("pop discard", (lastmark)); \
494
419M
    } while (0)
495
496
649M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
317M
#define JUMP_MAX_UNTIL_2     2
499
181M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
180M
#define JUMP_REPEAT          7
504
5.53M
#define JUMP_REPEAT_ONE_1    8
505
217M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
159M
#define JUMP_BRANCH          11
508
122M
#define JUMP_ASSERT          12
509
39.2M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) /* save the current position in ctx, allocate a child context on the data stack, and restart matching at 'entrance' with nextpattern; not wrapped in do/while, it expands to bare statements plus a label */ \
515
1.22G
    ctx->pattern = pattern; /* remember where to continue in the pattern ... */ \
516
1.22G
    ctx->ptr = ptr; /* ... and in the subject string */ \
517
1.22G
    DATA_ALLOC(SRE(match_context), nextctx); /* sets alloc_pos; may re-derive ctx if the stack had to grow */ \
518
1.22G
    nextctx->pattern = nextpattern; \
519
1.22G
    nextctx->toplevel = toplevel_; \
520
1.22G
    nextctx->jump = jumpvalue; /* one of the JUMP_* constants */ \
521
1.22G
    nextctx->last_ctx_pos = ctx_pos; /* link back to the context issuing the jump */ \
522
1.22G
    pattern = nextpattern; \
523
1.22G
    ctx_pos = alloc_pos; /* offset of the child context just allocated */ \
524
1.22G
    ctx = nextctx; \
525
1.22G
    goto entrance; \
526
1.22G
    jumplabel: /* NOTE(review): presumably reached from the exit path keyed on ctx->jump; that code is outside this excerpt */ \
527
1.22G
    pattern = ctx->pattern; /* reload the positions saved above */ \
528
1.22G
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
1.06G
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
161M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {  /* per-jump context record of SRE(match); instances are allocated on state->data_stack via DATA_ALLOC */
537
    Py_ssize_t count;  /* NOTE(review): uses are outside this excerpt; presumably a repeat counter -- confirm */
538
    union {
539
        SRE_CODE chr;  /* NOTE(review): uses are outside this excerpt */
540
        SRE_REPEAT* rep;  /* repeat record whose last_ptr is saved/restored by LAST_PTR_PUSH()/LAST_PTR_POP() */
541
    } u;
542
    int lastmark;  /* copy of state->lastmark taken by LASTMARK_SAVE() */
543
    int lastindex;  /* copy of state->lastindex taken by LASTMARK_SAVE() */
544
    const SRE_CODE* pattern;  /* pattern position saved by DO_JUMPX and reloaded at its jump label */
545
    const SRE_CHAR* ptr;  /* string position saved by DO_JUMPX and reloaded at its jump label */
546
    int toplevel;  /* when nonzero, SRE_OP_SUCCESS also applies the state->match_all / state->must_advance checks */
547
    int jump;  /* JUMP_* value stored by DO_JUMPX; JUMP_NONE for the initial context */
548
    Py_ssize_t last_ctx_pos;  /* data-stack offset of the context that issued the jump; -1 for the initial context */
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS /* call PyErr_CheckSignals() once per 4096 increments of the local sigcount; if it returns nonzero, return SRE_ERROR_INTERRUPTED from the enclosing function */ \
552
3.27G
    do {                                                           \
553
3.27G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
3.27G
    } while (0)  /* sigcount is incremented on every expansion, even when the check is skipped */
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
3.27G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
3.37G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
3.27G
        do {                               \
588
3.27G
            MAYBE_CHECK_SIGNALS;           \
589
3.27G
            goto *sre_targets[*pattern++]; \
590
3.27G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
649M
{
601
649M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
649M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
649M
    Py_ssize_t ret = 0;
604
649M
    int jump;
605
649M
    unsigned int sigcount = state->sigcount;
606
607
649M
    SRE(match_context)* ctx;
608
649M
    SRE(match_context)* nextctx;
609
649M
    INIT_TRACE(state);
610
611
649M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
649M
    DATA_ALLOC(SRE(match_context), ctx);
614
649M
    ctx->last_ctx_pos = -1;
615
649M
    ctx->jump = JUMP_NONE;
616
649M
    ctx->toplevel = toplevel;
617
649M
    ctx_pos = alloc_pos;
618
619
649M
#if USE_COMPUTED_GOTOS
620
649M
#include "sre_targets.h"
621
649M
#endif
622
623
1.87G
entrance:
624
625
1.87G
    ;  // Fashion statement.
626
1.87G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.87G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
96.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.84M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.84M
                   end - ptr, (size_t) pattern[3]));
634
5.84M
            RETURN_FAILURE;
635
5.84M
        }
636
90.1M
        pattern += pattern[1] + 1;
637
90.1M
    }
638
639
1.86G
#if USE_COMPUTED_GOTOS
640
1.86G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.86G
    {
647
648
1.86G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
693M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
693M
                   ptr, pattern[0]));
653
693M
            {
654
693M
                int i = pattern[0];
655
693M
                if (i & 1)
656
85.0M
                    state->lastindex = i/2 + 1;
657
693M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
687M
                    int j = state->lastmark + 1;
663
692M
                    while (j < i)
664
5.03M
                        state->mark[j++] = NULL;
665
687M
                    state->lastmark = i;
666
687M
                }
667
693M
                state->mark[i] = ptr;
668
693M
            }
669
693M
            pattern++;
670
693M
            DISPATCH;
671
672
693M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
259M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
259M
                   ptr, *pattern));
677
259M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
167M
                RETURN_FAILURE;
679
91.9M
            pattern++;
680
91.9M
            ptr++;
681
91.9M
            DISPATCH;
682
683
91.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
279M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
279M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
279M
            if (ctx->toplevel &&
698
279M
                ((state->match_all && ptr != state->end) ||
699
80.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
279M
            state->ptr = ptr;
704
279M
            RETURN_SUCCESS;
705
706
13.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
13.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
13.2M
            if (!SRE(at)(state, ptr, *pattern))
711
4.10M
                RETURN_FAILURE;
712
9.18M
            pattern++;
713
9.18M
            DISPATCH;
714
715
9.18M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
357M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
357M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
357M
            if (ptr >= end ||
749
357M
                !SRE(charset)(state, pattern + 1, *ptr))
750
6.31M
                RETURN_FAILURE;
751
351M
            pattern += pattern[0];
752
351M
            ptr++;
753
351M
            DISPATCH;
754
755
351M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.75M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.75M
                   pattern, ptr, pattern[0]));
758
4.75M
            if (ptr >= end ||
759
4.75M
                sre_lower_ascii(*ptr) != *pattern)
760
389k
                RETURN_FAILURE;
761
4.36M
            pattern++;
762
4.36M
            ptr++;
763
4.36M
            DISPATCH;
764
765
4.36M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
99.7M
        TARGET(SRE_OP_JUMP):
845
99.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
99.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
99.7M
                   ptr, pattern[0]));
850
99.7M
            pattern += pattern[0];
851
99.7M
            DISPATCH;
852
853
177M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
177M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
177M
            LASTMARK_SAVE();
858
177M
            if (state->repeat)
859
136M
                MARK_PUSH(ctx->lastmark);
860
428M
            for (; pattern[0]; pattern += pattern[0]) {
861
347M
                if (pattern[1] == SRE_OP_LITERAL &&
862
347M
                    (ptr >= end ||
863
164M
                     (SRE_CODE) *ptr != pattern[2]))
864
91.4M
                    continue;
865
256M
                if (pattern[1] == SRE_OP_IN &&
866
256M
                    (ptr >= end ||
867
132M
                     !SRE(charset)(state, pattern + 3,
868
132M
                                   (SRE_CODE) *ptr)))
869
97.0M
                    continue;
870
159M
                state->ptr = ptr;
871
159M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
159M
                if (ret) {
873
97.0M
                    if (state->repeat)
874
79.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
97.0M
                    RETURN_ON_ERROR(ret);
876
97.0M
                    RETURN_SUCCESS;
877
97.0M
                }
878
62.3M
                if (state->repeat)
879
16.6k
                    MARK_POP_KEEP(ctx->lastmark);
880
62.3M
                LASTMARK_RESTORE();
881
62.3M
            }
882
80.6M
            if (state->repeat)
883
57.0M
                MARK_POP_DISCARD(ctx->lastmark);
884
80.6M
            RETURN_FAILURE;
885
886
721M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
721M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
721M
                   pattern[1], pattern[2]));
898
899
721M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.16M
                RETURN_FAILURE; /* cannot match */
901
902
720M
            state->ptr = ptr;
903
904
720M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
720M
            RETURN_ON_ERROR(ret);
906
720M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
720M
            ctx->count = ret;
908
720M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
720M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
498M
                RETURN_FAILURE;
917
918
221M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
221M
                ptr == state->end &&
920
221M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
71.2k
            {
922
                /* tail is empty.  we're finished */
923
71.2k
                state->ptr = ptr;
924
71.2k
                RETURN_SUCCESS;
925
71.2k
            }
926
927
221M
            LASTMARK_SAVE();
928
221M
            if (state->repeat)
929
138M
                MARK_PUSH(ctx->lastmark);
930
931
221M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.93M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.93M
                for (;;) {
936
37.2M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
37.2M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
30.2M
                        ptr--;
939
30.2M
                        ctx->count--;
940
30.2M
                    }
941
6.93M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.40M
                        break;
943
5.53M
                    state->ptr = ptr;
944
5.53M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.53M
                            pattern+pattern[0]);
946
5.53M
                    if (ret) {
947
5.53M
                        if (state->repeat)
948
4.35M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.53M
                        RETURN_ON_ERROR(ret);
950
5.53M
                        RETURN_SUCCESS;
951
5.53M
                    }
952
1.45k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
1.45k
                    LASTMARK_RESTORE();
955
956
1.45k
                    ptr--;
957
1.45k
                    ctx->count--;
958
1.45k
                }
959
1.40M
                if (state->repeat)
960
756
                    MARK_POP_DISCARD(ctx->lastmark);
961
214M
            } else {
962
                /* general case */
963
219M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
217M
                    state->ptr = ptr;
965
217M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
217M
                            pattern+pattern[0]);
967
217M
                    if (ret) {
968
213M
                        if (state->repeat)
969
132M
                            MARK_POP_DISCARD(ctx->lastmark);
970
213M
                        RETURN_ON_ERROR(ret);
971
213M
                        RETURN_SUCCESS;
972
213M
                    }
973
4.31M
                    if (state->repeat)
974
2.57M
                        MARK_POP_KEEP(ctx->lastmark);
975
4.31M
                    LASTMARK_RESTORE();
976
977
4.31M
                    ptr--;
978
4.31M
                    ctx->count--;
979
4.31M
                }
980
1.50M
                if (state->repeat)
981
1.33M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.50M
            }
983
2.90M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
180M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
180M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
180M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
180M
            ctx->u.rep = repeat_pool_malloc(state);
1127
180M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
180M
            ctx->u.rep->count = -1;
1131
180M
            ctx->u.rep->pattern = pattern;
1132
180M
            ctx->u.rep->prev = state->repeat;
1133
180M
            ctx->u.rep->last_ptr = NULL;
1134
180M
            state->repeat = ctx->u.rep;
1135
1136
180M
            state->ptr = ptr;
1137
180M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
180M
            state->repeat = ctx->u.rep->prev;
1139
180M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
180M
            if (ret) {
1142
180M
                RETURN_ON_ERROR(ret);
1143
180M
                RETURN_SUCCESS;
1144
180M
            }
1145
90.1k
            RETURN_FAILURE;
1146
1147
327M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
327M
            ctx->u.rep = state->repeat;
1155
327M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
327M
            state->ptr = ptr;
1159
1160
327M
            ctx->count = ctx->u.rep->count+1;
1161
1162
327M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
327M
                   ptr, ctx->count));
1164
1165
327M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
327M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
327M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
327M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
317M
                ctx->u.rep->count = ctx->count;
1185
317M
                LASTMARK_SAVE();
1186
317M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
317M
                LAST_PTR_PUSH();
1189
317M
                ctx->u.rep->last_ptr = state->ptr;
1190
317M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
317M
                        ctx->u.rep->pattern+3);
1192
317M
                LAST_PTR_POP();
1193
317M
                if (ret) {
1194
145M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
145M
                    RETURN_ON_ERROR(ret);
1196
145M
                    RETURN_SUCCESS;
1197
145M
                }
1198
172M
                MARK_POP(ctx->lastmark);
1199
172M
                LASTMARK_RESTORE();
1200
172M
                ctx->u.rep->count = ctx->count-1;
1201
172M
                state->ptr = ptr;
1202
172M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
181M
            state->repeat = ctx->u.rep->prev;
1207
181M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
181M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
181M
            RETURN_ON_SUCCESS(ret);
1211
1.38M
            state->ptr = ptr;
1212
1.38M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
122M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
122M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
122M
                   ptr, pattern[1]));
1565
122M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
122M
            state->ptr = ptr - pattern[1];
1568
122M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
122M
            RETURN_ON_FAILURE(ret);
1570
118M
            pattern += pattern[0];
1571
118M
            DISPATCH;
1572
1573
118M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
39.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
39.2M
                   ptr, pattern[1]));
1578
39.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
39.2M
                state->ptr = ptr - pattern[1];
1580
39.2M
                LASTMARK_SAVE();
1581
39.2M
                if (state->repeat)
1582
39.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
78.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
78.5M
                if (ret) {
1586
9.82k
                    if (state->repeat)
1587
9.82k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
9.82k
                    RETURN_ON_ERROR(ret);
1589
9.82k
                    RETURN_FAILURE;
1590
9.82k
                }
1591
39.2M
                if (state->repeat)
1592
39.2M
                    MARK_POP(ctx->lastmark);
1593
39.2M
                LASTMARK_RESTORE();
1594
39.2M
            }
1595
39.2M
            pattern += pattern[0];
1596
39.2M
            DISPATCH;
1597
1598
39.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.87G
exit:
1620
1.87G
    ctx_pos = ctx->last_ctx_pos;
1621
1.87G
    jump = ctx->jump;
1622
1.87G
    DATA_POP_DISCARD(ctx);
1623
1.87G
    if (ctx_pos == -1) {
1624
649M
        state->sigcount = sigcount;
1625
649M
        return ret;
1626
649M
    }
1627
1.22G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.22G
    switch (jump) {
1630
317M
        case JUMP_MAX_UNTIL_2:
1631
317M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
317M
            goto jump_max_until_2;
1633
181M
        case JUMP_MAX_UNTIL_3:
1634
181M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
181M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
159M
        case JUMP_BRANCH:
1643
159M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
159M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
180M
        case JUMP_REPEAT:
1658
180M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
180M
            goto jump_repeat;
1660
5.53M
        case JUMP_REPEAT_ONE_1:
1661
5.53M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.53M
            goto jump_repeat_one_1;
1663
217M
        case JUMP_REPEAT_ONE_2:
1664
217M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
217M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
122M
        case JUMP_ASSERT:
1673
122M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
122M
            goto jump_assert;
1675
39.2M
        case JUMP_ASSERT_NOT:
1676
39.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
39.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.22G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.22G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
211M
{
601
211M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
211M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
211M
    Py_ssize_t ret = 0;
604
211M
    int jump;
605
211M
    unsigned int sigcount = state->sigcount;
606
607
211M
    SRE(match_context)* ctx;
608
211M
    SRE(match_context)* nextctx;
609
211M
    INIT_TRACE(state);
610
611
211M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
211M
    DATA_ALLOC(SRE(match_context), ctx);
614
211M
    ctx->last_ctx_pos = -1;
615
211M
    ctx->jump = JUMP_NONE;
616
211M
    ctx->toplevel = toplevel;
617
211M
    ctx_pos = alloc_pos;
618
619
211M
#if USE_COMPUTED_GOTOS
620
211M
#include "sre_targets.h"
621
211M
#endif
622
623
345M
entrance:
624
625
345M
    ;  // Fashion statement.
626
345M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
345M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
29.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
5.84M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
5.84M
                   end - ptr, (size_t) pattern[3]));
634
5.84M
            RETURN_FAILURE;
635
5.84M
        }
636
23.9M
        pattern += pattern[1] + 1;
637
23.9M
    }
638
639
339M
#if USE_COMPUTED_GOTOS
640
339M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
339M
    {
647
648
339M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
186M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
186M
                   ptr, pattern[0]));
653
186M
            {
654
186M
                int i = pattern[0];
655
186M
                if (i & 1)
656
17.1M
                    state->lastindex = i/2 + 1;
657
186M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
183M
                    int j = state->lastmark + 1;
663
186M
                    while (j < i)
664
3.35M
                        state->mark[j++] = NULL;
665
183M
                    state->lastmark = i;
666
183M
                }
667
186M
                state->mark[i] = ptr;
668
186M
            }
669
186M
            pattern++;
670
186M
            DISPATCH;
671
672
186M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
56.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
56.2M
                   ptr, *pattern));
677
56.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
19.6M
                RETURN_FAILURE;
679
36.5M
            pattern++;
680
36.5M
            ptr++;
681
36.5M
            DISPATCH;
682
683
36.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
48.8M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
48.8M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
48.8M
            if (ctx->toplevel &&
698
48.8M
                ((state->match_all && ptr != state->end) ||
699
17.9M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
48.8M
            state->ptr = ptr;
704
48.8M
            RETURN_SUCCESS;
705
706
11.6M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
11.6M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
11.6M
            if (!SRE(at)(state, ptr, *pattern))
711
2.50M
                RETURN_FAILURE;
712
9.14M
            pattern++;
713
9.14M
            DISPATCH;
714
715
9.14M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
34.4M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
34.4M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
34.4M
            if (ptr >= end ||
749
34.4M
                !SRE(charset)(state, pattern + 1, *ptr))
750
376k
                RETURN_FAILURE;
751
34.0M
            pattern += pattern[0];
752
34.0M
            ptr++;
753
34.0M
            DISPATCH;
754
755
34.0M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.45M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.45M
                   pattern, ptr, pattern[0]));
758
1.45M
            if (ptr >= end ||
759
1.45M
                sre_lower_ascii(*ptr) != *pattern)
760
235k
                RETURN_FAILURE;
761
1.21M
            pattern++;
762
1.21M
            ptr++;
763
1.21M
            DISPATCH;
764
765
1.21M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
22.0M
        TARGET(SRE_OP_JUMP):
845
22.0M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
22.0M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
22.0M
                   ptr, pattern[0]));
850
22.0M
            pattern += pattern[0];
851
22.0M
            DISPATCH;
852
853
41.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
41.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
41.6M
            LASTMARK_SAVE();
858
41.6M
            if (state->repeat)
859
5.77M
                MARK_PUSH(ctx->lastmark);
860
124M
            for (; pattern[0]; pattern += pattern[0]) {
861
102M
                if (pattern[1] == SRE_OP_LITERAL &&
862
102M
                    (ptr >= end ||
863
51.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
20.4M
                    continue;
865
82.2M
                if (pattern[1] == SRE_OP_IN &&
866
82.2M
                    (ptr >= end ||
867
6.99M
                     !SRE(charset)(state, pattern + 3,
868
6.98M
                                   (SRE_CODE) *ptr)))
869
3.97M
                    continue;
870
78.3M
                state->ptr = ptr;
871
78.3M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
78.3M
                if (ret) {
873
20.2M
                    if (state->repeat)
874
5.63M
                        MARK_POP_DISCARD(ctx->lastmark);
875
20.2M
                    RETURN_ON_ERROR(ret);
876
20.2M
                    RETURN_SUCCESS;
877
20.2M
                }
878
58.0M
                if (state->repeat)
879
7.56k
                    MARK_POP_KEEP(ctx->lastmark);
880
58.0M
                LASTMARK_RESTORE();
881
58.0M
            }
882
21.4M
            if (state->repeat)
883
134k
                MARK_POP_DISCARD(ctx->lastmark);
884
21.4M
            RETURN_FAILURE;
885
886
201M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
201M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
201M
                   pattern[1], pattern[2]));
898
899
201M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
939k
                RETURN_FAILURE; /* cannot match */
901
902
201M
            state->ptr = ptr;
903
904
201M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
201M
            RETURN_ON_ERROR(ret);
906
201M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
201M
            ctx->count = ret;
908
201M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
201M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
178M
                RETURN_FAILURE;
917
918
22.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
22.4M
                ptr == state->end &&
920
22.4M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
51.6k
            {
922
                /* tail is empty.  we're finished */
923
51.6k
                state->ptr = ptr;
924
51.6k
                RETURN_SUCCESS;
925
51.6k
            }
926
927
22.3M
            LASTMARK_SAVE();
928
22.3M
            if (state->repeat)
929
11.7M
                MARK_PUSH(ctx->lastmark);
930
931
22.3M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
2.70M
                ctx->u.chr = pattern[pattern[0]+1];
935
2.70M
                for (;;) {
936
10.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
10.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
8.24M
                        ptr--;
939
8.24M
                        ctx->count--;
940
8.24M
                    }
941
2.70M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.36M
                        break;
943
1.34M
                    state->ptr = ptr;
944
1.34M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1.34M
                            pattern+pattern[0]);
946
1.34M
                    if (ret) {
947
1.34M
                        if (state->repeat)
948
202k
                            MARK_POP_DISCARD(ctx->lastmark);
949
1.34M
                        RETURN_ON_ERROR(ret);
950
1.34M
                        RETURN_SUCCESS;
951
1.34M
                    }
952
273
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
273
                    LASTMARK_RESTORE();
955
956
273
                    ptr--;
957
273
                    ctx->count--;
958
273
                }
959
1.36M
                if (state->repeat)
960
212
                    MARK_POP_DISCARD(ctx->lastmark);
961
19.6M
            } else {
962
                /* general case */
963
21.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
20.7M
                    state->ptr = ptr;
965
20.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
20.7M
                            pattern+pattern[0]);
967
20.7M
                    if (ret) {
968
18.8M
                        if (state->repeat)
969
10.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
18.8M
                        RETURN_ON_ERROR(ret);
971
18.8M
                        RETURN_SUCCESS;
972
18.8M
                    }
973
1.83M
                    if (state->repeat)
974
1.17M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.83M
                    LASTMARK_RESTORE();
976
977
1.83M
                    ptr--;
978
1.83M
                    ctx->count--;
979
1.83M
                }
980
798k
                if (state->repeat)
981
631k
                    MARK_POP_DISCARD(ctx->lastmark);
982
798k
            }
983
2.16M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
6.30M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
6.30M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
6.30M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
6.30M
            ctx->u.rep = repeat_pool_malloc(state);
1127
6.30M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
6.30M
            ctx->u.rep->count = -1;
1131
6.30M
            ctx->u.rep->pattern = pattern;
1132
6.30M
            ctx->u.rep->prev = state->repeat;
1133
6.30M
            ctx->u.rep->last_ptr = NULL;
1134
6.30M
            state->repeat = ctx->u.rep;
1135
1136
6.30M
            state->ptr = ptr;
1137
6.30M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
6.30M
            state->repeat = ctx->u.rep->prev;
1139
6.30M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
6.30M
            if (ret) {
1142
6.21M
                RETURN_ON_ERROR(ret);
1143
6.21M
                RETURN_SUCCESS;
1144
6.21M
            }
1145
88.6k
            RETURN_FAILURE;
1146
1147
17.9M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
17.9M
            ctx->u.rep = state->repeat;
1155
17.9M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
17.9M
            state->ptr = ptr;
1159
1160
17.9M
            ctx->count = ctx->u.rep->count+1;
1161
1162
17.9M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
17.9M
                   ptr, ctx->count));
1164
1165
17.9M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
17.9M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
17.9M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
17.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
14.3M
                ctx->u.rep->count = ctx->count;
1185
14.3M
                LASTMARK_SAVE();
1186
14.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
14.3M
                LAST_PTR_PUSH();
1189
14.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
14.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
14.3M
                        ctx->u.rep->pattern+3);
1192
14.3M
                LAST_PTR_POP();
1193
14.3M
                if (ret) {
1194
11.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
11.0M
                    RETURN_ON_ERROR(ret);
1196
11.0M
                    RETURN_SUCCESS;
1197
11.0M
                }
1198
3.29M
                MARK_POP(ctx->lastmark);
1199
3.29M
                LASTMARK_RESTORE();
1200
3.29M
                ctx->u.rep->count = ctx->count-1;
1201
3.29M
                state->ptr = ptr;
1202
3.29M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
6.90M
            state->repeat = ctx->u.rep->prev;
1207
6.90M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
6.90M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
6.90M
            RETURN_ON_SUCCESS(ret);
1211
683k
            state->ptr = ptr;
1212
683k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.90M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.90M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.90M
                   ptr, pattern[1]));
1565
2.90M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.90M
            state->ptr = ptr - pattern[1];
1568
2.90M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.90M
            RETURN_ON_FAILURE(ret);
1570
2.82M
            pattern += pattern[0];
1571
2.82M
            DISPATCH;
1572
1573
2.82M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
2.60M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
2.60M
                   ptr, pattern[1]));
1578
2.60M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
2.60M
                state->ptr = ptr - pattern[1];
1580
2.60M
                LASTMARK_SAVE();
1581
2.60M
                if (state->repeat)
1582
2.60M
                    MARK_PUSH(ctx->lastmark);
1583
1584
5.20M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
5.20M
                if (ret) {
1586
1.31k
                    if (state->repeat)
1587
1.31k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.31k
                    RETURN_ON_ERROR(ret);
1589
1.31k
                    RETURN_FAILURE;
1590
1.31k
                }
1591
2.60M
                if (state->repeat)
1592
2.60M
                    MARK_POP(ctx->lastmark);
1593
2.60M
                LASTMARK_RESTORE();
1594
2.60M
            }
1595
2.60M
            pattern += pattern[0];
1596
2.60M
            DISPATCH;
1597
1598
2.60M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
345M
exit:
1620
345M
    ctx_pos = ctx->last_ctx_pos;
1621
345M
    jump = ctx->jump;
1622
345M
    DATA_POP_DISCARD(ctx);
1623
345M
    if (ctx_pos == -1) {
1624
211M
        state->sigcount = sigcount;
1625
211M
        return ret;
1626
211M
    }
1627
133M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
133M
    switch (jump) {
1630
14.3M
        case JUMP_MAX_UNTIL_2:
1631
14.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
14.3M
            goto jump_max_until_2;
1633
6.90M
        case JUMP_MAX_UNTIL_3:
1634
6.90M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
6.90M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
78.3M
        case JUMP_BRANCH:
1643
78.3M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
78.3M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
6.30M
        case JUMP_REPEAT:
1658
6.30M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
6.30M
            goto jump_repeat;
1660
1.34M
        case JUMP_REPEAT_ONE_1:
1661
1.34M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1.34M
            goto jump_repeat_one_1;
1663
20.7M
        case JUMP_REPEAT_ONE_2:
1664
20.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
20.7M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.90M
        case JUMP_ASSERT:
1673
2.90M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.90M
            goto jump_assert;
1675
2.60M
        case JUMP_ASSERT_NOT:
1676
2.60M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
2.60M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
133M
    }
1683
1684
0
    return ret; /* should never get here */
1685
133M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
311M
{
601
311M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
311M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
311M
    Py_ssize_t ret = 0;
604
311M
    int jump;
605
311M
    unsigned int sigcount = state->sigcount;
606
607
311M
    SRE(match_context)* ctx;
608
311M
    SRE(match_context)* nextctx;
609
311M
    INIT_TRACE(state);
610
611
311M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
311M
    DATA_ALLOC(SRE(match_context), ctx);
614
311M
    ctx->last_ctx_pos = -1;
615
311M
    ctx->jump = JUMP_NONE;
616
311M
    ctx->toplevel = toplevel;
617
311M
    ctx_pos = alloc_pos;
618
619
311M
#if USE_COMPUTED_GOTOS
620
311M
#include "sre_targets.h"
621
311M
#endif
622
623
682M
entrance:
624
625
682M
    ;  // Fashion statement.
626
682M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
682M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
26.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
671
            TRACE(("reject (got %tu chars, need %zu)\n",
633
671
                   end - ptr, (size_t) pattern[3]));
634
671
            RETURN_FAILURE;
635
671
        }
636
26.1M
        pattern += pattern[1] + 1;
637
26.1M
    }
638
639
682M
#if USE_COMPUTED_GOTOS
640
682M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
682M
    {
647
648
682M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
301M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
301M
                   ptr, pattern[0]));
653
301M
            {
654
301M
                int i = pattern[0];
655
301M
                if (i & 1)
656
22.2M
                    state->lastindex = i/2 + 1;
657
301M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
301M
                    int j = state->lastmark + 1;
663
301M
                    while (j < i)
664
106k
                        state->mark[j++] = NULL;
665
301M
                    state->lastmark = i;
666
301M
                }
667
301M
                state->mark[i] = ptr;
668
301M
            }
669
301M
            pattern++;
670
301M
            DISPATCH;
671
672
301M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
79.9M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
79.9M
                   ptr, *pattern));
677
79.9M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
54.3M
                RETURN_FAILURE;
679
25.6M
            pattern++;
680
25.6M
            ptr++;
681
25.6M
            DISPATCH;
682
683
25.6M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
104M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
104M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
104M
            if (ctx->toplevel &&
698
104M
                ((state->match_all && ptr != state->end) ||
699
23.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
104M
            state->ptr = ptr;
704
104M
            RETURN_SUCCESS;
705
706
1.12M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.12M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.12M
            if (!SRE(at)(state, ptr, *pattern))
711
1.08M
                RETURN_FAILURE;
712
34.9k
            pattern++;
713
34.9k
            DISPATCH;
714
715
34.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
129M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
129M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
129M
            if (ptr >= end ||
749
129M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.46M
                RETURN_FAILURE;
751
124M
            pattern += pattern[0];
752
124M
            ptr++;
753
124M
            DISPATCH;
754
755
124M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.86M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.86M
                   pattern, ptr, pattern[0]));
758
2.86M
            if (ptr >= end ||
759
2.86M
                sre_lower_ascii(*ptr) != *pattern)
760
142k
                RETURN_FAILURE;
761
2.72M
            pattern++;
762
2.72M
            ptr++;
763
2.72M
            DISPATCH;
764
765
2.72M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
25.7M
        TARGET(SRE_OP_JUMP):
845
25.7M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
25.7M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
25.7M
                   ptr, pattern[0]));
850
25.7M
            pattern += pattern[0];
851
25.7M
            DISPATCH;
852
853
45.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
45.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
45.0M
            LASTMARK_SAVE();
858
45.0M
            if (state->repeat)
859
42.0M
                MARK_PUSH(ctx->lastmark);
860
107M
            for (; pattern[0]; pattern += pattern[0]) {
861
88.1M
                if (pattern[1] == SRE_OP_LITERAL &&
862
88.1M
                    (ptr >= end ||
863
43.2M
                     (SRE_CODE) *ptr != pattern[2]))
864
22.8M
                    continue;
865
65.2M
                if (pattern[1] == SRE_OP_IN &&
866
65.2M
                    (ptr >= end ||
867
41.2M
                     !SRE(charset)(state, pattern + 3,
868
41.2M
                                   (SRE_CODE) *ptr)))
869
36.6M
                    continue;
870
28.5M
                state->ptr = ptr;
871
28.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
28.5M
                if (ret) {
873
25.2M
                    if (state->repeat)
874
23.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
25.2M
                    RETURN_ON_ERROR(ret);
876
25.2M
                    RETURN_SUCCESS;
877
25.2M
                }
878
3.32M
                if (state->repeat)
879
3.02k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.32M
                LASTMARK_RESTORE();
881
3.32M
            }
882
19.7M
            if (state->repeat)
883
18.1M
                MARK_POP_DISCARD(ctx->lastmark);
884
19.7M
            RETURN_FAILURE;
885
886
312M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
312M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
312M
                   pattern[1], pattern[2]));
898
899
312M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
200k
                RETURN_FAILURE; /* cannot match */
901
902
312M
            state->ptr = ptr;
903
904
312M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
312M
            RETURN_ON_ERROR(ret);
906
312M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
312M
            ctx->count = ret;
908
312M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
312M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
242M
                RETURN_FAILURE;
917
918
70.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
70.1M
                ptr == state->end &&
920
70.1M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
15.5k
            {
922
                /* tail is empty.  we're finished */
923
15.5k
                state->ptr = ptr;
924
15.5k
                RETURN_SUCCESS;
925
15.5k
            }
926
927
70.1M
            LASTMARK_SAVE();
928
70.1M
            if (state->repeat)
929
41.6M
                MARK_PUSH(ctx->lastmark);
930
931
70.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
887k
                ctx->u.chr = pattern[pattern[0]+1];
935
888k
                for (;;) {
936
18.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
18.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
17.5M
                        ptr--;
939
17.5M
                        ctx->count--;
940
17.5M
                    }
941
888k
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
16.0k
                        break;
943
872k
                    state->ptr = ptr;
944
872k
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
872k
                            pattern+pattern[0]);
946
872k
                    if (ret) {
947
871k
                        if (state->repeat)
948
848k
                            MARK_POP_DISCARD(ctx->lastmark);
949
871k
                        RETURN_ON_ERROR(ret);
950
871k
                        RETURN_SUCCESS;
951
871k
                    }
952
917
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
917
                    LASTMARK_RESTORE();
955
956
917
                    ptr--;
957
917
                    ctx->count--;
958
917
                }
959
16.0k
                if (state->repeat)
960
285
                    MARK_POP_DISCARD(ctx->lastmark);
961
69.2M
            } else {
962
                /* general case */
963
70.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
70.0M
                    state->ptr = ptr;
965
70.0M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
70.0M
                            pattern+pattern[0]);
967
70.0M
                    if (ret) {
968
68.6M
                        if (state->repeat)
969
40.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
68.6M
                        RETURN_ON_ERROR(ret);
971
68.6M
                        RETURN_SUCCESS;
972
68.6M
                    }
973
1.38M
                    if (state->repeat)
974
1.23M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.38M
                    LASTMARK_RESTORE();
976
977
1.38M
                    ptr--;
978
1.38M
                    ctx->count--;
979
1.38M
                }
980
622k
                if (state->repeat)
981
619k
                    MARK_POP_DISCARD(ctx->lastmark);
982
622k
            }
983
638k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
56.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
56.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
56.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
56.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
56.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
56.3M
            ctx->u.rep->count = -1;
1131
56.3M
            ctx->u.rep->pattern = pattern;
1132
56.3M
            ctx->u.rep->prev = state->repeat;
1133
56.3M
            ctx->u.rep->last_ptr = NULL;
1134
56.3M
            state->repeat = ctx->u.rep;
1135
1136
56.3M
            state->ptr = ptr;
1137
56.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
56.3M
            state->repeat = ctx->u.rep->prev;
1139
56.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
56.3M
            if (ret) {
1142
56.3M
                RETURN_ON_ERROR(ret);
1143
56.3M
                RETURN_SUCCESS;
1144
56.3M
            }
1145
954
            RETURN_FAILURE;
1146
1147
100M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
100M
            ctx->u.rep = state->repeat;
1155
100M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
100M
            state->ptr = ptr;
1159
1160
100M
            ctx->count = ctx->u.rep->count+1;
1161
1162
100M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
100M
                   ptr, ctx->count));
1164
1165
100M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
100M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
100M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
100M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
99.2M
                ctx->u.rep->count = ctx->count;
1185
99.2M
                LASTMARK_SAVE();
1186
99.2M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
99.2M
                LAST_PTR_PUSH();
1189
99.2M
                ctx->u.rep->last_ptr = state->ptr;
1190
99.2M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
99.2M
                        ctx->u.rep->pattern+3);
1192
99.2M
                LAST_PTR_POP();
1193
99.2M
                if (ret) {
1194
43.8M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
43.8M
                    RETURN_ON_ERROR(ret);
1196
43.8M
                    RETURN_SUCCESS;
1197
43.8M
                }
1198
55.3M
                MARK_POP(ctx->lastmark);
1199
55.3M
                LASTMARK_RESTORE();
1200
55.3M
                ctx->u.rep->count = ctx->count-1;
1201
55.3M
                state->ptr = ptr;
1202
55.3M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
56.9M
            state->repeat = ctx->u.rep->prev;
1207
56.9M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
56.9M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
56.9M
            RETURN_ON_SUCCESS(ret);
1211
620k
            state->ptr = ptr;
1212
620k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
39.8M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
39.8M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
39.8M
                   ptr, pattern[1]));
1565
39.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
39.8M
            state->ptr = ptr - pattern[1];
1568
39.8M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
39.8M
            RETURN_ON_FAILURE(ret);
1570
36.9M
            pattern += pattern[0];
1571
36.9M
            DISPATCH;
1572
1573
36.9M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
18.4M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
18.4M
                   ptr, pattern[1]));
1578
18.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
18.4M
                state->ptr = ptr - pattern[1];
1580
18.4M
                LASTMARK_SAVE();
1581
18.4M
                if (state->repeat)
1582
18.4M
                    MARK_PUSH(ctx->lastmark);
1583
1584
36.9M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
36.9M
                if (ret) {
1586
2.73k
                    if (state->repeat)
1587
2.73k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.73k
                    RETURN_ON_ERROR(ret);
1589
2.73k
                    RETURN_FAILURE;
1590
2.73k
                }
1591
18.4M
                if (state->repeat)
1592
18.4M
                    MARK_POP(ctx->lastmark);
1593
18.4M
                LASTMARK_RESTORE();
1594
18.4M
            }
1595
18.4M
            pattern += pattern[0];
1596
18.4M
            DISPATCH;
1597
1598
18.4M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
682M
exit:
1620
682M
    ctx_pos = ctx->last_ctx_pos;
1621
682M
    jump = ctx->jump;
1622
682M
    DATA_POP_DISCARD(ctx);
1623
682M
    if (ctx_pos == -1) {
1624
311M
        state->sigcount = sigcount;
1625
311M
        return ret;
1626
311M
    }
1627
370M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
370M
    switch (jump) {
1630
99.2M
        case JUMP_MAX_UNTIL_2:
1631
99.2M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
99.2M
            goto jump_max_until_2;
1633
56.9M
        case JUMP_MAX_UNTIL_3:
1634
56.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
56.9M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
28.5M
        case JUMP_BRANCH:
1643
28.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
28.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
56.3M
        case JUMP_REPEAT:
1658
56.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
56.3M
            goto jump_repeat;
1660
872k
        case JUMP_REPEAT_ONE_1:
1661
872k
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
872k
            goto jump_repeat_one_1;
1663
70.0M
        case JUMP_REPEAT_ONE_2:
1664
70.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
70.0M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
39.8M
        case JUMP_ASSERT:
1673
39.8M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
39.8M
            goto jump_assert;
1675
18.4M
        case JUMP_ASSERT_NOT:
1676
18.4M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
18.4M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
370M
    }
1683
1684
0
    return ret; /* should never get here */
1685
370M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
125M
{
601
125M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
125M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
125M
    Py_ssize_t ret = 0;
604
125M
    int jump;
605
125M
    unsigned int sigcount = state->sigcount;
606
607
125M
    SRE(match_context)* ctx;
608
125M
    SRE(match_context)* nextctx;
609
125M
    INIT_TRACE(state);
610
611
125M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
125M
    DATA_ALLOC(SRE(match_context), ctx);
614
125M
    ctx->last_ctx_pos = -1;
615
125M
    ctx->jump = JUMP_NONE;
616
125M
    ctx->toplevel = toplevel;
617
125M
    ctx_pos = alloc_pos;
618
619
125M
#if USE_COMPUTED_GOTOS
620
125M
#include "sre_targets.h"
621
125M
#endif
622
623
846M
entrance:
624
625
846M
    ;  // Fashion statement.
626
846M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
846M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
40.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
378
            TRACE(("reject (got %tu chars, need %zu)\n",
633
378
                   end - ptr, (size_t) pattern[3]));
634
378
            RETURN_FAILURE;
635
378
        }
636
40.1M
        pattern += pattern[1] + 1;
637
40.1M
    }
638
639
846M
#if USE_COMPUTED_GOTOS
640
846M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
846M
    {
647
648
846M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
204M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
204M
                   ptr, pattern[0]));
653
204M
            {
654
204M
                int i = pattern[0];
655
204M
                if (i & 1)
656
45.5M
                    state->lastindex = i/2 + 1;
657
204M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
203M
                    int j = state->lastmark + 1;
663
204M
                    while (j < i)
664
1.57M
                        state->mark[j++] = NULL;
665
203M
                    state->lastmark = i;
666
203M
                }
667
204M
                state->mark[i] = ptr;
668
204M
            }
669
204M
            pattern++;
670
204M
            DISPATCH;
671
672
204M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
123M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
123M
                   ptr, *pattern));
677
123M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
93.6M
                RETURN_FAILURE;
679
29.7M
            pattern++;
680
29.7M
            ptr++;
681
29.7M
            DISPATCH;
682
683
29.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
126M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
126M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
126M
            if (ctx->toplevel &&
698
126M
                ((state->match_all && ptr != state->end) ||
699
39.5M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
126M
            state->ptr = ptr;
704
126M
            RETURN_SUCCESS;
705
706
517k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
517k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
517k
            if (!SRE(at)(state, ptr, *pattern))
711
511k
                RETURN_FAILURE;
712
5.66k
            pattern++;
713
5.66k
            DISPATCH;
714
715
5.66k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
193M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
193M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
193M
            if (ptr >= end ||
749
193M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.47M
                RETURN_FAILURE;
751
192M
            pattern += pattern[0];
752
192M
            ptr++;
753
192M
            DISPATCH;
754
755
192M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
436k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
436k
                   pattern, ptr, pattern[0]));
758
436k
            if (ptr >= end ||
759
436k
                sre_lower_ascii(*ptr) != *pattern)
760
11.4k
                RETURN_FAILURE;
761
425k
            pattern++;
762
425k
            ptr++;
763
425k
            DISPATCH;
764
765
425k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
51.9M
        TARGET(SRE_OP_JUMP):
845
51.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
51.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
51.9M
                   ptr, pattern[0]));
850
51.9M
            pattern += pattern[0];
851
51.9M
            DISPATCH;
852
853
91.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
91.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
91.0M
            LASTMARK_SAVE();
858
91.0M
            if (state->repeat)
859
88.2M
                MARK_PUSH(ctx->lastmark);
860
196M
            for (; pattern[0]; pattern += pattern[0]) {
861
157M
                if (pattern[1] == SRE_OP_LITERAL &&
862
157M
                    (ptr >= end ||
863
69.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
48.1M
                    continue;
865
108M
                if (pattern[1] == SRE_OP_IN &&
866
108M
                    (ptr >= end ||
867
84.5M
                     !SRE(charset)(state, pattern + 3,
868
84.5M
                                   (SRE_CODE) *ptr)))
869
56.4M
                    continue;
870
52.4M
                state->ptr = ptr;
871
52.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
52.4M
                if (ret) {
873
51.5M
                    if (state->repeat)
874
49.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
51.5M
                    RETURN_ON_ERROR(ret);
876
51.5M
                    RETURN_SUCCESS;
877
51.5M
                }
878
901k
                if (state->repeat)
879
6.03k
                    MARK_POP_KEEP(ctx->lastmark);
880
901k
                LASTMARK_RESTORE();
881
901k
            }
882
39.4M
            if (state->repeat)
883
38.6M
                MARK_POP_DISCARD(ctx->lastmark);
884
39.4M
            RETURN_FAILURE;
885
886
206M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
206M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
206M
                   pattern[1], pattern[2]));
898
899
206M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
26.3k
                RETURN_FAILURE; /* cannot match */
901
902
206M
            state->ptr = ptr;
903
904
206M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
206M
            RETURN_ON_ERROR(ret);
906
206M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
206M
            ctx->count = ret;
908
206M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
206M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
77.7M
                RETURN_FAILURE;
917
918
129M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
129M
                ptr == state->end &&
920
129M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
4.08k
            {
922
                /* tail is empty.  we're finished */
923
4.08k
                state->ptr = ptr;
924
4.08k
                RETURN_SUCCESS;
925
4.08k
            }
926
927
129M
            LASTMARK_SAVE();
928
129M
            if (state->repeat)
929
85.1M
                MARK_PUSH(ctx->lastmark);
930
931
129M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
3.34M
                ctx->u.chr = pattern[pattern[0]+1];
935
3.34M
                for (;;) {
936
7.84M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.84M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
4.50M
                        ptr--;
939
4.50M
                        ctx->count--;
940
4.50M
                    }
941
3.34M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
21.7k
                        break;
943
3.32M
                    state->ptr = ptr;
944
3.32M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.32M
                            pattern+pattern[0]);
946
3.32M
                    if (ret) {
947
3.32M
                        if (state->repeat)
948
3.30M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.32M
                        RETURN_ON_ERROR(ret);
950
3.32M
                        RETURN_SUCCESS;
951
3.32M
                    }
952
262
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
262
                    LASTMARK_RESTORE();
955
956
262
                    ptr--;
957
262
                    ctx->count--;
958
262
                }
959
21.7k
                if (state->repeat)
960
259
                    MARK_POP_DISCARD(ctx->lastmark);
961
125M
            } else {
962
                /* general case */
963
126M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
126M
                    state->ptr = ptr;
965
126M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
126M
                            pattern+pattern[0]);
967
126M
                    if (ret) {
968
125M
                        if (state->repeat)
969
81.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
125M
                        RETURN_ON_ERROR(ret);
971
125M
                        RETURN_SUCCESS;
972
125M
                    }
973
1.08M
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.08M
                    LASTMARK_RESTORE();
976
977
1.08M
                    ptr--;
978
1.08M
                    ctx->count--;
979
1.08M
                }
980
80.8k
                if (state->repeat)
981
80.1k
                    MARK_POP_DISCARD(ctx->lastmark);
982
80.8k
            }
983
102k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
117M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
117M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
117M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
117M
            ctx->u.rep = repeat_pool_malloc(state);
1127
117M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
117M
            ctx->u.rep->count = -1;
1131
117M
            ctx->u.rep->pattern = pattern;
1132
117M
            ctx->u.rep->prev = state->repeat;
1133
117M
            ctx->u.rep->last_ptr = NULL;
1134
117M
            state->repeat = ctx->u.rep;
1135
1136
117M
            state->ptr = ptr;
1137
117M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
117M
            state->repeat = ctx->u.rep->prev;
1139
117M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
117M
            if (ret) {
1142
117M
                RETURN_ON_ERROR(ret);
1143
117M
                RETURN_SUCCESS;
1144
117M
            }
1145
613
            RETURN_FAILURE;
1146
1147
208M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
208M
            ctx->u.rep = state->repeat;
1155
208M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
208M
            state->ptr = ptr;
1159
1160
208M
            ctx->count = ctx->u.rep->count+1;
1161
1162
208M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
208M
                   ptr, ctx->count));
1164
1165
208M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
208M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
208M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
208M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
204M
                ctx->u.rep->count = ctx->count;
1185
204M
                LASTMARK_SAVE();
1186
204M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
204M
                LAST_PTR_PUSH();
1189
204M
                ctx->u.rep->last_ptr = state->ptr;
1190
204M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
204M
                        ctx->u.rep->pattern+3);
1192
204M
                LAST_PTR_POP();
1193
204M
                if (ret) {
1194
90.1M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
90.1M
                    RETURN_ON_ERROR(ret);
1196
90.1M
                    RETURN_SUCCESS;
1197
90.1M
                }
1198
114M
                MARK_POP(ctx->lastmark);
1199
114M
                LASTMARK_RESTORE();
1200
114M
                ctx->u.rep->count = ctx->count-1;
1201
114M
                state->ptr = ptr;
1202
114M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
118M
            state->repeat = ctx->u.rep->prev;
1207
118M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
118M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
118M
            RETURN_ON_SUCCESS(ret);
1211
80.7k
            state->ptr = ptr;
1212
80.7k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
79.5M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
79.5M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
79.5M
                   ptr, pattern[1]));
1565
79.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
79.5M
            state->ptr = ptr - pattern[1];
1568
79.5M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
79.5M
            RETURN_ON_FAILURE(ret);
1570
79.0M
            pattern += pattern[0];
1571
79.0M
            DISPATCH;
1572
1573
79.0M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
18.1M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
18.1M
                   ptr, pattern[1]));
1578
18.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
18.1M
                state->ptr = ptr - pattern[1];
1580
18.1M
                LASTMARK_SAVE();
1581
18.1M
                if (state->repeat)
1582
18.1M
                    MARK_PUSH(ctx->lastmark);
1583
1584
36.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
36.3M
                if (ret) {
1586
5.77k
                    if (state->repeat)
1587
5.77k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.77k
                    RETURN_ON_ERROR(ret);
1589
5.77k
                    RETURN_FAILURE;
1590
5.77k
                }
1591
18.1M
                if (state->repeat)
1592
18.1M
                    MARK_POP(ctx->lastmark);
1593
18.1M
                LASTMARK_RESTORE();
1594
18.1M
            }
1595
18.1M
            pattern += pattern[0];
1596
18.1M
            DISPATCH;
1597
1598
18.1M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
846M
exit:
1620
846M
    ctx_pos = ctx->last_ctx_pos;
1621
846M
    jump = ctx->jump;
1622
846M
    DATA_POP_DISCARD(ctx);
1623
846M
    if (ctx_pos == -1) {
1624
125M
        state->sigcount = sigcount;
1625
125M
        return ret;
1626
125M
    }
1627
720M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
720M
    switch (jump) {
1630
204M
        case JUMP_MAX_UNTIL_2:
1631
204M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
204M
            goto jump_max_until_2;
1633
118M
        case JUMP_MAX_UNTIL_3:
1634
118M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
118M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
52.4M
        case JUMP_BRANCH:
1643
52.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
52.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
117M
        case JUMP_REPEAT:
1658
117M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
117M
            goto jump_repeat;
1660
3.32M
        case JUMP_REPEAT_ONE_1:
1661
3.32M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.32M
            goto jump_repeat_one_1;
1663
126M
        case JUMP_REPEAT_ONE_2:
1664
126M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
126M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
79.5M
        case JUMP_ASSERT:
1673
79.5M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
79.5M
            goto jump_assert;
1675
18.1M
        case JUMP_ASSERT_NOT:
1676
18.1M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
18.1M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
720M
    }
1683
1684
0
    return ret; /* should never get here */
1685
720M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
473M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
85.6M
{
1694
85.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
85.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
85.6M
    Py_ssize_t status = 0;
1697
85.6M
    Py_ssize_t prefix_len = 0;
1698
85.6M
    Py_ssize_t prefix_skip = 0;
1699
85.6M
    SRE_CODE* prefix = NULL;
1700
85.6M
    SRE_CODE* charset = NULL;
1701
85.6M
    SRE_CODE* overlap = NULL;
1702
85.6M
    int flags = 0;
1703
85.6M
    INIT_TRACE(state);
1704
1705
85.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
85.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
85.6M
        flags = pattern[2];
1713
1714
85.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.30M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.30M
                   end - ptr, (size_t) pattern[3]));
1717
1.30M
            return 0;
1718
1.30M
        }
1719
84.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
4.90M
            end -= pattern[3] - 1;
1723
4.90M
            if (end <= ptr)
1724
0
                end = ptr;
1725
4.90M
        }
1726
1727
84.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
4.90M
            prefix_len = pattern[5];
1731
4.90M
            prefix_skip = pattern[6];
1732
4.90M
            prefix = pattern + 7;
1733
4.90M
            overlap = prefix + prefix_len - 1;
1734
79.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
75.4M
            charset = pattern + 5;
1738
1739
84.3M
        pattern += 1 + pattern[1];
1740
84.3M
    }
1741
1742
84.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
84.3M
           prefix, prefix_len, prefix_skip));
1744
84.3M
    TRACE(("charset = %p\n", charset));
1745
1746
84.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
4.40M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
2.47M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.47M
#endif
1753
2.47M
        end = (SRE_CHAR *)state->end;
1754
2.47M
        state->must_advance = 0;
1755
5.27M
        while (ptr < end) {
1756
105M
            while (*ptr != c) {
1757
100M
                if (++ptr >= end)
1758
390k
                    return 0;
1759
100M
            }
1760
4.88M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
4.88M
            state->start = ptr;
1762
4.88M
            state->ptr = ptr + prefix_skip;
1763
4.88M
            if (flags & SRE_INFO_LITERAL)
1764
2.06k
                return 1; /* we got all of it */
1765
4.88M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
4.88M
            if (status != 0)
1767
4.00M
                return status;
1768
872k
            ++ptr;
1769
872k
            RESET_CAPTURE_GROUP();
1770
872k
        }
1771
6.13k
        return 0;
1772
2.47M
    }
1773
1774
79.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
496k
        Py_ssize_t i = 0;
1778
1779
496k
        end = (SRE_CHAR *)state->end;
1780
496k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.28M
        for (i = 0; i < prefix_len; i++)
1784
859k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
429k
#endif
1787
1.19M
        while (ptr < end) {
1788
1.19M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.58M
            while (*ptr++ != c) {
1790
7.38M
                if (ptr >= end)
1791
315
                    return 0;
1792
7.38M
            }
1793
1.19M
            if (ptr >= end)
1794
30
                return 0;
1795
1796
1.19M
            i = 1;
1797
1.19M
            state->must_advance = 0;
1798
1.21M
            do {
1799
1.21M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.01M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.01M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.01M
                    state->start = ptr - (prefix_len - 1);
1808
1.01M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.01M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.01M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.01M
                    if (status != 0)
1813
495k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
519k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
519k
                    RESET_CAPTURE_GROUP();
1818
519k
                }
1819
723k
                i = overlap[i];
1820
723k
            } while (i != 0);
1821
1.19M
        }
1822
0
        return 0;
1823
496k
    }
1824
1825
79.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
75.4M
        end = (SRE_CHAR *)state->end;
1828
75.4M
        state->must_advance = 0;
1829
78.1M
        for (;;) {
1830
357M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
279M
                ptr++;
1832
78.1M
            if (ptr >= end)
1833
3.65M
                return 0;
1834
74.5M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
74.5M
            state->start = ptr;
1836
74.5M
            state->ptr = ptr;
1837
74.5M
            status = SRE(match)(state, pattern, 0);
1838
74.5M
            if (status != 0)
1839
71.7M
                break;
1840
2.72M
            ptr++;
1841
2.72M
            RESET_CAPTURE_GROUP();
1842
2.72M
        }
1843
75.4M
    } else {
1844
        /* general case */
1845
3.96M
        assert(ptr <= end);
1846
3.96M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.96M
        state->start = state->ptr = ptr;
1848
3.96M
        status = SRE(match)(state, pattern, 1);
1849
3.96M
        state->must_advance = 0;
1850
3.96M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
3.96M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
472M
        while (status == 0 && ptr < end) {
1858
469M
            ptr++;
1859
469M
            RESET_CAPTURE_GROUP();
1860
469M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
469M
            state->start = state->ptr = ptr;
1862
469M
            status = SRE(match)(state, pattern, 0);
1863
469M
        }
1864
3.96M
    }
1865
1866
75.7M
    return status;
1867
79.4M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
32.7M
{
1694
32.7M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
32.7M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
32.7M
    Py_ssize_t status = 0;
1697
32.7M
    Py_ssize_t prefix_len = 0;
1698
32.7M
    Py_ssize_t prefix_skip = 0;
1699
32.7M
    SRE_CODE* prefix = NULL;
1700
32.7M
    SRE_CODE* charset = NULL;
1701
32.7M
    SRE_CODE* overlap = NULL;
1702
32.7M
    int flags = 0;
1703
32.7M
    INIT_TRACE(state);
1704
1705
32.7M
    if (ptr > end)
1706
0
        return 0;
1707
1708
32.7M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
32.7M
        flags = pattern[2];
1713
1714
32.7M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.19M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.19M
                   end - ptr, (size_t) pattern[3]));
1717
1.19M
            return 0;
1718
1.19M
        }
1719
31.5M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.07M
            end -= pattern[3] - 1;
1723
1.07M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.07M
        }
1726
1727
31.5M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.07M
            prefix_len = pattern[5];
1731
1.07M
            prefix_skip = pattern[6];
1732
1.07M
            prefix = pattern + 7;
1733
1.07M
            overlap = prefix + prefix_len - 1;
1734
30.4M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
27.6M
            charset = pattern + 5;
1738
1739
31.5M
        pattern += 1 + pattern[1];
1740
31.5M
    }
1741
1742
31.5M
    TRACE(("prefix = %p %zd %zd\n",
1743
31.5M
           prefix, prefix_len, prefix_skip));
1744
31.5M
    TRACE(("charset = %p\n", charset));
1745
1746
31.5M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.07M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.07M
#if SIZEOF_SRE_CHAR < 4
1750
1.07M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.07M
#endif
1753
1.07M
        end = (SRE_CHAR *)state->end;
1754
1.07M
        state->must_advance = 0;
1755
1.25M
        while (ptr < end) {
1756
20.3M
            while (*ptr != c) {
1757
19.3M
                if (++ptr >= end)
1758
279k
                    return 0;
1759
19.3M
            }
1760
968k
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
968k
            state->start = ptr;
1762
968k
            state->ptr = ptr + prefix_skip;
1763
968k
            if (flags & SRE_INFO_LITERAL)
1764
296
                return 1; /* we got all of it */
1765
968k
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
968k
            if (status != 0)
1767
787k
                return status;
1768
180k
            ++ptr;
1769
180k
            RESET_CAPTURE_GROUP();
1770
180k
        }
1771
3.67k
        return 0;
1772
1.07M
    }
1773
1774
30.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
6.87k
        Py_ssize_t i = 0;
1778
1779
6.87k
        end = (SRE_CHAR *)state->end;
1780
6.87k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
6.87k
#if SIZEOF_SRE_CHAR < 4
1783
20.6k
        for (i = 0; i < prefix_len; i++)
1784
13.7k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
6.87k
#endif
1787
392k
        while (ptr < end) {
1788
392k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.80M
            while (*ptr++ != c) {
1790
2.41M
                if (ptr >= end)
1791
62
                    return 0;
1792
2.41M
            }
1793
392k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
392k
            i = 1;
1797
392k
            state->must_advance = 0;
1798
393k
            do {
1799
393k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
296k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
296k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
296k
                    state->start = ptr - (prefix_len - 1);
1808
296k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
296k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
296k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
296k
                    if (status != 0)
1813
6.79k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
289k
                    if (++ptr >= end)
1816
10
                        return 0;
1817
289k
                    RESET_CAPTURE_GROUP();
1818
289k
                }
1819
386k
                i = overlap[i];
1820
386k
            } while (i != 0);
1821
392k
        }
1822
0
        return 0;
1823
6.87k
    }
1824
1825
30.4M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
27.6M
        end = (SRE_CHAR *)state->end;
1828
27.6M
        state->must_advance = 0;
1829
29.5M
        for (;;) {
1830
78.1M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
48.6M
                ptr++;
1832
29.5M
            if (ptr >= end)
1833
2.55M
                return 0;
1834
26.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
26.9M
            state->start = ptr;
1836
26.9M
            state->ptr = ptr;
1837
26.9M
            status = SRE(match)(state, pattern, 0);
1838
26.9M
            if (status != 0)
1839
25.1M
                break;
1840
1.82M
            ptr++;
1841
1.82M
            RESET_CAPTURE_GROUP();
1842
1.82M
        }
1843
27.6M
    } else {
1844
        /* general case */
1845
2.77M
        assert(ptr <= end);
1846
2.77M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.77M
        state->start = state->ptr = ptr;
1848
2.77M
        status = SRE(match)(state, pattern, 1);
1849
2.77M
        state->must_advance = 0;
1850
2.77M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.77M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
154M
        while (status == 0 && ptr < end) {
1858
151M
            ptr++;
1859
151M
            RESET_CAPTURE_GROUP();
1860
151M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
151M
            state->start = state->ptr = ptr;
1862
151M
            status = SRE(match)(state, pattern, 0);
1863
151M
        }
1864
2.77M
    }
1865
1866
27.9M
    return status;
1867
30.4M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
45.4M
{
1694
45.4M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
45.4M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
45.4M
    Py_ssize_t status = 0;
1697
45.4M
    Py_ssize_t prefix_len = 0;
1698
45.4M
    Py_ssize_t prefix_skip = 0;
1699
45.4M
    SRE_CODE* prefix = NULL;
1700
45.4M
    SRE_CODE* charset = NULL;
1701
45.4M
    SRE_CODE* overlap = NULL;
1702
45.4M
    int flags = 0;
1703
45.4M
    INIT_TRACE(state);
1704
1705
45.4M
    if (ptr > end)
1706
0
        return 0;
1707
1708
45.4M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
45.4M
        flags = pattern[2];
1713
1714
45.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
104k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
104k
                   end - ptr, (size_t) pattern[3]));
1717
104k
            return 0;
1718
104k
        }
1719
45.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.82M
            end -= pattern[3] - 1;
1723
1.82M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.82M
        }
1726
1727
45.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
1.82M
            prefix_len = pattern[5];
1731
1.82M
            prefix_skip = pattern[6];
1732
1.82M
            prefix = pattern + 7;
1733
1.82M
            overlap = prefix + prefix_len - 1;
1734
43.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
42.6M
            charset = pattern + 5;
1738
1739
45.3M
        pattern += 1 + pattern[1];
1740
45.3M
    }
1741
1742
45.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
45.3M
           prefix, prefix_len, prefix_skip));
1744
45.3M
    TRACE(("charset = %p\n", charset));
1745
1746
45.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.40M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.40M
#if SIZEOF_SRE_CHAR < 4
1750
1.40M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.40M
#endif
1753
1.40M
        end = (SRE_CHAR *)state->end;
1754
1.40M
        state->must_advance = 0;
1755
1.75M
        while (ptr < end) {
1756
55.9M
            while (*ptr != c) {
1757
54.2M
                if (++ptr >= end)
1758
102k
                    return 0;
1759
54.2M
            }
1760
1.64M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
1.64M
            state->start = ptr;
1762
1.64M
            state->ptr = ptr + prefix_skip;
1763
1.64M
            if (flags & SRE_INFO_LITERAL)
1764
557
                return 1; /* we got all of it */
1765
1.64M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
1.64M
            if (status != 0)
1767
1.29M
                return status;
1768
349k
            ++ptr;
1769
349k
            RESET_CAPTURE_GROUP();
1770
349k
        }
1771
2.02k
        return 0;
1772
1.40M
    }
1773
1774
43.9M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
422k
        Py_ssize_t i = 0;
1778
1779
422k
        end = (SRE_CHAR *)state->end;
1780
422k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
422k
#if SIZEOF_SRE_CHAR < 4
1783
1.26M
        for (i = 0; i < prefix_len; i++)
1784
845k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
422k
#endif
1787
659k
        while (ptr < end) {
1788
659k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.25M
            while (*ptr++ != c) {
1790
3.59M
                if (ptr >= end)
1791
132
                    return 0;
1792
3.59M
            }
1793
659k
            if (ptr >= end)
1794
6
                return 0;
1795
1796
659k
            i = 1;
1797
659k
            state->must_advance = 0;
1798
659k
            do {
1799
659k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
587k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
587k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
587k
                    state->start = ptr - (prefix_len - 1);
1808
587k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
587k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
587k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
587k
                    if (status != 0)
1813
422k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
165k
                    if (++ptr >= end)
1816
4
                        return 0;
1817
165k
                    RESET_CAPTURE_GROUP();
1818
165k
                }
1819
237k
                i = overlap[i];
1820
237k
            } while (i != 0);
1821
659k
        }
1822
0
        return 0;
1823
422k
    }
1824
1825
43.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
42.6M
        end = (SRE_CHAR *)state->end;
1828
42.6M
        state->must_advance = 0;
1829
43.0M
        for (;;) {
1830
198M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
155M
                ptr++;
1832
43.0M
            if (ptr >= end)
1833
1.03M
                return 0;
1834
42.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
42.0M
            state->start = ptr;
1836
42.0M
            state->ptr = ptr;
1837
42.0M
            status = SRE(match)(state, pattern, 0);
1838
42.0M
            if (status != 0)
1839
41.5M
                break;
1840
468k
            ptr++;
1841
468k
            RESET_CAPTURE_GROUP();
1842
468k
        }
1843
42.6M
    } else {
1844
        /* general case */
1845
911k
        assert(ptr <= end);
1846
911k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
911k
        state->start = state->ptr = ptr;
1848
911k
        status = SRE(match)(state, pattern, 1);
1849
911k
        state->must_advance = 0;
1850
911k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
911k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
241M
        while (status == 0 && ptr < end) {
1858
240M
            ptr++;
1859
240M
            RESET_CAPTURE_GROUP();
1860
240M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
240M
            state->start = state->ptr = ptr;
1862
240M
            status = SRE(match)(state, pattern, 0);
1863
240M
        }
1864
911k
    }
1865
1866
42.4M
    return status;
1867
43.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.44M
{
1694
7.44M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.44M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.44M
    Py_ssize_t status = 0;
1697
7.44M
    Py_ssize_t prefix_len = 0;
1698
7.44M
    Py_ssize_t prefix_skip = 0;
1699
7.44M
    SRE_CODE* prefix = NULL;
1700
7.44M
    SRE_CODE* charset = NULL;
1701
7.44M
    SRE_CODE* overlap = NULL;
1702
7.44M
    int flags = 0;
1703
7.44M
    INIT_TRACE(state);
1704
1705
7.44M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.44M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.44M
        flags = pattern[2];
1713
1714
7.44M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
7.36k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
7.36k
                   end - ptr, (size_t) pattern[3]));
1717
7.36k
            return 0;
1718
7.36k
        }
1719
7.43M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
1.99M
            end -= pattern[3] - 1;
1723
1.99M
            if (end <= ptr)
1724
0
                end = ptr;
1725
1.99M
        }
1726
1727
7.43M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.00M
            prefix_len = pattern[5];
1731
2.00M
            prefix_skip = pattern[6];
1732
2.00M
            prefix = pattern + 7;
1733
2.00M
            overlap = prefix + prefix_len - 1;
1734
5.43M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
5.16M
            charset = pattern + 5;
1738
1739
7.43M
        pattern += 1 + pattern[1];
1740
7.43M
    }
1741
1742
7.43M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.43M
           prefix, prefix_len, prefix_skip));
1744
7.43M
    TRACE(("charset = %p\n", charset));
1745
1746
7.43M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.93M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
1.93M
        end = (SRE_CHAR *)state->end;
1754
1.93M
        state->must_advance = 0;
1755
2.27M
        while (ptr < end) {
1756
28.7M
            while (*ptr != c) {
1757
26.5M
                if (++ptr >= end)
1758
7.90k
                    return 0;
1759
26.5M
            }
1760
2.26M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.26M
            state->start = ptr;
1762
2.26M
            state->ptr = ptr + prefix_skip;
1763
2.26M
            if (flags & SRE_INFO_LITERAL)
1764
1.20k
                return 1; /* we got all of it */
1765
2.26M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.26M
            if (status != 0)
1767
1.92M
                return status;
1768
342k
            ++ptr;
1769
342k
            RESET_CAPTURE_GROUP();
1770
342k
        }
1771
429
        return 0;
1772
1.93M
    }
1773
1774
5.50M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
66.3k
        Py_ssize_t i = 0;
1778
1779
66.3k
        end = (SRE_CHAR *)state->end;
1780
66.3k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
147k
        while (ptr < end) {
1788
147k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.51M
            while (*ptr++ != c) {
1790
1.37M
                if (ptr >= end)
1791
121
                    return 0;
1792
1.37M
            }
1793
146k
            if (ptr >= end)
1794
12
                return 0;
1795
1796
146k
            i = 1;
1797
146k
            state->must_advance = 0;
1798
165k
            do {
1799
165k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
131k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
131k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
131k
                    state->start = ptr - (prefix_len - 1);
1808
131k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
131k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
131k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
131k
                    if (status != 0)
1813
66.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
64.9k
                    if (++ptr >= end)
1816
5
                        return 0;
1817
64.9k
                    RESET_CAPTURE_GROUP();
1818
64.9k
                }
1819
99.6k
                i = overlap[i];
1820
99.6k
            } while (i != 0);
1821
146k
        }
1822
0
        return 0;
1823
66.3k
    }
1824
1825
5.43M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
5.16M
        end = (SRE_CHAR *)state->end;
1828
5.16M
        state->must_advance = 0;
1829
5.59M
        for (;;) {
1830
80.6M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
75.0M
                ptr++;
1832
5.59M
            if (ptr >= end)
1833
59.8k
                return 0;
1834
5.53M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
5.53M
            state->start = ptr;
1836
5.53M
            state->ptr = ptr;
1837
5.53M
            status = SRE(match)(state, pattern, 0);
1838
5.53M
            if (status != 0)
1839
5.10M
                break;
1840
430k
            ptr++;
1841
430k
            RESET_CAPTURE_GROUP();
1842
430k
        }
1843
5.16M
    } else {
1844
        /* general case */
1845
274k
        assert(ptr <= end);
1846
274k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
274k
        state->start = state->ptr = ptr;
1848
274k
        status = SRE(match)(state, pattern, 1);
1849
274k
        state->must_advance = 0;
1850
274k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
274k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
77.6M
        while (status == 0 && ptr < end) {
1858
77.4M
            ptr++;
1859
77.4M
            RESET_CAPTURE_GROUP();
1860
77.4M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
77.4M
            state->start = state->ptr = ptr;
1862
77.4M
            status = SRE(match)(state, pattern, 0);
1863
77.4M
        }
1864
274k
    }
1865
1866
5.37M
    return status;
1867
5.43M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/