Coverage Report

Created: 2025-07-18 06:09

/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.1M
{
18
    /* check if pointer is at given position */
19
20
14.1M
    Py_ssize_t thisp, thatp;
21
22
14.1M
    switch (at) {
23
24
6.65M
    case SRE_AT_BEGINNING:
25
6.65M
    case SRE_AT_BEGINNING_STRING:
26
6.65M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.89M
    case SRE_AT_END:
33
4.89M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.89M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.89M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.57M
    case SRE_AT_END_STRING:
42
2.57M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.1M
    }
87
88
0
    return 0;
89
14.1M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.4M
{
18
    /* check if pointer is at given position */
19
20
12.4M
    Py_ssize_t thisp, thatp;
21
22
12.4M
    switch (at) {
23
24
6.62M
    case SRE_AT_BEGINNING:
25
6.62M
    case SRE_AT_BEGINNING_STRING:
26
6.62M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.19M
    case SRE_AT_END:
33
4.19M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
4.19M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.19M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.62M
    case SRE_AT_END_STRING:
42
1.62M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.4M
    }
87
88
0
    return 0;
89
12.4M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
1.12M
{
18
    /* check if pointer is at given position */
19
20
1.12M
    Py_ssize_t thisp, thatp;
21
22
1.12M
    switch (at) {
23
24
31.9k
    case SRE_AT_BEGINNING:
25
31.9k
    case SRE_AT_BEGINNING_STRING:
26
31.9k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
620k
    case SRE_AT_END:
33
620k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
620k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
620k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
468k
    case SRE_AT_END_STRING:
42
468k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
1.12M
    }
87
88
0
    return 0;
89
1.12M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
568k
{
18
    /* check if pointer is at given position */
19
20
568k
    Py_ssize_t thisp, thatp;
21
22
568k
    switch (at) {
23
24
5.05k
    case SRE_AT_BEGINNING:
25
5.05k
    case SRE_AT_BEGINNING_STRING:
26
5.05k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
80.6k
    case SRE_AT_END:
33
80.6k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
80.6k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
80.6k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
482k
    case SRE_AT_END_STRING:
42
482k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
568k
    }
87
88
0
    return 0;
89
568k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.81G
{
94
    /* check if character is a member of the given set */
95
96
1.81G
    int ok = 1;
97
98
4.11G
    for (;;) {
99
4.11G
        switch (*set++) {
100
101
1.16G
        case SRE_OP_FAILURE:
102
1.16G
            return !ok;
103
104
1.23G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.23G
            if (ch == set[0])
107
4.81M
                return ok;
108
1.23G
            set++;
109
1.23G
            break;
110
111
11.6M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.6M
            if (sre_category(set[0], (int) ch))
114
7.91M
                return ok;
115
3.69M
            set++;
116
3.69M
            break;
117
118
914M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
914M
            if (ch < 256 &&
121
914M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
435M
                return ok;
123
478M
            set += 256/SRE_CODE_BITS;
124
478M
            break;
125
126
341M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
341M
            if (set[0] <= ch && ch <= set[1])
129
202M
                return ok;
130
139M
            set += 2;
131
139M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
444M
        case SRE_OP_NEGATE:
148
444M
            ok = !ok;
149
444M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
4.11G
        }
175
4.11G
    }
176
1.81G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
358M
{
94
    /* check if character is a member of the given set */
95
96
358M
    int ok = 1;
97
98
785M
    for (;;) {
99
785M
        switch (*set++) {
100
101
216M
        case SRE_OP_FAILURE:
102
216M
            return !ok;
103
104
287M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
287M
            if (ch == set[0])
107
2.32M
                return ok;
108
284M
            set++;
109
284M
            break;
110
111
10.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
10.5M
            if (sre_category(set[0], (int) ch))
114
6.91M
                return ok;
115
3.65M
            set++;
116
3.65M
            break;
117
118
72.0M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
72.0M
            if (ch < 256 &&
121
72.0M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
33.3M
                return ok;
123
38.7M
            set += 256/SRE_CODE_BITS;
124
38.7M
            break;
125
126
160M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
160M
            if (set[0] <= ch && ch <= set[1])
129
99.0M
                return ok;
130
61.7M
            set += 2;
131
61.7M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
37.8M
        case SRE_OP_NEGATE:
148
37.8M
            ok = !ok;
149
37.8M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
785M
        }
175
785M
    }
176
358M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
821M
{
94
    /* check if character is a member of the given set */
95
96
821M
    int ok = 1;
97
98
1.94G
    for (;;) {
99
1.94G
        switch (*set++) {
100
101
570M
        case SRE_OP_FAILURE:
102
570M
            return !ok;
103
104
660M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
660M
            if (ch == set[0])
107
1.39M
                return ok;
108
659M
            set++;
109
659M
            break;
110
111
87.1k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
87.1k
            if (sre_category(set[0], (int) ch))
114
67.9k
                return ok;
115
19.2k
            set++;
116
19.2k
            break;
117
118
364M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
364M
            if (ch < 256 &&
121
364M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
156M
                return ok;
123
207M
            set += 256/SRE_CODE_BITS;
124
207M
            break;
125
126
159M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
159M
            if (set[0] <= ch && ch <= set[1])
129
93.1M
                return ok;
130
66.4M
            set += 2;
131
66.4M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
186M
        case SRE_OP_NEGATE:
148
186M
            ok = !ok;
149
186M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.94G
        }
175
1.94G
    }
176
821M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
639M
{
94
    /* check if character is a member of the given set */
95
96
639M
    int ok = 1;
97
98
1.39G
    for (;;) {
99
1.39G
        switch (*set++) {
100
101
381M
        case SRE_OP_FAILURE:
102
381M
            return !ok;
103
104
290M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
290M
            if (ch == set[0])
107
1.10M
                return ok;
108
289M
            set++;
109
289M
            break;
110
111
957k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
957k
            if (sre_category(set[0], (int) ch))
114
929k
                return ok;
115
27.9k
            set++;
116
27.9k
            break;
117
118
477M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
477M
            if (ch < 256 &&
121
477M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
245M
                return ok;
123
232M
            set += 256/SRE_CODE_BITS;
124
232M
            break;
125
126
21.3M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
21.3M
            if (set[0] <= ch && ch <= set[1])
129
10.3M
                return ok;
130
10.9M
            set += 2;
131
10.9M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
220M
        case SRE_OP_NEGATE:
148
220M
            ok = !ok;
149
220M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.39G
        }
175
1.39G
    }
176
639M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
673M
{
195
673M
    SRE_CODE chr;
196
673M
    SRE_CHAR c;
197
673M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
673M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
673M
    Py_ssize_t i;
200
673M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
673M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
15.3M
        end = ptr + maxcount;
205
206
673M
    switch (pattern[0]) {
207
208
601M
    case SRE_OP_IN:
209
        /* repeated set */
210
601M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
965M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
364M
            ptr++;
213
601M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
61.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
61.6M
        chr = pattern[1];
232
61.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
61.6M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
59.2M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
59.2M
        else
238
59.2M
#endif
239
65.5M
        while (ptr < end && *ptr == c)
240
3.92M
            ptr++;
241
61.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
10.7M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
10.7M
        chr = pattern[1];
270
10.7M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
10.7M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
5.72M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
5.72M
        else
276
5.72M
#endif
277
39.1M
        while (ptr < end && *ptr != c)
278
28.3M
            ptr++;
279
10.7M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
673M
    }
319
320
673M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
673M
           ptr - (SRE_CHAR*) state->ptr));
322
673M
    return ptr - (SRE_CHAR*) state->ptr;
323
673M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
190M
{
195
190M
    SRE_CODE chr;
196
190M
    SRE_CHAR c;
197
190M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
190M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
190M
    Py_ssize_t i;
200
190M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
190M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.54M
        end = ptr + maxcount;
205
206
190M
    switch (pattern[0]) {
207
208
136M
    case SRE_OP_IN:
209
        /* repeated set */
210
136M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
240M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
103M
            ptr++;
213
136M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
52.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
52.9M
        chr = pattern[1];
232
52.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
52.9M
        c = (SRE_CHAR) chr;
234
52.9M
#if SIZEOF_SRE_CHAR < 4
235
52.9M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
52.9M
        else
238
52.9M
#endif
239
54.8M
        while (ptr < end && *ptr == c)
240
1.89M
            ptr++;
241
52.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
126k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
126k
        chr = pattern[1];
270
126k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
126k
        c = (SRE_CHAR) chr;
272
126k
#if SIZEOF_SRE_CHAR < 4
273
126k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
126k
        else
276
126k
#endif
277
2.53M
        while (ptr < end && *ptr != c)
278
2.40M
            ptr++;
279
126k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
190M
    }
319
320
190M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
190M
           ptr - (SRE_CHAR*) state->ptr));
322
190M
    return ptr - (SRE_CHAR*) state->ptr;
323
190M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
286M
{
195
286M
    SRE_CODE chr;
196
286M
    SRE_CHAR c;
197
286M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
286M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
286M
    Py_ssize_t i;
200
286M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
286M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
5.93M
        end = ptr + maxcount;
205
206
286M
    switch (pattern[0]) {
207
208
274M
    case SRE_OP_IN:
209
        /* repeated set */
210
274M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
407M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
132M
            ptr++;
213
274M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
6.31M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
6.31M
        chr = pattern[1];
232
6.31M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
6.31M
        c = (SRE_CHAR) chr;
234
6.31M
#if SIZEOF_SRE_CHAR < 4
235
6.31M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
6.31M
        else
238
6.31M
#endif
239
8.07M
        while (ptr < end && *ptr == c)
240
1.75M
            ptr++;
241
6.31M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
5.59M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
5.59M
        chr = pattern[1];
270
5.59M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
5.59M
        c = (SRE_CHAR) chr;
272
5.59M
#if SIZEOF_SRE_CHAR < 4
273
5.59M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
5.59M
        else
276
5.59M
#endif
277
15.7M
        while (ptr < end && *ptr != c)
278
10.1M
            ptr++;
279
5.59M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
286M
    }
319
320
286M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
286M
           ptr - (SRE_CHAR*) state->ptr));
322
286M
    return ptr - (SRE_CHAR*) state->ptr;
323
286M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
197M
{
195
197M
    SRE_CODE chr;
196
197M
    SRE_CHAR c;
197
197M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
197M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
197M
    Py_ssize_t i;
200
197M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
197M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.87M
        end = ptr + maxcount;
205
206
197M
    switch (pattern[0]) {
207
208
189M
    case SRE_OP_IN:
209
        /* repeated set */
210
189M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
317M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
127M
            ptr++;
213
189M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
2.37M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
2.37M
        chr = pattern[1];
232
2.37M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
2.37M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
2.63M
        while (ptr < end && *ptr == c)
240
267k
            ptr++;
241
2.37M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
5.05M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
5.05M
        chr = pattern[1];
270
5.05M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
5.05M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
20.8M
        while (ptr < end && *ptr != c)
278
15.8M
            ptr++;
279
5.05M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
197M
    }
319
320
197M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
197M
           ptr - (SRE_CHAR*) state->ptr));
322
197M
    return ptr - (SRE_CHAR*) state->ptr;
323
197M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context so that LASTMARK_RESTORE() can roll them back later.
   Expects `ctx` and `state` to be in scope where the macro is expanded. */
#define LASTMARK_SAVE()     \
354
791M
    do { \
355
791M
        ctx->lastmark = state->lastmark; \
356
791M
        ctx->lastindex = state->lastindex; \
357
791M
    } while (0)
358
/* Copy the lastmark/lastindex values held in the current match context
   back into `state`.  Expects `ctx` and `state` to be in scope where the
   macro is expanded. */
#define LASTMARK_RESTORE()  \
359
281M
    do { \
360
281M
        state->lastmark = ctx->lastmark; \
361
281M
        state->lastindex = ctx->lastindex; \
362
281M
    } while (0)
363
364
/* Trace, then push ctx->u.rep->last_ptr onto the data stack.
   Expands DATA_PUSH, so it can grow the data stack and will `return`
   from the enclosing function if that growth reports an error. */
#define LAST_PTR_PUSH()     \
365
322M
    do { \
366
322M
        TRACE(("push last_ptr: %zd", \
367
322M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
322M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
322M
    } while (0)
370
/* Pop the top of the data stack into ctx->u.rep->last_ptr (discarding
   the stack entry), then trace the restored value. */
#define LAST_PTR_POP()  \
371
322M
    do { \
372
322M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
322M
        TRACE(("pop last_ptr: %zd", \
374
322M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
322M
    } while (0)
376
377
0
/* Result helpers for the matcher.
   RETURN_ERROR(i) returns `i` from the enclosing function immediately.
   RETURN_FAILURE / RETURN_SUCCESS store 0 / 1 in the local `ret` and jump
   to the enclosing function's `exit` label. */
#define RETURN_ERROR(i) do { return i; } while(0)
378
713M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
1.14G
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Return `i` from the enclosing function when it is negative.
   NOTE: the argument is expanded unparenthesized and more than once, so
   pass a plain variable, not an expression with side effects. */
#define RETURN_ON_ERROR(i) \
382
1.65G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Propagate a negative `i` as an error; otherwise take the success exit
   when `i` is positive.  Falls through when `i` is zero. */
#define RETURN_ON_SUCCESS(i) \
384
183M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Propagate a negative `i` as an error; otherwise take the failure exit
   when `i` is zero.  Falls through when `i` is positive. */
#define RETURN_ON_FAILURE(i) \
386
122M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.85G
/* Reserve sizeof(type) bytes at the top of state's data stack and make
   `ptr` point at them.
   - Records the allocation offset in the caller's local `alloc_pos`.
   - If the remaining capacity is too small, calls data_stack_grow(); a
     negative result is returned from the enclosing function.
   - After growing, `ctx` is re-derived from `ctx_pos` (unless ctx_pos is
     -1), presumably because growth can relocate the stack buffer --
     confirm against data_stack_grow().
   Requires `alloc_pos`, `ctx` and `ctx_pos` to be in scope. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.85G
do { \
390
1.85G
    alloc_pos = state->data_stack_base; \
391
1.85G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.85G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.85G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
182M
        int j = data_stack_grow(state, sizeof(type)); \
395
182M
        if (j < 0) return j; \
396
182M
        if (ctx_pos != -1) \
397
182M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
182M
    } \
399
1.85G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.85G
    state->data_stack_base += sizeof(type); \
401
1.85G
} while (0)
402
403
1.93G
/* Set `ptr` to the object of the given `type` located `pos` bytes from
   the start of state's data stack.  Does not change the stack top. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.93G
do { \
405
1.93G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.93G
    ptr = (type*)(state->data_stack+pos); \
407
1.93G
} while (0)
408
409
713M
/* Copy `size` bytes from `data` onto the top of state's data stack and
   advance the stack top by `size`.
   - If the remaining capacity is too small, calls data_stack_grow(); a
     negative result is returned from the enclosing function.
   - After growing, `ctx` is re-derived from `ctx_pos` (unless ctx_pos is
     -1), presumably because growth can relocate the stack buffer --
     confirm against data_stack_grow().
   Requires `ctx` and `ctx_pos` to be in scope. */
#define DATA_STACK_PUSH(state, data, size) \
410
713M
do { \
411
713M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
713M
           data, state->data_stack_base, size)); \
413
713M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
41.0k
        int j = data_stack_grow(state, size); \
415
41.0k
        if (j < 0) return j; \
416
41.0k
        if (ctx_pos != -1) \
417
41.0k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
41.0k
    } \
419
713M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
713M
    state->data_stack_base += size; \
421
713M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`; see bpo-39943 for details. */
426
476M
/* Copy the topmost `size` bytes of state's data stack into `data`.
   When `discard` is non-zero the stack top is also lowered by `size`;
   otherwise the bytes stay on the stack. */
#define DATA_STACK_POP(state, data, size, discard) \
427
476M
do { \
428
476M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
476M
           data, state->data_stack_base-size, size)); \
430
476M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
476M
    if (discard) \
432
476M
        state->data_stack_base -= size; \
433
476M
} while (0)
434
435
2.09G
/* Lower the top of state's data stack by `size` bytes without copying
   anything out. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
2.09G
do { \
437
2.09G
    TRACE(("discard data from %zd (%zd)\n", \
438
2.09G
           state->data_stack_base-size, size)); \
439
2.09G
    state->data_stack_base -= size; \
440
2.09G
} while(0)
441
442
/* Convenience wrappers around the DATA_STACK_* macros.  They always use
   the local variable `state`; the ones taking `x` expect a pointer and
   derive the byte count from the type it points to. */
/* Push the object that `x` points to. */
#define DATA_PUSH(x) \
443
322M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop into the object that `x` points to and drop it from the stack. */
#define DATA_POP(x) \
445
322M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Drop sizeof(*x) bytes from the stack without copying them out. */
#define DATA_POP_DISCARD(x) \
447
1.85G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Allocate an object of type `t` on the stack and point `p` at it. */
#define DATA_ALLOC(t,p) \
449
1.85G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point `p` at the object of type `t` stored at byte offset `pos`. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.93G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer to an index: the byte distance from state->beginning
   divided by state->charsize.  A NULL pointer yields -1.
   Uses the local variable `state`. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid.
   With VERBOSE enabled and DO_TRACE true, prints `label`, the number of
   marks (lastmark + 1) and the index of every entry state->mark[0..lastmark],
   inserting an extra space before each even-numbered entry after the first
   so that the entries read as pairs.  Without VERBOSE it expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* If `lastmark` is non-negative, push the first lastmark+1 entries of
   state->mark onto the data stack; otherwise do nothing.
   Expands DATA_STACK_PUSH, so it may grow the stack and `return` from the
   enclosing function on error.
   NOTE: `lastmark` is expanded unparenthesized in places; pass a simple
   lvalue or member expression. */
#define MARK_PUSH(lastmark) \
472
654M
    do if (lastmark >= 0) { \
473
390M
        MARK_TRACE("push", (lastmark)); \
474
390M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
390M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
654M
    } while (0)
477
/* If `lastmark` is non-negative, copy lastmark+1 saved entries from the
   top of the data stack back into state->mark and remove them from the
   stack; otherwise do nothing. */
#define MARK_POP(lastmark) \
478
208M
    do if (lastmark >= 0) { \
479
150M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
150M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
150M
        MARK_TRACE("pop", (lastmark)); \
482
208M
    } while (0)
483
/* Like MARK_POP, but the saved entries are left on the data stack
   (DATA_STACK_POP is invoked with discard = 0), so they can be restored
   again later. */
#define MARK_POP_KEEP(lastmark) \
484
2.65M
    do if (lastmark >= 0) { \
485
2.65M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
2.65M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
2.65M
        MARK_TRACE("pop keep", (lastmark)); \
488
2.65M
    } while (0)
489
/* If `lastmark` is non-negative, drop lastmark+1 saved mark entries from
   the data stack without copying them back into state->mark. */
#define MARK_POP_DISCARD(lastmark) \
490
445M
    do if (lastmark >= 0) { \
491
239M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
239M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
239M
        MARK_TRACE("pop discard", (lastmark)); \
494
445M
    } while (0)
495
496
591M
/* Identifiers stored in the `jump` field of a match context.  DO_JUMPX
   records one of these in the context it creates; SRE(match) gives its
   initial context JUMP_NONE.
   NOTE(review): presumably the value selects which jump label to resume
   at when a nested context finishes -- that code is not in this part of
   the file; confirm. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
322M
#define JUMP_MAX_UNTIL_2     2
499
183M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
182M
#define JUMP_REPEAT          7
504
12.1M
#define JUMP_REPEAT_ONE_1    8
505
222M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
176M
#define JUMP_BRANCH          11
508
122M
#define JUMP_ASSERT          12
509
40.7M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.
   Steps, in order:
   1. Save the current `pattern` and `ptr` into `ctx`.
   2. Allocate `nextctx` on the data stack (may grow the stack, or return
      from the enclosing function on allocation error).
   3. Fill in nextctx's pattern, toplevel flag, jump id and the position of
      the current context (last_ctx_pos).
   4. Make nextctx current (`pattern`, `ctx_pos`, `ctx`) and jump to the
      `entrance` label.
   The macro also defines the label `jumplabel`; the statements after it
   reload `pattern` and `ptr` from `ctx`.
   NOTE(review): presumably control reaches `jumplabel` once the nested
   context has finished and `ctx` has been switched back to the saved
   context -- that code is outside this part of the file; confirm.
   This is a bare statement sequence, not wrapped in do { } while (0), so it
   must not be used as the unbraced body of an if/else/loop. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
1.26G
    ctx->pattern = pattern; \
516
1.26G
    ctx->ptr = ptr; \
517
1.26G
    DATA_ALLOC(SRE(match_context), nextctx); \
518
1.26G
    nextctx->pattern = nextpattern; \
519
1.26G
    nextctx->toplevel = toplevel_; \
520
1.26G
    nextctx->jump = jumpvalue; \
521
1.26G
    nextctx->last_ctx_pos = ctx_pos; \
522
1.26G
    pattern = nextpattern; \
523
1.26G
    ctx_pos = alloc_pos; \
524
1.26G
    ctx = nextctx; \
525
1.26G
    goto entrance; \
526
1.26G
    jumplabel: \
527
1.26G
    pattern = ctx->pattern; \
528
1.26G
    ptr = ctx->ptr;
529
530
/* DO_JUMPX variant in which the new context inherits the current
   context's `toplevel` flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
1.10G
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX variant in which the new context's `toplevel` flag is forced
   to 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
163M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation matcher state.  Instances are allocated on the state's
   data stack (see DATA_ALLOC uses in DO_JUMPX and SRE(match)) and are
   chained through last_ctx_pos. */
typedef struct {
537
    Py_ssize_t count;           /* NOTE(review): use not visible in this part of the file */
538
    union {
539
        SRE_CODE chr;           /* NOTE(review): use not visible in this part of the file */
540
        SRE_REPEAT* rep;        /* repeat record; its last_ptr is saved/restored by LAST_PTR_PUSH/POP */
541
    } u;
542
    int lastmark;               /* copy of state->lastmark taken by LASTMARK_SAVE() */
543
    int lastindex;              /* copy of state->lastindex taken by LASTMARK_SAVE() */
544
    const SRE_CODE* pattern;    /* pattern position saved by DO_JUMPX, reloaded at the jump label */
545
    const SRE_CHAR* ptr;        /* subject position saved by DO_JUMPX, reloaded at the jump label */
546
    int toplevel;               /* toplevel flag; checked by SRE_OP_SUCCESS */
547
    int jump;                   /* one of the JUMP_* identifiers */
548
    Py_ssize_t last_ctx_pos;    /* data-stack offset of the context that created this one, or -1 */
549
} SRE(match_context);
550
551
/* Periodic signal check.  Increments the local counter `sigcount`; each
   time its low 12 bits wrap to zero (every 4096th expansion) it calls
   PyErr_CheckSignals(), and if that reports a non-zero result the
   enclosing function returns SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
3.24G
    do {                                                           \
553
3.24G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
3.24G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS: the signal check used by the dispatch loop.
   In Py_DEBUG builds it additionally counts down state->fail_after_count
   (when that is non-negative); when the counter is found at 0 it sets
   state->fail_after_exc as the current exception and returns
   SRE_ERROR_INTERRUPTED.  NOTE(review): this looks like a fault-injection
   hook for tests -- confirm with the code that sets fail_after_count.
   In non-debug builds it is just _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
3.24G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS to a defined 0/1 value:
   - compiler supports computed gotos: default to 1 unless already defined;
   - no compiler support but explicitly requested (non-zero): hard error;
   - otherwise: force 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch helpers.
   With computed gotos: TARGET(OP) expands to the label name TARGET_<OP>,
   and DISPATCH runs the periodic signal check, then jumps through the
   sre_targets table indexed by the next opcode (advancing `pattern`).
   Without computed gotos: TARGET(OP) is a `case` label and DISPATCH jumps
   to the `dispatch` label, where the signal check and `switch` live. */
#if USE_COMPUTED_GOTOS
585
3.36G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
3.24G
        do {                               \
588
3.24G
            MAYBE_CHECK_SIGNALS;           \
589
3.24G
            goto *sre_targets[*pattern++]; \
590
3.24G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
591M
{
601
591M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
591M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
591M
    Py_ssize_t ret = 0;
604
591M
    int jump;
605
591M
    unsigned int sigcount = state->sigcount;
606
607
591M
    SRE(match_context)* ctx;
608
591M
    SRE(match_context)* nextctx;
609
591M
    INIT_TRACE(state);
610
611
591M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
591M
    DATA_ALLOC(SRE(match_context), ctx);
614
591M
    ctx->last_ctx_pos = -1;
615
591M
    ctx->jump = JUMP_NONE;
616
591M
    ctx->toplevel = toplevel;
617
591M
    ctx_pos = alloc_pos;
618
619
591M
#if USE_COMPUTED_GOTOS
620
591M
#include "sre_targets.h"
621
591M
#endif
622
623
1.85G
entrance:
624
625
1.85G
    ;  // Fashion statement.
626
1.85G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.85G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
98.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.68M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.68M
                   end - ptr, (size_t) pattern[3]));
634
6.68M
            RETURN_FAILURE;
635
6.68M
        }
636
91.8M
        pattern += pattern[1] + 1;
637
91.8M
    }
638
639
1.84G
#if USE_COMPUTED_GOTOS
640
1.84G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.84G
    {
647
648
1.84G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
651M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
651M
                   ptr, pattern[0]));
653
651M
            {
654
651M
                int i = pattern[0];
655
651M
                if (i & 1)
656
96.1M
                    state->lastindex = i/2 + 1;
657
651M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
641M
                    int j = state->lastmark + 1;
663
650M
                    while (j < i)
664
8.55M
                        state->mark[j++] = NULL;
665
641M
                    state->lastmark = i;
666
641M
                }
667
651M
                state->mark[i] = ptr;
668
651M
            }
669
651M
            pattern++;
670
651M
            DISPATCH;
671
672
651M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
272M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
272M
                   ptr, *pattern));
677
272M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
162M
                RETURN_FAILURE;
679
110M
            pattern++;
680
110M
            ptr++;
681
110M
            DISPATCH;
682
683
110M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
282M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
282M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
282M
            if (ctx->toplevel &&
698
282M
                ((state->match_all && ptr != state->end) ||
699
81.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
282M
            state->ptr = ptr;
704
282M
            RETURN_SUCCESS;
705
706
14.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.1M
            if (!SRE(at)(state, ptr, *pattern))
711
3.98M
                RETURN_FAILURE;
712
10.1M
            pattern++;
713
10.1M
            DISPATCH;
714
715
10.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
359M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
359M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
359M
            if (ptr >= end ||
749
359M
                !SRE(charset)(state, pattern + 1, *ptr))
750
5.72M
                RETURN_FAILURE;
751
354M
            pattern += pattern[0];
752
354M
            ptr++;
753
354M
            DISPATCH;
754
755
354M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.26M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.26M
                   pattern, ptr, pattern[0]));
758
4.26M
            if (ptr >= end ||
759
4.26M
                sre_lower_ascii(*ptr) != *pattern)
760
270k
                RETURN_FAILURE;
761
3.99M
            pattern++;
762
3.99M
            ptr++;
763
3.99M
            DISPATCH;
764
765
3.99M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
111M
        TARGET(SRE_OP_JUMP):
845
111M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
111M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
111M
                   ptr, pattern[0]));
850
111M
            pattern += pattern[0];
851
111M
            DISPATCH;
852
853
194M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
194M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
194M
            LASTMARK_SAVE();
858
194M
            if (state->repeat)
859
147M
                MARK_PUSH(ctx->lastmark);
860
460M
            for (; pattern[0]; pattern += pattern[0]) {
861
375M
                if (pattern[1] == SRE_OP_LITERAL &&
862
375M
                    (ptr >= end ||
863
177M
                     (SRE_CODE) *ptr != pattern[2]))
864
98.7M
                    continue;
865
276M
                if (pattern[1] == SRE_OP_IN &&
866
276M
                    (ptr >= end ||
867
138M
                     !SRE(charset)(state, pattern + 3,
868
138M
                                   (SRE_CODE) *ptr)))
869
100M
                    continue;
870
176M
                state->ptr = ptr;
871
176M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
176M
                if (ret) {
873
108M
                    if (state->repeat)
874
89.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
108M
                    RETURN_ON_ERROR(ret);
876
108M
                    RETURN_SUCCESS;
877
108M
                }
878
67.7M
                if (state->repeat)
879
18.9k
                    MARK_POP_KEEP(ctx->lastmark);
880
67.7M
                LASTMARK_RESTORE();
881
67.7M
            }
882
85.4M
            if (state->repeat)
883
58.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
85.4M
            RETURN_FAILURE;
885
886
674M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
674M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
674M
                   pattern[1], pattern[2]));
898
899
674M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.07M
                RETURN_FAILURE; /* cannot match */
901
902
673M
            state->ptr = ptr;
903
904
673M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
673M
            RETURN_ON_ERROR(ret);
906
673M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
673M
            ctx->count = ret;
908
673M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
673M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
440M
                RETURN_FAILURE;
917
918
233M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
233M
                ptr == state->end &&
920
233M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
78.9k
            {
922
                /* tail is empty.  we're finished */
923
78.9k
                state->ptr = ptr;
924
78.9k
                RETURN_SUCCESS;
925
78.9k
            }
926
927
233M
            LASTMARK_SAVE();
928
233M
            if (state->repeat)
929
143M
                MARK_PUSH(ctx->lastmark);
930
931
233M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
13.8M
                ctx->u.chr = pattern[pattern[0]+1];
935
13.8M
                for (;;) {
936
32.3M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
32.3M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
18.5M
                        ptr--;
939
18.5M
                        ctx->count--;
940
18.5M
                    }
941
13.8M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.68M
                        break;
943
12.1M
                    state->ptr = ptr;
944
12.1M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
12.1M
                            pattern+pattern[0]);
946
12.1M
                    if (ret) {
947
12.1M
                        if (state->repeat)
948
10.7M
                            MARK_POP_DISCARD(ctx->lastmark);
949
12.1M
                        RETURN_ON_ERROR(ret);
950
12.1M
                        RETURN_SUCCESS;
951
12.1M
                    }
952
8.82k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
8.82k
                    LASTMARK_RESTORE();
955
956
8.82k
                    ptr--;
957
8.82k
                    ctx->count--;
958
8.82k
                }
959
1.68M
                if (state->repeat)
960
560
                    MARK_POP_DISCARD(ctx->lastmark);
961
219M
            } else {
962
                /* general case */
963
224M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
222M
                    state->ptr = ptr;
965
222M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
222M
                            pattern+pattern[0]);
967
222M
                    if (ret) {
968
218M
                        if (state->repeat)
969
131M
                            MARK_POP_DISCARD(ctx->lastmark);
970
218M
                        RETURN_ON_ERROR(ret);
971
218M
                        RETURN_SUCCESS;
972
218M
                    }
973
4.39M
                    if (state->repeat)
974
2.63M
                        MARK_POP_KEEP(ctx->lastmark);
975
4.39M
                    LASTMARK_RESTORE();
976
977
4.39M
                    ptr--;
978
4.39M
                    ctx->count--;
979
4.39M
                }
980
1.55M
                if (state->repeat)
981
1.36M
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.55M
            }
983
3.24M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
182M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
182M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
182M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
182M
            ctx->u.rep = repeat_pool_malloc(state);
1127
182M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
182M
            ctx->u.rep->count = -1;
1131
182M
            ctx->u.rep->pattern = pattern;
1132
182M
            ctx->u.rep->prev = state->repeat;
1133
182M
            ctx->u.rep->last_ptr = NULL;
1134
182M
            state->repeat = ctx->u.rep;
1135
1136
182M
            state->ptr = ptr;
1137
182M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
182M
            state->repeat = ctx->u.rep->prev;
1139
182M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
182M
            if (ret) {
1142
182M
                RETURN_ON_ERROR(ret);
1143
182M
                RETURN_SUCCESS;
1144
182M
            }
1145
102k
            RETURN_FAILURE;
1146
1147
338M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
338M
            ctx->u.rep = state->repeat;
1155
338M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
338M
            state->ptr = ptr;
1159
1160
338M
            ctx->count = ctx->u.rep->count+1;
1161
1162
338M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
338M
                   ptr, ctx->count));
1164
1165
338M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
338M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
338M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
338M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
322M
                ctx->u.rep->count = ctx->count;
1185
322M
                LASTMARK_SAVE();
1186
322M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
322M
                LAST_PTR_PUSH();
1189
322M
                ctx->u.rep->last_ptr = state->ptr;
1190
322M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
322M
                        ctx->u.rep->pattern+3);
1192
322M
                LAST_PTR_POP();
1193
322M
                if (ret) {
1194
154M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
154M
                    RETURN_ON_ERROR(ret);
1196
154M
                    RETURN_SUCCESS;
1197
154M
                }
1198
168M
                MARK_POP(ctx->lastmark);
1199
168M
                LASTMARK_RESTORE();
1200
168M
                ctx->u.rep->count = ctx->count-1;
1201
168M
                state->ptr = ptr;
1202
168M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
183M
            state->repeat = ctx->u.rep->prev;
1207
183M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
183M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
183M
            RETURN_ON_SUCCESS(ret);
1211
1.42M
            state->ptr = ptr;
1212
1.42M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
122M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
122M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
122M
                   ptr, pattern[1]));
1565
122M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
122M
            state->ptr = ptr - pattern[1];
1568
122M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
122M
            RETURN_ON_FAILURE(ret);
1570
119M
            pattern += pattern[0];
1571
119M
            DISPATCH;
1572
1573
119M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
40.7M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
40.7M
                   ptr, pattern[1]));
1578
40.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
40.7M
                state->ptr = ptr - pattern[1];
1580
40.7M
                LASTMARK_SAVE();
1581
40.7M
                if (state->repeat)
1582
40.7M
                    MARK_PUSH(ctx->lastmark);
1583
1584
81.5M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
81.5M
                if (ret) {
1586
10.7k
                    if (state->repeat)
1587
10.7k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
10.7k
                    RETURN_ON_ERROR(ret);
1589
10.7k
                    RETURN_FAILURE;
1590
10.7k
                }
1591
40.7M
                if (state->repeat)
1592
40.7M
                    MARK_POP(ctx->lastmark);
1593
40.7M
                LASTMARK_RESTORE();
1594
40.7M
            }
1595
40.7M
            pattern += pattern[0];
1596
40.7M
            DISPATCH;
1597
1598
40.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.85G
exit:
1620
1.85G
    ctx_pos = ctx->last_ctx_pos;
1621
1.85G
    jump = ctx->jump;
1622
1.85G
    DATA_POP_DISCARD(ctx);
1623
1.85G
    if (ctx_pos == -1) {
1624
591M
        state->sigcount = sigcount;
1625
591M
        return ret;
1626
591M
    }
1627
1.26G
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
1.26G
    switch (jump) {
1630
322M
        case JUMP_MAX_UNTIL_2:
1631
322M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
322M
            goto jump_max_until_2;
1633
183M
        case JUMP_MAX_UNTIL_3:
1634
183M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
183M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
176M
        case JUMP_BRANCH:
1643
176M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
176M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
182M
        case JUMP_REPEAT:
1658
182M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
182M
            goto jump_repeat;
1660
12.1M
        case JUMP_REPEAT_ONE_1:
1661
12.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
12.1M
            goto jump_repeat_one_1;
1663
222M
        case JUMP_REPEAT_ONE_2:
1664
222M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
222M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
122M
        case JUMP_ASSERT:
1673
122M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
122M
            goto jump_assert;
1675
40.7M
        case JUMP_ASSERT_NOT:
1676
40.7M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
40.7M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
1.26G
    }
1683
1684
0
    return ret; /* should never get here */
1685
1.26G
}
sre.c:sre_ucs1_match
Line
Count
Source
600
196M
{
601
196M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
196M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
196M
    Py_ssize_t ret = 0;
604
196M
    int jump;
605
196M
    unsigned int sigcount = state->sigcount;
606
607
196M
    SRE(match_context)* ctx;
608
196M
    SRE(match_context)* nextctx;
609
196M
    INIT_TRACE(state);
610
611
196M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
196M
    DATA_ALLOC(SRE(match_context), ctx);
614
196M
    ctx->last_ctx_pos = -1;
615
196M
    ctx->jump = JUMP_NONE;
616
196M
    ctx->toplevel = toplevel;
617
196M
    ctx_pos = alloc_pos;
618
619
196M
#if USE_COMPUTED_GOTOS
620
196M
#include "sre_targets.h"
621
196M
#endif
622
623
342M
entrance:
624
625
342M
    ;  // Fashion statement.
626
342M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
342M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
31.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
6.68M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
6.68M
                   end - ptr, (size_t) pattern[3]));
634
6.68M
            RETURN_FAILURE;
635
6.68M
        }
636
24.8M
        pattern += pattern[1] + 1;
637
24.8M
    }
638
639
335M
#if USE_COMPUTED_GOTOS
640
335M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
335M
    {
647
648
335M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
174M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
174M
                   ptr, pattern[0]));
653
174M
            {
654
174M
                int i = pattern[0];
655
174M
                if (i & 1)
656
19.9M
                    state->lastindex = i/2 + 1;
657
174M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
170M
                    int j = state->lastmark + 1;
663
174M
                    while (j < i)
664
3.64M
                        state->mark[j++] = NULL;
665
170M
                    state->lastmark = i;
666
170M
                }
667
174M
                state->mark[i] = ptr;
668
174M
            }
669
174M
            pattern++;
670
174M
            DISPATCH;
671
672
174M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
53.7M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
53.7M
                   ptr, *pattern));
677
53.7M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
18.9M
                RETURN_FAILURE;
679
34.8M
            pattern++;
680
34.8M
            ptr++;
681
34.8M
            DISPATCH;
682
683
34.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
50.1M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
50.1M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
50.1M
            if (ctx->toplevel &&
698
50.1M
                ((state->match_all && ptr != state->end) ||
699
17.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
50.1M
            state->ptr = ptr;
704
50.1M
            RETURN_SUCCESS;
705
706
12.4M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.4M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.4M
            if (!SRE(at)(state, ptr, *pattern))
711
2.33M
                RETURN_FAILURE;
712
10.0M
            pattern++;
713
10.0M
            DISPATCH;
714
715
10.0M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
34.8M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
34.8M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
34.8M
            if (ptr >= end ||
749
34.8M
                !SRE(charset)(state, pattern + 1, *ptr))
750
285k
                RETURN_FAILURE;
751
34.5M
            pattern += pattern[0];
752
34.5M
            ptr++;
753
34.5M
            DISPATCH;
754
755
34.5M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
988k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
988k
                   pattern, ptr, pattern[0]));
758
988k
            if (ptr >= end ||
759
988k
                sre_lower_ascii(*ptr) != *pattern)
760
173k
                RETURN_FAILURE;
761
815k
            pattern++;
762
815k
            ptr++;
763
815k
            DISPATCH;
764
765
815k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
23.6M
        TARGET(SRE_OP_JUMP):
845
23.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
23.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
23.6M
                   ptr, pattern[0]));
850
23.6M
            pattern += pattern[0];
851
23.6M
            DISPATCH;
852
853
47.2M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
47.2M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
47.2M
            LASTMARK_SAVE();
858
47.2M
            if (state->repeat)
859
7.42M
                MARK_PUSH(ctx->lastmark);
860
143M
            for (; pattern[0]; pattern += pattern[0]) {
861
118M
                if (pattern[1] == SRE_OP_LITERAL &&
862
118M
                    (ptr >= end ||
863
56.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
27.3M
                    continue;
865
90.7M
                if (pattern[1] == SRE_OP_IN &&
866
90.7M
                    (ptr >= end ||
867
8.95M
                     !SRE(charset)(state, pattern + 3,
868
8.95M
                                   (SRE_CODE) *ptr)))
869
5.30M
                    continue;
870
85.4M
                state->ptr = ptr;
871
85.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
85.4M
                if (ret) {
873
22.0M
                    if (state->repeat)
874
7.03M
                        MARK_POP_DISCARD(ctx->lastmark);
875
22.0M
                    RETURN_ON_ERROR(ret);
876
22.0M
                    RETURN_SUCCESS;
877
22.0M
                }
878
63.3M
                if (state->repeat)
879
8.78k
                    MARK_POP_KEEP(ctx->lastmark);
880
63.3M
                LASTMARK_RESTORE();
881
63.3M
            }
882
25.2M
            if (state->repeat)
883
396k
                MARK_POP_DISCARD(ctx->lastmark);
884
25.2M
            RETURN_FAILURE;
885
886
190M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
190M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
190M
                   pattern[1], pattern[2]));
898
899
190M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
832k
                RETURN_FAILURE; /* cannot match */
901
902
190M
            state->ptr = ptr;
903
904
190M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
190M
            RETURN_ON_ERROR(ret);
906
190M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
190M
            ctx->count = ret;
908
190M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
190M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
164M
                RETURN_FAILURE;
917
918
25.7M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
25.7M
                ptr == state->end &&
920
25.7M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
59.1k
            {
922
                /* tail is empty.  we're finished */
923
59.1k
                state->ptr = ptr;
924
59.1k
                RETURN_SUCCESS;
925
59.1k
            }
926
927
25.7M
            LASTMARK_SAVE();
928
25.7M
            if (state->repeat)
929
10.8M
                MARK_PUSH(ctx->lastmark);
930
931
25.7M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
3.07M
                ctx->u.chr = pattern[pattern[0]+1];
935
3.07M
                for (;;) {
936
11.8M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
11.8M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
8.80M
                        ptr--;
939
8.80M
                        ctx->count--;
940
8.80M
                    }
941
3.07M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.63M
                        break;
943
1.44M
                    state->ptr = ptr;
944
1.44M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
1.44M
                            pattern+pattern[0]);
946
1.44M
                    if (ret) {
947
1.44M
                        if (state->repeat)
948
126k
                            MARK_POP_DISCARD(ctx->lastmark);
949
1.44M
                        RETURN_ON_ERROR(ret);
950
1.44M
                        RETURN_SUCCESS;
951
1.44M
                    }
952
291
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
291
                    LASTMARK_RESTORE();
955
956
291
                    ptr--;
957
291
                    ctx->count--;
958
291
                }
959
1.63M
                if (state->repeat)
960
124
                    MARK_POP_DISCARD(ctx->lastmark);
961
22.6M
            } else {
962
                /* general case */
963
24.6M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
23.7M
                    state->ptr = ptr;
965
23.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
23.7M
                            pattern+pattern[0]);
967
23.7M
                    if (ret) {
968
21.7M
                        if (state->repeat)
969
10.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
21.7M
                        RETURN_ON_ERROR(ret);
971
21.7M
                        RETURN_SUCCESS;
972
21.7M
                    }
973
2.00M
                    if (state->repeat)
974
1.23M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.00M
                    LASTMARK_RESTORE();
976
977
2.00M
                    ptr--;
978
2.00M
                    ctx->count--;
979
2.00M
                }
980
852k
                if (state->repeat)
981
665k
                    MARK_POP_DISCARD(ctx->lastmark);
982
852k
            }
983
2.48M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
6.43M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
6.43M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
6.43M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
6.43M
            ctx->u.rep = repeat_pool_malloc(state);
1127
6.43M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
6.43M
            ctx->u.rep->count = -1;
1131
6.43M
            ctx->u.rep->pattern = pattern;
1132
6.43M
            ctx->u.rep->prev = state->repeat;
1133
6.43M
            ctx->u.rep->last_ptr = NULL;
1134
6.43M
            state->repeat = ctx->u.rep;
1135
1136
6.43M
            state->ptr = ptr;
1137
6.43M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
6.43M
            state->repeat = ctx->u.rep->prev;
1139
6.43M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
6.43M
            if (ret) {
1142
6.33M
                RETURN_ON_ERROR(ret);
1143
6.33M
                RETURN_SUCCESS;
1144
6.33M
            }
1145
100k
            RETURN_FAILURE;
1146
1147
19.0M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
19.0M
            ctx->u.rep = state->repeat;
1155
19.0M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
19.0M
            state->ptr = ptr;
1159
1160
19.0M
            ctx->count = ctx->u.rep->count+1;
1161
1162
19.0M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
19.0M
                   ptr, ctx->count));
1164
1165
19.0M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
19.0M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
19.0M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
19.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
15.3M
                ctx->u.rep->count = ctx->count;
1185
15.3M
                LASTMARK_SAVE();
1186
15.3M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
15.3M
                LAST_PTR_PUSH();
1189
15.3M
                ctx->u.rep->last_ptr = state->ptr;
1190
15.3M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
15.3M
                        ctx->u.rep->pattern+3);
1192
15.3M
                LAST_PTR_POP();
1193
15.3M
                if (ret) {
1194
12.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
12.0M
                    RETURN_ON_ERROR(ret);
1196
12.0M
                    RETURN_SUCCESS;
1197
12.0M
                }
1198
3.29M
                MARK_POP(ctx->lastmark);
1199
3.29M
                LASTMARK_RESTORE();
1200
3.29M
                ctx->u.rep->count = ctx->count-1;
1201
3.29M
                state->ptr = ptr;
1202
3.29M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
7.06M
            state->repeat = ctx->u.rep->prev;
1207
7.06M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
7.06M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
7.06M
            RETURN_ON_SUCCESS(ret);
1211
725k
            state->ptr = ptr;
1212
725k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
2.48M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
2.48M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
2.48M
                   ptr, pattern[1]));
1565
2.48M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
2.48M
            state->ptr = ptr - pattern[1];
1568
2.48M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
2.48M
            RETURN_ON_FAILURE(ret);
1570
2.43M
            pattern += pattern[0];
1571
2.43M
            DISPATCH;
1572
1573
3.36M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
3.36M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
3.36M
                   ptr, pattern[1]));
1578
3.36M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
3.36M
                state->ptr = ptr - pattern[1];
1580
3.36M
                LASTMARK_SAVE();
1581
3.36M
                if (state->repeat)
1582
3.36M
                    MARK_PUSH(ctx->lastmark);
1583
1584
6.73M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
6.73M
                if (ret) {
1586
1.04k
                    if (state->repeat)
1587
1.04k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.04k
                    RETURN_ON_ERROR(ret);
1589
1.04k
                    RETURN_FAILURE;
1590
1.04k
                }
1591
3.36M
                if (state->repeat)
1592
3.36M
                    MARK_POP(ctx->lastmark);
1593
3.36M
                LASTMARK_RESTORE();
1594
3.36M
            }
1595
3.36M
            pattern += pattern[0];
1596
3.36M
            DISPATCH;
1597
1598
3.36M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
342M
exit:
1620
342M
    ctx_pos = ctx->last_ctx_pos;
1621
342M
    jump = ctx->jump;
1622
342M
    DATA_POP_DISCARD(ctx);
1623
342M
    if (ctx_pos == -1) {
1624
196M
        state->sigcount = sigcount;
1625
196M
        return ret;
1626
196M
    }
1627
145M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
145M
    switch (jump) {
1630
15.3M
        case JUMP_MAX_UNTIL_2:
1631
15.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
15.3M
            goto jump_max_until_2;
1633
7.06M
        case JUMP_MAX_UNTIL_3:
1634
7.06M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
7.06M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
85.4M
        case JUMP_BRANCH:
1643
85.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
85.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
6.43M
        case JUMP_REPEAT:
1658
6.43M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
6.43M
            goto jump_repeat;
1660
1.44M
        case JUMP_REPEAT_ONE_1:
1661
1.44M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
1.44M
            goto jump_repeat_one_1;
1663
23.7M
        case JUMP_REPEAT_ONE_2:
1664
23.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
23.7M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
2.48M
        case JUMP_ASSERT:
1673
2.48M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
2.48M
            goto jump_assert;
1675
3.36M
        case JUMP_ASSERT_NOT:
1676
3.36M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
3.36M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
145M
    }
1683
1684
0
    return ret; /* should never get here */
1685
145M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
273M
{
601
273M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
273M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
273M
    Py_ssize_t ret = 0;
604
273M
    int jump;
605
273M
    unsigned int sigcount = state->sigcount;
606
607
273M
    SRE(match_context)* ctx;
608
273M
    SRE(match_context)* nextctx;
609
273M
    INIT_TRACE(state);
610
611
273M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
273M
    DATA_ALLOC(SRE(match_context), ctx);
614
273M
    ctx->last_ctx_pos = -1;
615
273M
    ctx->jump = JUMP_NONE;
616
273M
    ctx->toplevel = toplevel;
617
273M
    ctx_pos = alloc_pos;
618
619
273M
#if USE_COMPUTED_GOTOS
620
273M
#include "sre_targets.h"
621
273M
#endif
622
623
744M
entrance:
624
625
744M
    ;  // Fashion statement.
626
744M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
744M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
31.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
709
            TRACE(("reject (got %tu chars, need %zu)\n",
633
709
                   end - ptr, (size_t) pattern[3]));
634
709
            RETURN_FAILURE;
635
709
        }
636
31.1M
        pattern += pattern[1] + 1;
637
31.1M
    }
638
639
744M
#if USE_COMPUTED_GOTOS
640
744M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
744M
    {
647
648
744M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
280M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
280M
                   ptr, pattern[0]));
653
280M
            {
654
280M
                int i = pattern[0];
655
280M
                if (i & 1)
656
32.8M
                    state->lastindex = i/2 + 1;
657
280M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
277M
                    int j = state->lastmark + 1;
663
280M
                    while (j < i)
664
2.50M
                        state->mark[j++] = NULL;
665
277M
                    state->lastmark = i;
666
277M
                }
667
280M
                state->mark[i] = ptr;
668
280M
            }
669
280M
            pattern++;
670
280M
            DISPATCH;
671
672
280M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
107M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
107M
                   ptr, *pattern));
677
107M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
63.0M
                RETURN_FAILURE;
679
43.9M
            pattern++;
680
43.9M
            ptr++;
681
43.9M
            DISPATCH;
682
683
43.9M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
120M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
120M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
120M
            if (ctx->toplevel &&
698
120M
                ((state->match_all && ptr != state->end) ||
699
28.6M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
120M
            state->ptr = ptr;
704
120M
            RETURN_SUCCESS;
705
706
1.12M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
1.12M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
1.12M
            if (!SRE(at)(state, ptr, *pattern))
711
1.08M
                RETURN_FAILURE;
712
32.9k
            pattern++;
713
32.9k
            DISPATCH;
714
715
32.9k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
151M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
151M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
151M
            if (ptr >= end ||
749
151M
                !SRE(charset)(state, pattern + 1, *ptr))
750
4.23M
                RETURN_FAILURE;
751
147M
            pattern += pattern[0];
752
147M
            ptr++;
753
147M
            DISPATCH;
754
755
147M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.79M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.79M
                   pattern, ptr, pattern[0]));
758
2.79M
            if (ptr >= end ||
759
2.79M
                sre_lower_ascii(*ptr) != *pattern)
760
75.8k
                RETURN_FAILURE;
761
2.71M
            pattern++;
762
2.71M
            ptr++;
763
2.71M
            DISPATCH;
764
765
2.71M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
38.6M
        TARGET(SRE_OP_JUMP):
845
38.6M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
38.6M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
38.6M
                   ptr, pattern[0]));
850
38.6M
            pattern += pattern[0];
851
38.6M
            DISPATCH;
852
853
63.0M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
63.0M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
63.0M
            LASTMARK_SAVE();
858
63.0M
            if (state->repeat)
859
59.5M
                MARK_PUSH(ctx->lastmark);
860
141M
            for (; pattern[0]; pattern += pattern[0]) {
861
116M
                if (pattern[1] == SRE_OP_LITERAL &&
862
116M
                    (ptr >= end ||
863
57.9M
                     (SRE_CODE) *ptr != pattern[2]))
864
28.6M
                    continue;
865
87.4M
                if (pattern[1] == SRE_OP_IN &&
866
87.4M
                    (ptr >= end ||
867
53.9M
                     !SRE(charset)(state, pattern + 3,
868
53.9M
                                   (SRE_CODE) *ptr)))
869
45.9M
                    continue;
870
41.4M
                state->ptr = ptr;
871
41.4M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
41.4M
                if (ret) {
873
38.1M
                    if (state->repeat)
874
36.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
38.1M
                    RETURN_ON_ERROR(ret);
876
38.1M
                    RETURN_SUCCESS;
877
38.1M
                }
878
3.29M
                if (state->repeat)
879
4.15k
                    MARK_POP_KEEP(ctx->lastmark);
880
3.29M
                LASTMARK_RESTORE();
881
3.29M
            }
882
24.8M
            if (state->repeat)
883
23.3M
                MARK_POP_DISCARD(ctx->lastmark);
884
24.8M
            RETURN_FAILURE;
885
886
286M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
286M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
286M
                   pattern[1], pattern[2]));
898
899
286M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
211k
                RETURN_FAILURE; /* cannot match */
901
902
286M
            state->ptr = ptr;
903
904
286M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
286M
            RETURN_ON_ERROR(ret);
906
286M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
286M
            ctx->count = ret;
908
286M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
286M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
197M
                RETURN_FAILURE;
917
918
89.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
89.4M
                ptr == state->end &&
920
89.4M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
15.9k
            {
922
                /* tail is empty.  we're finished */
923
15.9k
                state->ptr = ptr;
924
15.9k
                RETURN_SUCCESS;
925
15.9k
            }
926
927
89.4M
            LASTMARK_SAVE();
928
89.4M
            if (state->repeat)
929
54.6M
                MARK_PUSH(ctx->lastmark);
930
931
89.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.64M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.64M
                for (;;) {
936
12.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
12.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
6.83M
                        ptr--;
939
6.83M
                        ctx->count--;
940
6.83M
                    }
941
5.64M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
21.7k
                        break;
943
5.62M
                    state->ptr = ptr;
944
5.62M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.62M
                            pattern+pattern[0]);
946
5.62M
                    if (ret) {
947
5.61M
                        if (state->repeat)
948
5.59M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.61M
                        RETURN_ON_ERROR(ret);
950
5.61M
                        RETURN_SUCCESS;
951
5.61M
                    }
952
3.89k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
3.89k
                    LASTMARK_RESTORE();
955
956
3.89k
                    ptr--;
957
3.89k
                    ctx->count--;
958
3.89k
                }
959
21.7k
                if (state->repeat)
960
217
                    MARK_POP_DISCARD(ctx->lastmark);
961
83.8M
            } else {
962
                /* general case */
963
85.1M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
84.5M
                    state->ptr = ptr;
965
84.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
84.5M
                            pattern+pattern[0]);
967
84.5M
                    if (ret) {
968
83.2M
                        if (state->repeat)
969
48.4M
                            MARK_POP_DISCARD(ctx->lastmark);
970
83.2M
                        RETURN_ON_ERROR(ret);
971
83.2M
                        RETURN_SUCCESS;
972
83.2M
                    }
973
1.30M
                    if (state->repeat)
974
1.23M
                        MARK_POP_KEEP(ctx->lastmark);
975
1.30M
                    LASTMARK_RESTORE();
976
977
1.30M
                    ptr--;
978
1.30M
                    ctx->count--;
979
1.30M
                }
980
622k
                if (state->repeat)
981
619k
                    MARK_POP_DISCARD(ctx->lastmark);
982
622k
            }
983
644k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
70.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
70.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
70.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
70.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
70.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
70.8M
            ctx->u.rep->count = -1;
1131
70.8M
            ctx->u.rep->pattern = pattern;
1132
70.8M
            ctx->u.rep->prev = state->repeat;
1133
70.8M
            ctx->u.rep->last_ptr = NULL;
1134
70.8M
            state->repeat = ctx->u.rep;
1135
1136
70.8M
            state->ptr = ptr;
1137
70.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
70.8M
            state->repeat = ctx->u.rep->prev;
1139
70.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
70.8M
            if (ret) {
1142
70.8M
                RETURN_ON_ERROR(ret);
1143
70.8M
                RETURN_SUCCESS;
1144
70.8M
            }
1145
971
            RETURN_FAILURE;
1146
1147
131M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
131M
            ctx->u.rep = state->repeat;
1155
131M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
131M
            state->ptr = ptr;
1159
1160
131M
            ctx->count = ctx->u.rep->count+1;
1161
1162
131M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
131M
                   ptr, ctx->count));
1164
1165
131M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
131M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
131M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
131M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
125M
                ctx->u.rep->count = ctx->count;
1185
125M
                LASTMARK_SAVE();
1186
125M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
125M
                LAST_PTR_PUSH();
1189
125M
                ctx->u.rep->last_ptr = state->ptr;
1190
125M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
125M
                        ctx->u.rep->pattern+3);
1192
125M
                LAST_PTR_POP();
1193
125M
                if (ret) {
1194
60.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
60.3M
                    RETURN_ON_ERROR(ret);
1196
60.3M
                    RETURN_SUCCESS;
1197
60.3M
                }
1198
65.0M
                MARK_POP(ctx->lastmark);
1199
65.0M
                LASTMARK_RESTORE();
1200
65.0M
                ctx->u.rep->count = ctx->count-1;
1201
65.0M
                state->ptr = ptr;
1202
65.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
71.4M
            state->repeat = ctx->u.rep->prev;
1207
71.4M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
71.4M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
71.4M
            RETURN_ON_SUCCESS(ret);
1211
620k
            state->ptr = ptr;
1212
620k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
49.1M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
49.1M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
49.1M
                   ptr, pattern[1]));
1565
49.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
49.1M
            state->ptr = ptr - pattern[1];
1568
49.1M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
49.1M
            RETURN_ON_FAILURE(ret);
1570
46.3M
            pattern += pattern[0];
1571
46.3M
            DISPATCH;
1572
1573
46.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
22.6M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
22.6M
                   ptr, pattern[1]));
1578
22.6M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
22.6M
                state->ptr = ptr - pattern[1];
1580
22.6M
                LASTMARK_SAVE();
1581
22.6M
                if (state->repeat)
1582
22.6M
                    MARK_PUSH(ctx->lastmark);
1583
1584
45.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
45.3M
                if (ret) {
1586
3.93k
                    if (state->repeat)
1587
3.93k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.93k
                    RETURN_ON_ERROR(ret);
1589
3.93k
                    RETURN_FAILURE;
1590
3.93k
                }
1591
22.6M
                if (state->repeat)
1592
22.6M
                    MARK_POP(ctx->lastmark);
1593
22.6M
                LASTMARK_RESTORE();
1594
22.6M
            }
1595
22.6M
            pattern += pattern[0];
1596
22.6M
            DISPATCH;
1597
1598
22.6M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
744M
exit:
1620
744M
    ctx_pos = ctx->last_ctx_pos;
1621
744M
    jump = ctx->jump;
1622
744M
    DATA_POP_DISCARD(ctx);
1623
744M
    if (ctx_pos == -1) {
1624
273M
        state->sigcount = sigcount;
1625
273M
        return ret;
1626
273M
    }
1627
471M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
471M
    switch (jump) {
1630
125M
        case JUMP_MAX_UNTIL_2:
1631
125M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
125M
            goto jump_max_until_2;
1633
71.4M
        case JUMP_MAX_UNTIL_3:
1634
71.4M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
71.4M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
41.4M
        case JUMP_BRANCH:
1643
41.4M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
41.4M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
70.8M
        case JUMP_REPEAT:
1658
70.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
70.8M
            goto jump_repeat;
1660
5.62M
        case JUMP_REPEAT_ONE_1:
1661
5.62M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.62M
            goto jump_repeat_one_1;
1663
84.5M
        case JUMP_REPEAT_ONE_2:
1664
84.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
84.5M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
49.1M
        case JUMP_ASSERT:
1673
49.1M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
49.1M
            goto jump_assert;
1675
22.6M
        case JUMP_ASSERT_NOT:
1676
22.6M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
22.6M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
471M
    }
1683
1684
0
    return ret; /* should never get here */
1685
471M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
121M
{
601
121M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
121M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
121M
    Py_ssize_t ret = 0;
604
121M
    int jump;
605
121M
    unsigned int sigcount = state->sigcount;
606
607
121M
    SRE(match_context)* ctx;
608
121M
    SRE(match_context)* nextctx;
609
121M
    INIT_TRACE(state);
610
611
121M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
121M
    DATA_ALLOC(SRE(match_context), ctx);
614
121M
    ctx->last_ctx_pos = -1;
615
121M
    ctx->jump = JUMP_NONE;
616
121M
    ctx->toplevel = toplevel;
617
121M
    ctx_pos = alloc_pos;
618
619
121M
#if USE_COMPUTED_GOTOS
620
121M
#include "sre_targets.h"
621
121M
#endif
622
623
768M
entrance:
624
625
768M
    ;  // Fashion statement.
626
768M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
768M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
35.8M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
315
            TRACE(("reject (got %tu chars, need %zu)\n",
633
315
                   end - ptr, (size_t) pattern[3]));
634
315
            RETURN_FAILURE;
635
315
        }
636
35.8M
        pattern += pattern[1] + 1;
637
35.8M
    }
638
639
768M
#if USE_COMPUTED_GOTOS
640
768M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
768M
    {
647
648
768M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
196M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
196M
                   ptr, pattern[0]));
653
196M
            {
654
196M
                int i = pattern[0];
655
196M
                if (i & 1)
656
43.4M
                    state->lastindex = i/2 + 1;
657
196M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
194M
                    int j = state->lastmark + 1;
663
196M
                    while (j < i)
664
2.40M
                        state->mark[j++] = NULL;
665
194M
                    state->lastmark = i;
666
194M
                }
667
196M
                state->mark[i] = ptr;
668
196M
            }
669
196M
            pattern++;
670
196M
            DISPATCH;
671
672
196M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
111M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
111M
                   ptr, *pattern));
677
111M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
80.3M
                RETURN_FAILURE;
679
31.5M
            pattern++;
680
31.5M
            ptr++;
681
31.5M
            DISPATCH;
682
683
31.5M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
112M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
112M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
112M
            if (ctx->toplevel &&
698
112M
                ((state->match_all && ptr != state->end) ||
699
35.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
112M
            state->ptr = ptr;
704
112M
            RETURN_SUCCESS;
705
706
568k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
568k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
568k
            if (!SRE(at)(state, ptr, *pattern))
711
563k
                RETURN_FAILURE;
712
5.42k
            pattern++;
713
5.42k
            DISPATCH;
714
715
5.42k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
173M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
173M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
173M
            if (ptr >= end ||
749
173M
                !SRE(charset)(state, pattern + 1, *ptr))
750
1.20M
                RETURN_FAILURE;
751
172M
            pattern += pattern[0];
752
172M
            ptr++;
753
172M
            DISPATCH;
754
755
172M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
479k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
479k
                   pattern, ptr, pattern[0]));
758
479k
            if (ptr >= end ||
759
479k
                sre_lower_ascii(*ptr) != *pattern)
760
21.3k
                RETURN_FAILURE;
761
458k
            pattern++;
762
458k
            ptr++;
763
458k
            DISPATCH;
764
765
458k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
48.9M
        TARGET(SRE_OP_JUMP):
845
48.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
48.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
48.9M
                   ptr, pattern[0]));
850
48.9M
            pattern += pattern[0];
851
48.9M
            DISPATCH;
852
853
83.8M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
83.8M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
83.8M
            LASTMARK_SAVE();
858
83.8M
            if (state->repeat)
859
80.7M
                MARK_PUSH(ctx->lastmark);
860
176M
            for (; pattern[0]; pattern += pattern[0]) {
861
141M
                if (pattern[1] == SRE_OP_LITERAL &&
862
141M
                    (ptr >= end ||
863
63.1M
                     (SRE_CODE) *ptr != pattern[2]))
864
42.6M
                    continue;
865
98.4M
                if (pattern[1] == SRE_OP_IN &&
866
98.4M
                    (ptr >= end ||
867
75.3M
                     !SRE(charset)(state, pattern + 3,
868
75.3M
                                   (SRE_CODE) *ptr)))
869
48.9M
                    continue;
870
49.5M
                state->ptr = ptr;
871
49.5M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
49.5M
                if (ret) {
873
48.4M
                    if (state->repeat)
874
46.1M
                        MARK_POP_DISCARD(ctx->lastmark);
875
48.4M
                    RETURN_ON_ERROR(ret);
876
48.4M
                    RETURN_SUCCESS;
877
48.4M
                }
878
1.08M
                if (state->repeat)
879
6.03k
                    MARK_POP_KEEP(ctx->lastmark);
880
1.08M
                LASTMARK_RESTORE();
881
1.08M
            }
882
35.3M
            if (state->repeat)
883
34.5M
                MARK_POP_DISCARD(ctx->lastmark);
884
35.3M
            RETURN_FAILURE;
885
886
197M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
197M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
197M
                   pattern[1], pattern[2]));
898
899
197M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
26.3k
                RETURN_FAILURE; /* cannot match */
901
902
197M
            state->ptr = ptr;
903
904
197M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
197M
            RETURN_ON_ERROR(ret);
906
197M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
197M
            ctx->count = ret;
908
197M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
197M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
78.8M
                RETURN_FAILURE;
917
918
118M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
118M
                ptr == state->end &&
920
118M
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.82k
            {
922
                /* tail is empty.  we're finished */
923
3.82k
                state->ptr = ptr;
924
3.82k
                RETURN_SUCCESS;
925
3.82k
            }
926
927
118M
            LASTMARK_SAVE();
928
118M
            if (state->repeat)
929
77.9M
                MARK_PUSH(ctx->lastmark);
930
931
118M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
5.10M
                ctx->u.chr = pattern[pattern[0]+1];
935
5.10M
                for (;;) {
936
8.01M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
8.01M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
2.90M
                        ptr--;
939
2.90M
                        ctx->count--;
940
2.90M
                    }
941
5.10M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
32.2k
                        break;
943
5.07M
                    state->ptr = ptr;
944
5.07M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.07M
                            pattern+pattern[0]);
946
5.07M
                    if (ret) {
947
5.07M
                        if (state->repeat)
948
5.05M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.07M
                        RETURN_ON_ERROR(ret);
950
5.07M
                        RETURN_SUCCESS;
951
5.07M
                    }
952
4.64k
                    if (state->repeat)
953
0
                        MARK_POP_KEEP(ctx->lastmark);
954
4.64k
                    LASTMARK_RESTORE();
955
956
4.64k
                    ptr--;
957
4.64k
                    ctx->count--;
958
4.64k
                }
959
32.2k
                if (state->repeat)
960
219
                    MARK_POP_DISCARD(ctx->lastmark);
961
113M
            } else {
962
                /* general case */
963
114M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
114M
                    state->ptr = ptr;
965
114M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
114M
                            pattern+pattern[0]);
967
114M
                    if (ret) {
968
113M
                        if (state->repeat)
969
72.7M
                            MARK_POP_DISCARD(ctx->lastmark);
970
113M
                        RETURN_ON_ERROR(ret);
971
113M
                        RETURN_SUCCESS;
972
113M
                    }
973
1.08M
                    if (state->repeat)
974
160k
                        MARK_POP_KEEP(ctx->lastmark);
975
1.08M
                    LASTMARK_RESTORE();
976
977
1.08M
                    ptr--;
978
1.08M
                    ctx->count--;
979
1.08M
                }
980
80.7k
                if (state->repeat)
981
80.1k
                    MARK_POP_DISCARD(ctx->lastmark);
982
80.7k
            }
983
112k
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
105M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
105M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
105M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
105M
            ctx->u.rep = repeat_pool_malloc(state);
1127
105M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
105M
            ctx->u.rep->count = -1;
1131
105M
            ctx->u.rep->pattern = pattern;
1132
105M
            ctx->u.rep->prev = state->repeat;
1133
105M
            ctx->u.rep->last_ptr = NULL;
1134
105M
            state->repeat = ctx->u.rep;
1135
1136
105M
            state->ptr = ptr;
1137
105M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
105M
            state->repeat = ctx->u.rep->prev;
1139
105M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
105M
            if (ret) {
1142
105M
                RETURN_ON_ERROR(ret);
1143
105M
                RETURN_SUCCESS;
1144
105M
            }
1145
575
            RETURN_FAILURE;
1146
1147
187M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
187M
            ctx->u.rep = state->repeat;
1155
187M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
187M
            state->ptr = ptr;
1159
1160
187M
            ctx->count = ctx->u.rep->count+1;
1161
1162
187M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
187M
                   ptr, ctx->count));
1164
1165
187M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
187M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
187M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
187M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
182M
                ctx->u.rep->count = ctx->count;
1185
182M
                LASTMARK_SAVE();
1186
182M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
182M
                LAST_PTR_PUSH();
1189
182M
                ctx->u.rep->last_ptr = state->ptr;
1190
182M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
182M
                        ctx->u.rep->pattern+3);
1192
182M
                LAST_PTR_POP();
1193
182M
                if (ret) {
1194
82.3M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
82.3M
                    RETURN_ON_ERROR(ret);
1196
82.3M
                    RETURN_SUCCESS;
1197
82.3M
                }
1198
99.8M
                MARK_POP(ctx->lastmark);
1199
99.8M
                LASTMARK_RESTORE();
1200
99.8M
                ctx->u.rep->count = ctx->count-1;
1201
99.8M
                state->ptr = ptr;
1202
99.8M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
105M
            state->repeat = ctx->u.rep->prev;
1207
105M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
105M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
105M
            RETURN_ON_SUCCESS(ret);
1211
80.7k
            state->ptr = ptr;
1212
80.7k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
71.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
71.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
71.0M
                   ptr, pattern[1]));
1565
71.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
71.0M
            state->ptr = ptr - pattern[1];
1568
71.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
71.0M
            RETURN_ON_FAILURE(ret);
1570
70.5M
            pattern += pattern[0];
1571
70.5M
            DISPATCH;
1572
1573
70.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
14.7M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
14.7M
                   ptr, pattern[1]));
1578
14.7M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
14.7M
                state->ptr = ptr - pattern[1];
1580
14.7M
                LASTMARK_SAVE();
1581
14.7M
                if (state->repeat)
1582
14.7M
                    MARK_PUSH(ctx->lastmark);
1583
1584
29.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
29.4M
                if (ret) {
1586
5.81k
                    if (state->repeat)
1587
5.81k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
5.81k
                    RETURN_ON_ERROR(ret);
1589
5.81k
                    RETURN_FAILURE;
1590
5.81k
                }
1591
14.7M
                if (state->repeat)
1592
14.7M
                    MARK_POP(ctx->lastmark);
1593
14.7M
                LASTMARK_RESTORE();
1594
14.7M
            }
1595
14.7M
            pattern += pattern[0];
1596
14.7M
            DISPATCH;
1597
1598
14.7M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
768M
exit:
1620
768M
    ctx_pos = ctx->last_ctx_pos;
1621
768M
    jump = ctx->jump;
1622
768M
    DATA_POP_DISCARD(ctx);
1623
768M
    if (ctx_pos == -1) {
1624
121M
        state->sigcount = sigcount;
1625
121M
        return ret;
1626
121M
    }
1627
647M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
647M
    switch (jump) {
1630
182M
        case JUMP_MAX_UNTIL_2:
1631
182M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
182M
            goto jump_max_until_2;
1633
105M
        case JUMP_MAX_UNTIL_3:
1634
105M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
105M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
49.5M
        case JUMP_BRANCH:
1643
49.5M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
49.5M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
105M
        case JUMP_REPEAT:
1658
105M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
105M
            goto jump_repeat;
1660
5.07M
        case JUMP_REPEAT_ONE_1:
1661
5.07M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.07M
            goto jump_repeat_one_1;
1663
114M
        case JUMP_REPEAT_ONE_2:
1664
114M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
114M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
71.0M
        case JUMP_ASSERT:
1673
71.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
71.0M
            goto jump_assert;
1675
14.7M
        case JUMP_ASSERT_NOT:
1676
14.7M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
14.7M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
647M
    }
1683
1684
0
    return ret; /* should never get here */
1685
647M
}
1686
1687
/* Reset the capture-group bookkeeping: state->lastmark and state->lastindex
   are both set to -1.  Needed between two successive SRE(match) calls made
   from a loop.  Expects a variable named `state` in the enclosing scope. */
1688
#define RESET_CAPTURE_GROUP() \
1689
409M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
/* Scan forward from state->start for the first position at which `pattern`
   matches, trying successive start positions.

   state   -- search state; start and end are read, and start, ptr and
              must_advance are updated (capture marks are reset between
              attempts via RESET_CAPTURE_GROUP)
   pattern -- compiled code; an optional leading SRE_OP_INFO block supplies
              a minimum width, a literal prefix or a leading charset, which
              are used to skip start positions that cannot match

   Returns 0 if no position matched, 1 when a prefix hit is by itself the
   whole match (SRE_INFO_LITERAL), otherwise the non-zero value returned
   by SRE(match). */
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
88.9M
{
1694
88.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
88.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
88.9M
    Py_ssize_t status = 0;
1697
88.9M
    Py_ssize_t prefix_len = 0;
1698
88.9M
    Py_ssize_t prefix_skip = 0;
1699
88.9M
    SRE_CODE* prefix = NULL;
1700
88.9M
    SRE_CODE* charset = NULL;
1701
88.9M
    SRE_CODE* overlap = NULL;
1702
88.9M
    int flags = 0;
1703
88.9M
    INIT_TRACE(state);
1704
1705
88.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
88.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
88.9M
        flags = pattern[2];
1713
1714
88.9M
        /* fewer characters remain than the pattern's minimum width */
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.72M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.72M
                   end - ptr, (size_t) pattern[3]));
1717
1.72M
            return 0;
1718
1.72M
        }
1719
87.2M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
7.42M
            end -= pattern[3] - 1;
1723
7.42M
            if (end <= ptr)
1724
0
                end = ptr;
1725
7.42M
        }
1726
1727
87.2M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
7.43M
            prefix_len = pattern[5];
1731
7.43M
            prefix_skip = pattern[6];
1732
7.43M
            prefix = pattern + 7;
1733
7.43M
            /* biased by one: overlap[i] for i >= 1 addresses the code
               words that follow the prefix data */
            overlap = prefix + prefix_len - 1;
1734
79.8M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
75.7M
            charset = pattern + 5;
1738
1739
87.2M
        /* step over the INFO block using its skip field */
        pattern += 1 + pattern[1];
1740
87.2M
    }
1741
1742
87.2M
    TRACE(("prefix = %p %zd %zd\n",
1743
87.2M
           prefix, prefix_len, prefix_skip));
1744
87.2M
    TRACE(("charset = %p\n", charset));
1745
1746
87.2M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.95M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.66M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.66M
#endif
1753
4.66M
        /* discard the min-width adjustment made above: scan up to the
           real end of the input */
        end = (SRE_CHAR *)state->end;
1754
4.66M
        state->must_advance = 0;
1755
7.75M
        while (ptr < end) {
1756
106M
            while (*ptr != c) {
1757
99.0M
                if (++ptr >= end)
1758
380k
                    return 0;
1759
99.0M
            }
1760
7.36M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
7.36M
            state->start = ptr;
1762
7.36M
            state->ptr = ptr + prefix_skip;
1763
7.36M
            if (flags & SRE_INFO_LITERAL)
1764
9.20k
                return 1; /* we got all of it */
1765
7.35M
            /* NOTE(review): the code pointer advances two code words per
               skipped prefix character -- presumably each one is an
               <opcode> <literal> pair; confirm against the compiler */
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
7.35M
            if (status != 0)
1767
6.55M
                return status;
1768
800k
            ++ptr;
1769
800k
            RESET_CAPTURE_GROUP();
1770
800k
        }
1771
6.77k
        return 0;
1772
4.66M
    }
1773
1774
80.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
480k
        Py_ssize_t i = 0;
1778
1779
480k
        /* discard the min-width adjustment made above */
        end = (SRE_CHAR *)state->end;
1780
480k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.34M
        for (i = 0; i < prefix_len; i++)
1784
898k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
449k
#endif
1787
1.03M
        while (ptr < end) {
1788
1.03M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
8.46M
            /* the post-increment leaves ptr one past the character that
               matched prefix[0] */
            while (*ptr++ != c) {
1790
7.43M
                if (ptr >= end)
1791
288
                    return 0;
1792
7.43M
            }
1793
1.03M
            if (ptr >= end)
1794
56
                return 0;
1795
1796
1.03M
            i = 1;
1797
1.03M
            state->must_advance = 0;
1798
1.04M
            do {
1799
1.04M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
899k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
                    /* ptr is on the last prefix character here, so the
                       candidate match starts prefix_len - 1 earlier */
1806
899k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
899k
                    state->start = ptr - (prefix_len - 1);
1808
899k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
899k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
899k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
899k
                    if (status != 0)
1813
480k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
418k
                    if (++ptr >= end)
1816
25
                        return 0;
1817
418k
                    RESET_CAPTURE_GROUP();
1818
418k
                }
1819
562k
                /* the overlap table gives the prefix index to resume
                   from; 0 ends this loop and restarts the scan for
                   prefix[0] */
                i = overlap[i];
1820
562k
            } while (i != 0);
1821
1.03M
        }
1822
0
        return 0;
1823
480k
    }
1824
1825
79.8M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
75.7M
        /* discard the min-width adjustment made above */
        end = (SRE_CHAR *)state->end;
1828
75.7M
        state->must_advance = 0;
1829
78.3M
        for (;;) {
1830
363M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
285M
                ptr++;
1832
78.3M
            if (ptr >= end)
1833
3.90M
                return 0;
1834
74.4M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
74.4M
            state->start = ptr;
1836
74.4M
            state->ptr = ptr;
1837
74.4M
            status = SRE(match)(state, pattern, 0);
1838
74.4M
            if (status != 0)
1839
71.8M
                break;
1840
2.56M
            ptr++;
1841
2.56M
            RESET_CAPTURE_GROUP();
1842
2.56M
        }
1843
75.7M
    } else {
1844
        /* general case */
        /* `end` is still the min-width-adjusted limit computed above */
1845
4.02M
        assert(ptr <= end);
1846
4.02M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
4.02M
        state->start = state->ptr = ptr;
1848
4.02M
        /* NOTE(review): the third argument is 1 only for this first
           attempt and 0 for every retry below; its meaning is defined by
           SRE(match) -- confirm there */
        status = SRE(match)(state, pattern, 1);
1849
4.02M
        state->must_advance = 0;
1850
4.02M
        /* a pattern anchored at the beginning cannot match at any later
           position, so give up without scanning */
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.02M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
410M
        while (status == 0 && ptr < end) {
1858
406M
            ptr++;
1859
406M
            RESET_CAPTURE_GROUP();
1860
406M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
406M
            state->start = state->ptr = ptr;
1862
406M
            status = SRE(match)(state, pattern, 0);
1863
406M
        }
1864
4.02M
    }
1865
1866
75.9M
    return status;
1867
79.8M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
35.5M
{
1694
35.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
35.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
35.5M
    Py_ssize_t status = 0;
1697
35.5M
    Py_ssize_t prefix_len = 0;
1698
35.5M
    Py_ssize_t prefix_skip = 0;
1699
35.5M
    SRE_CODE* prefix = NULL;
1700
35.5M
    SRE_CODE* charset = NULL;
1701
35.5M
    SRE_CODE* overlap = NULL;
1702
35.5M
    int flags = 0;
1703
35.5M
    INIT_TRACE(state);
1704
1705
35.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
35.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
35.5M
        flags = pattern[2];
1713
1714
35.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
1.60M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
1.60M
                   end - ptr, (size_t) pattern[3]));
1717
1.60M
            return 0;
1718
1.60M
        }
1719
33.9M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.68M
            end -= pattern[3] - 1;
1723
2.68M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.68M
        }
1726
1727
33.9M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.68M
            prefix_len = pattern[5];
1731
2.68M
            prefix_skip = pattern[6];
1732
2.68M
            prefix = pattern + 7;
1733
2.68M
            overlap = prefix + prefix_len - 1;
1734
31.2M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
28.4M
            charset = pattern + 5;
1738
1739
33.9M
        pattern += 1 + pattern[1];
1740
33.9M
    }
1741
1742
33.9M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.9M
           prefix, prefix_len, prefix_skip));
1744
33.9M
    TRACE(("charset = %p\n", charset));
1745
1746
33.9M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.67M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.67M
#if SIZEOF_SRE_CHAR < 4
1750
2.67M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.67M
#endif
1753
2.67M
        end = (SRE_CHAR *)state->end;
1754
2.67M
        state->must_advance = 0;
1755
2.84M
        while (ptr < end) {
1756
27.7M
            while (*ptr != c) {
1757
25.2M
                if (++ptr >= end)
1758
302k
                    return 0;
1759
25.2M
            }
1760
2.53M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.53M
            state->start = ptr;
1762
2.53M
            state->ptr = ptr + prefix_skip;
1763
2.53M
            if (flags & SRE_INFO_LITERAL)
1764
372
                return 1; /* we got all of it */
1765
2.53M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.53M
            if (status != 0)
1767
2.36M
                return status;
1768
176k
            ++ptr;
1769
176k
            RESET_CAPTURE_GROUP();
1770
176k
        }
1771
4.15k
        return 0;
1772
2.67M
    }
1773
1774
31.2M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
11.8k
        Py_ssize_t i = 0;
1778
1779
11.8k
        end = (SRE_CHAR *)state->end;
1780
11.8k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
11.8k
#if SIZEOF_SRE_CHAR < 4
1783
35.6k
        for (i = 0; i < prefix_len; i++)
1784
23.7k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
11.8k
#endif
1787
277k
        while (ptr < end) {
1788
277k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.43M
            while (*ptr++ != c) {
1790
2.15M
                if (ptr >= end)
1791
58
                    return 0;
1792
2.15M
            }
1793
277k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
277k
            i = 1;
1797
277k
            state->must_advance = 0;
1798
277k
            do {
1799
277k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
205k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
205k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
205k
                    state->start = ptr - (prefix_len - 1);
1808
205k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
205k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
205k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
205k
                    if (status != 0)
1813
11.8k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
193k
                    if (++ptr >= end)
1816
14
                        return 0;
1817
193k
                    RESET_CAPTURE_GROUP();
1818
193k
                }
1819
266k
                i = overlap[i];
1820
266k
            } while (i != 0);
1821
277k
        }
1822
0
        return 0;
1823
11.8k
    }
1824
1825
31.2M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
28.4M
        end = (SRE_CHAR *)state->end;
1828
28.4M
        state->must_advance = 0;
1829
30.0M
        for (;;) {
1830
79.9M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
49.9M
                ptr++;
1832
30.0M
            if (ptr >= end)
1833
2.68M
                return 0;
1834
27.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
27.3M
            state->start = ptr;
1836
27.3M
            state->ptr = ptr;
1837
27.3M
            status = SRE(match)(state, pattern, 0);
1838
27.3M
            if (status != 0)
1839
25.7M
                break;
1840
1.61M
            ptr++;
1841
1.61M
            RESET_CAPTURE_GROUP();
1842
1.61M
        }
1843
28.4M
    } else {
1844
        /* general case */
1845
2.85M
        assert(ptr <= end);
1846
2.85M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.85M
        state->start = state->ptr = ptr;
1848
2.85M
        status = SRE(match)(state, pattern, 1);
1849
2.85M
        state->must_advance = 0;
1850
2.85M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.85M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
135M
        while (status == 0 && ptr < end) {
1858
132M
            ptr++;
1859
132M
            RESET_CAPTURE_GROUP();
1860
132M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
132M
            state->start = state->ptr = ptr;
1862
132M
            status = SRE(match)(state, pattern, 0);
1863
132M
        }
1864
2.85M
    }
1865
1866
28.5M
    return status;
1867
31.2M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
47.1M
{
1694
47.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
47.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
47.1M
    Py_ssize_t status = 0;
1697
47.1M
    Py_ssize_t prefix_len = 0;
1698
47.1M
    Py_ssize_t prefix_skip = 0;
1699
47.1M
    SRE_CODE* prefix = NULL;
1700
47.1M
    SRE_CODE* charset = NULL;
1701
47.1M
    SRE_CODE* overlap = NULL;
1702
47.1M
    int flags = 0;
1703
47.1M
    INIT_TRACE(state);
1704
1705
47.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
47.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
47.1M
        flags = pattern[2];
1713
1714
47.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
110k
                   end - ptr, (size_t) pattern[3]));
1717
110k
            return 0;
1718
110k
        }
1719
47.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.42M
            end -= pattern[3] - 1;
1723
2.42M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.42M
        }
1726
1727
47.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.43M
            prefix_len = pattern[5];
1731
2.43M
            prefix_skip = pattern[6];
1732
2.43M
            prefix = pattern + 7;
1733
2.43M
            overlap = prefix + prefix_len - 1;
1734
44.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
43.6M
            charset = pattern + 5;
1738
1739
47.0M
        pattern += 1 + pattern[1];
1740
47.0M
    }
1741
1742
47.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
47.0M
           prefix, prefix_len, prefix_skip));
1744
47.0M
    TRACE(("charset = %p\n", charset));
1745
1746
47.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.99M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
1.99M
#if SIZEOF_SRE_CHAR < 4
1750
1.99M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
1.99M
#endif
1753
1.99M
        end = (SRE_CHAR *)state->end;
1754
1.99M
        state->must_advance = 0;
1755
2.29M
        while (ptr < end) {
1756
53.4M
            while (*ptr != c) {
1757
51.2M
                if (++ptr >= end)
1758
72.9k
                    return 0;
1759
51.2M
            }
1760
2.21M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.21M
            state->start = ptr;
1762
2.21M
            state->ptr = ptr + prefix_skip;
1763
2.21M
            if (flags & SRE_INFO_LITERAL)
1764
6.93k
                return 1; /* we got all of it */
1765
2.21M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.21M
            if (status != 0)
1767
1.91M
                return status;
1768
297k
            ++ptr;
1769
297k
            RESET_CAPTURE_GROUP();
1770
297k
        }
1771
1.85k
        return 0;
1772
1.99M
    }
1773
1774
45.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
437k
        Py_ssize_t i = 0;
1778
1779
437k
        end = (SRE_CHAR *)state->end;
1780
437k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
437k
#if SIZEOF_SRE_CHAR < 4
1783
1.31M
        for (i = 0; i < prefix_len; i++)
1784
874k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
437k
#endif
1787
622k
        while (ptr < end) {
1788
622k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.73M
            while (*ptr++ != c) {
1790
2.11M
                if (ptr >= end)
1791
109
                    return 0;
1792
2.11M
            }
1793
622k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
622k
            i = 1;
1797
622k
            state->must_advance = 0;
1798
622k
            do {
1799
622k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
575k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
575k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
575k
                    state->start = ptr - (prefix_len - 1);
1808
575k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
575k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
575k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
575k
                    if (status != 0)
1813
437k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
138k
                    if (++ptr >= end)
1816
7
                        return 0;
1817
138k
                    RESET_CAPTURE_GROUP();
1818
138k
                }
1819
185k
                i = overlap[i];
1820
185k
            } while (i != 0);
1821
622k
        }
1822
0
        return 0;
1823
437k
    }
1824
1825
44.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
43.6M
        end = (SRE_CHAR *)state->end;
1828
43.6M
        state->must_advance = 0;
1829
44.0M
        for (;;) {
1830
209M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
165M
                ptr++;
1832
44.0M
            if (ptr >= end)
1833
1.16M
                return 0;
1834
42.9M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
42.9M
            state->start = ptr;
1836
42.9M
            state->ptr = ptr;
1837
42.9M
            status = SRE(match)(state, pattern, 0);
1838
42.9M
            if (status != 0)
1839
42.4M
                break;
1840
467k
            ptr++;
1841
467k
            RESET_CAPTURE_GROUP();
1842
467k
        }
1843
43.6M
    } else {
1844
        /* general case */
1845
949k
        assert(ptr <= end);
1846
949k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
949k
        state->start = state->ptr = ptr;
1848
949k
        status = SRE(match)(state, pattern, 1);
1849
949k
        state->must_advance = 0;
1850
949k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
949k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
196M
        while (status == 0 && ptr < end) {
1858
195M
            ptr++;
1859
195M
            RESET_CAPTURE_GROUP();
1860
195M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
195M
            state->start = state->ptr = ptr;
1862
195M
            status = SRE(match)(state, pattern, 0);
1863
195M
        }
1864
949k
    }
1865
1866
43.4M
    return status;
1867
44.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
6.27M
{
1694
6.27M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
6.27M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
6.27M
    Py_ssize_t status = 0;
1697
6.27M
    Py_ssize_t prefix_len = 0;
1698
6.27M
    Py_ssize_t prefix_skip = 0;
1699
6.27M
    SRE_CODE* prefix = NULL;
1700
6.27M
    SRE_CODE* charset = NULL;
1701
6.27M
    SRE_CODE* overlap = NULL;
1702
6.27M
    int flags = 0;
1703
6.27M
    INIT_TRACE(state);
1704
1705
6.27M
    if (ptr > end)
1706
0
        return 0;
1707
1708
6.27M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
6.27M
        flags = pattern[2];
1713
1714
6.27M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
6.96k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
6.96k
                   end - ptr, (size_t) pattern[3]));
1717
6.96k
            return 0;
1718
6.96k
        }
1719
6.26M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.31M
            end -= pattern[3] - 1;
1723
2.31M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.31M
        }
1726
1727
6.26M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.31M
            prefix_len = pattern[5];
1731
2.31M
            prefix_skip = pattern[6];
1732
2.31M
            prefix = pattern + 7;
1733
2.31M
            overlap = prefix + prefix_len - 1;
1734
3.95M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.72M
            charset = pattern + 5;
1738
1739
6.26M
        pattern += 1 + pattern[1];
1740
6.26M
    }
1741
1742
6.26M
    TRACE(("prefix = %p %zd %zd\n",
1743
6.26M
           prefix, prefix_len, prefix_skip));
1744
6.26M
    TRACE(("charset = %p\n", charset));
1745
1746
6.26M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.28M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
2.28M
        end = (SRE_CHAR *)state->end;
1754
2.28M
        state->must_advance = 0;
1755
2.61M
        while (ptr < end) {
1756
25.1M
            while (*ptr != c) {
1757
22.5M
                if (++ptr >= end)
1758
4.85k
                    return 0;
1759
22.5M
            }
1760
2.60M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.60M
            state->start = ptr;
1762
2.60M
            state->ptr = ptr + prefix_skip;
1763
2.60M
            if (flags & SRE_INFO_LITERAL)
1764
1.90k
                return 1; /* we got all of it */
1765
2.60M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.60M
            if (status != 0)
1767
2.27M
                return status;
1768
326k
            ++ptr;
1769
326k
            RESET_CAPTURE_GROUP();
1770
326k
        }
1771
769
        return 0;
1772
2.28M
    }
1773
1774
3.98M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
31.6k
        Py_ssize_t i = 0;
1778
1779
31.6k
        end = (SRE_CHAR *)state->end;
1780
31.6k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
133k
        while (ptr < end) {
1788
133k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.29M
            while (*ptr++ != c) {
1790
3.16M
                if (ptr >= end)
1791
121
                    return 0;
1792
3.16M
            }
1793
133k
            if (ptr >= end)
1794
16
                return 0;
1795
1796
133k
            i = 1;
1797
133k
            state->must_advance = 0;
1798
141k
            do {
1799
141k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
117k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
117k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
117k
                    state->start = ptr - (prefix_len - 1);
1808
117k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
117k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
117k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
117k
                    if (status != 0)
1813
31.4k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
86.3k
                    if (++ptr >= end)
1816
4
                        return 0;
1817
86.3k
                    RESET_CAPTURE_GROUP();
1818
86.3k
                }
1819
110k
                i = overlap[i];
1820
110k
            } while (i != 0);
1821
133k
        }
1822
0
        return 0;
1823
31.6k
    }
1824
1825
3.95M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.72M
        end = (SRE_CHAR *)state->end;
1828
3.72M
        state->must_advance = 0;
1829
4.21M
        for (;;) {
1830
74.0M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
69.8M
                ptr++;
1832
4.21M
            if (ptr >= end)
1833
55.7k
                return 0;
1834
4.15M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
4.15M
            state->start = ptr;
1836
4.15M
            state->ptr = ptr;
1837
4.15M
            status = SRE(match)(state, pattern, 0);
1838
4.15M
            if (status != 0)
1839
3.67M
                break;
1840
482k
            ptr++;
1841
482k
            RESET_CAPTURE_GROUP();
1842
482k
        }
1843
3.72M
    } else {
1844
        /* general case */
1845
222k
        assert(ptr <= end);
1846
222k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
222k
        state->start = state->ptr = ptr;
1848
222k
        status = SRE(match)(state, pattern, 1);
1849
222k
        state->must_advance = 0;
1850
222k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
222k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
0
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
78.5M
        while (status == 0 && ptr < end) {
1858
78.3M
            ptr++;
1859
78.3M
            RESET_CAPTURE_GROUP();
1860
78.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
78.3M
            state->start = state->ptr = ptr;
1862
78.3M
            status = SRE(match)(state, pattern, 0);
1863
78.3M
        }
1864
222k
    }
1865
1866
3.89M
    return status;
1867
3.95M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/