Coverage Report

Created: 2026-03-23 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
97.2M
{
18
    /* check if pointer is at given position */
19
20
97.2M
    Py_ssize_t thisp, thatp;
21
22
97.2M
    switch (at) {
23
24
9.97M
    case SRE_AT_BEGINNING:
25
9.97M
    case SRE_AT_BEGINNING_STRING:
26
9.97M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
82.3M
    case SRE_AT_END:
33
82.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.50M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
82.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
5.00M
    case SRE_AT_END_STRING:
42
5.00M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
97.2M
    }
87
88
0
    return 0;
89
97.2M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
29.5M
{
18
    /* check if pointer is at given position */
19
20
29.5M
    Py_ssize_t thisp, thatp;
21
22
29.5M
    switch (at) {
23
24
8.52M
    case SRE_AT_BEGINNING:
25
8.52M
    case SRE_AT_BEGINNING_STRING:
26
8.52M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
18.4M
    case SRE_AT_END:
33
18.4M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
309k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
18.4M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
2.55M
    case SRE_AT_END_STRING:
42
2.55M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
29.5M
    }
87
88
0
    return 0;
89
29.5M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
59.1M
{
18
    /* check if pointer is at given position */
19
20
59.1M
    Py_ssize_t thisp, thatp;
21
22
59.1M
    switch (at) {
23
24
1.43M
    case SRE_AT_BEGINNING:
25
1.43M
    case SRE_AT_BEGINNING_STRING:
26
1.43M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
56.3M
    case SRE_AT_END:
33
56.3M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
1.18M
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
56.3M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.37M
    case SRE_AT_END_STRING:
42
1.37M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
59.1M
    }
87
88
0
    return 0;
89
59.1M
}
sre.c:sre_ucs4_at
Line
Count
Source
17
8.55M
{
18
    /* check if pointer is at given position */
19
20
8.55M
    Py_ssize_t thisp, thatp;
21
22
8.55M
    switch (at) {
23
24
17.4k
    case SRE_AT_BEGINNING:
25
17.4k
    case SRE_AT_BEGINNING_STRING:
26
17.4k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
7.45M
    case SRE_AT_END:
33
7.45M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
6.54k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
7.45M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.07M
    case SRE_AT_END_STRING:
42
1.07M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
8.55M
    }
87
88
0
    return 0;
89
8.55M
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.57G
{
94
    /* check if character is a member of the given set */
95
96
1.57G
    int ok = 1;
97
98
3.65G
    for (;;) {
99
3.65G
        switch (*set++) {
100
101
1.08G
        case SRE_OP_FAILURE:
102
1.08G
            return !ok;
103
104
1.27G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.27G
            if (ch == set[0])
107
7.32M
                return ok;
108
1.26G
            set++;
109
1.26G
            break;
110
111
99.2M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
99.2M
            if (sre_category(set[0], (int) ch))
114
31.1M
                return ok;
115
68.0M
            set++;
116
68.0M
            break;
117
118
497M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
497M
            if (ch < 256 &&
121
476M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
181M
                return ok;
123
316M
            set += 256/SRE_CODE_BITS;
124
316M
            break;
125
126
413M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
413M
            if (set[0] <= ch && ch <= set[1])
129
270M
                return ok;
130
142M
            set += 2;
131
142M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
282M
        case SRE_OP_NEGATE:
148
282M
            ok = !ok;
149
282M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.65G
        }
175
3.65G
    }
176
1.57G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
481M
{
94
    /* check if character is a member of the given set */
95
96
481M
    int ok = 1;
97
98
994M
    for (;;) {
99
994M
        switch (*set++) {
100
101
269M
        case SRE_OP_FAILURE:
102
269M
            return !ok;
103
104
278M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
278M
            if (ch == set[0])
107
4.77M
                return ok;
108
273M
            set++;
109
273M
            break;
110
111
32.1M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
32.1M
            if (sre_category(set[0], (int) ch))
114
14.7M
                return ok;
115
17.4M
            set++;
116
17.4M
            break;
117
118
129M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
129M
            if (ch < 256 &&
121
129M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
53.6M
                return ok;
123
75.8M
            set += 256/SRE_CODE_BITS;
124
75.8M
            break;
125
126
216M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
216M
            if (set[0] <= ch && ch <= set[1])
129
138M
                return ok;
130
77.2M
            set += 2;
131
77.2M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
68.4M
        case SRE_OP_NEGATE:
148
68.4M
            ok = !ok;
149
68.4M
            break;
150
151
4
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
4
        {
154
4
            Py_ssize_t count, block;
155
4
            count = *(set++);
156
157
4
            if (ch < 0x10000u)
158
4
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
4
            set += 256/sizeof(SRE_CODE);
162
4
            if (block >=0 &&
163
4
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
4
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
4
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
4
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
994M
        }
175
994M
    }
176
481M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
758M
{
94
    /* check if character is a member of the given set */
95
96
758M
    int ok = 1;
97
98
1.86G
    for (;;) {
99
1.86G
        switch (*set++) {
100
101
579M
        case SRE_OP_FAILURE:
102
579M
            return !ok;
103
104
768M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
768M
            if (ch == set[0])
107
1.43M
                return ok;
108
767M
            set++;
109
767M
            break;
110
111
58.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
58.9M
            if (sre_category(set[0], (int) ch))
114
13.6M
                return ok;
115
45.3M
            set++;
116
45.3M
            break;
117
118
173M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
173M
            if (ch < 256 &&
121
162M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
51.3M
                return ok;
123
122M
            set += 256/SRE_CODE_BITS;
124
122M
            break;
125
126
169M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
169M
            if (set[0] <= ch && ch <= set[1])
129
113M
                return ok;
130
56.3M
            set += 2;
131
56.3M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
111M
        case SRE_OP_NEGATE:
148
111M
            ok = !ok;
149
111M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.86G
        }
175
1.86G
    }
176
758M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
338M
{
94
    /* check if character is a member of the given set */
95
96
338M
    int ok = 1;
97
98
795M
    for (;;) {
99
795M
        switch (*set++) {
100
101
239M
        case SRE_OP_FAILURE:
102
239M
            return !ok;
103
104
224M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
224M
            if (ch == set[0])
107
1.11M
                return ok;
108
223M
            set++;
109
223M
            break;
110
111
8.10M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
8.10M
            if (sre_category(set[0], (int) ch))
114
2.73M
                return ok;
115
5.37M
            set++;
116
5.37M
            break;
117
118
194M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
194M
            if (ch < 256 &&
121
184M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
76.3M
                return ok;
123
118M
            set += 256/SRE_CODE_BITS;
124
118M
            break;
125
126
27.0M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
27.0M
            if (set[0] <= ch && ch <= set[1])
129
18.1M
                return ok;
130
8.90M
            set += 2;
131
8.90M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
101M
        case SRE_OP_NEGATE:
148
101M
            ok = !ok;
149
101M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
795M
        }
175
795M
    }
176
338M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
590M
{
195
590M
    SRE_CODE chr;
196
590M
    SRE_CHAR c;
197
590M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
590M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
590M
    Py_ssize_t i;
200
590M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
590M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
106M
        end = ptr + maxcount;
205
206
590M
    switch (pattern[0]) {
207
208
479M
    case SRE_OP_IN:
209
        /* repeated set */
210
479M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
862M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
383M
            ptr++;
213
479M
        break;
214
215
62.4M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
62.4M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
158M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
95.8M
            ptr++;
220
62.4M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
47.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
47.6M
        chr = pattern[1];
232
47.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
47.6M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
37.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
37.0M
        else
238
37.0M
#endif
239
49.8M
        while (ptr < end && *ptr == c)
240
2.17M
            ptr++;
241
47.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
1.32M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
1.32M
        chr = pattern[1];
270
1.32M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
1.32M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
523k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
523k
        else
276
523k
#endif
277
47.8M
        while (ptr < end && *ptr != c)
278
46.5M
            ptr++;
279
1.32M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
590M
    }
319
320
590M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
590M
           ptr - (SRE_CHAR*) state->ptr));
322
590M
    return ptr - (SRE_CHAR*) state->ptr;
323
590M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
168M
{
195
168M
    SRE_CODE chr;
196
168M
    SRE_CHAR c;
197
168M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
168M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
168M
    Py_ssize_t i;
200
168M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
168M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
31.8M
        end = ptr + maxcount;
205
206
168M
    switch (pattern[0]) {
207
208
131M
    case SRE_OP_IN:
209
        /* repeated set */
210
131M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
276M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
145M
            ptr++;
213
131M
        break;
214
215
12.5M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
12.5M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
32.1M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.6M
            ptr++;
220
12.5M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
24.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
24.6M
        chr = pattern[1];
232
24.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
24.6M
        c = (SRE_CHAR) chr;
234
24.6M
#if SIZEOF_SRE_CHAR < 4
235
24.6M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
24.6M
        else
238
24.6M
#endif
239
24.9M
        while (ptr < end && *ptr == c)
240
247k
            ptr++;
241
24.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
285k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
285k
        chr = pattern[1];
270
285k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
285k
        c = (SRE_CHAR) chr;
272
285k
#if SIZEOF_SRE_CHAR < 4
273
285k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
285k
        else
276
285k
#endif
277
8.14M
        while (ptr < end && *ptr != c)
278
7.86M
            ptr++;
279
285k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
168M
    }
319
320
168M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
168M
           ptr - (SRE_CHAR*) state->ptr));
322
168M
    return ptr - (SRE_CHAR*) state->ptr;
323
168M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
319M
{
195
319M
    SRE_CODE chr;
196
319M
    SRE_CHAR c;
197
319M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
319M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
319M
    Py_ssize_t i;
200
319M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
319M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
56.5M
        end = ptr + maxcount;
205
206
319M
    switch (pattern[0]) {
207
208
261M
    case SRE_OP_IN:
209
        /* repeated set */
210
261M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
394M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
133M
            ptr++;
213
261M
        break;
214
215
44.9M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
44.9M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
101M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
56.6M
            ptr++;
220
44.9M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
12.3M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
12.3M
        chr = pattern[1];
232
12.3M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
12.3M
        c = (SRE_CHAR) chr;
234
12.3M
#if SIZEOF_SRE_CHAR < 4
235
12.3M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
12.3M
        else
238
12.3M
#endif
239
13.8M
        while (ptr < end && *ptr == c)
240
1.52M
            ptr++;
241
12.3M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
238k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
238k
        chr = pattern[1];
270
238k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
238k
        c = (SRE_CHAR) chr;
272
238k
#if SIZEOF_SRE_CHAR < 4
273
238k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
238k
        else
276
238k
#endif
277
14.8M
        while (ptr < end && *ptr != c)
278
14.6M
            ptr++;
279
238k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
319M
    }
319
320
319M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
319M
           ptr - (SRE_CHAR*) state->ptr));
322
319M
    return ptr - (SRE_CHAR*) state->ptr;
323
319M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
102M
{
195
102M
    SRE_CODE chr;
196
102M
    SRE_CHAR c;
197
102M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
102M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
102M
    Py_ssize_t i;
200
102M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
102M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
17.6M
        end = ptr + maxcount;
205
206
102M
    switch (pattern[0]) {
207
208
86.2M
    case SRE_OP_IN:
209
        /* repeated set */
210
86.2M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
191M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
104M
            ptr++;
213
86.2M
        break;
214
215
5.08M
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
5.08M
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
24.5M
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
19.5M
            ptr++;
220
5.08M
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
10.6M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
10.6M
        chr = pattern[1];
232
10.6M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
10.6M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
11.0M
        while (ptr < end && *ptr == c)
240
401k
            ptr++;
241
10.6M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
801k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
801k
        chr = pattern[1];
270
801k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
801k
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.8M
        while (ptr < end && *ptr != c)
278
24.0M
            ptr++;
279
801k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
102M
    }
319
320
102M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
102M
           ptr - (SRE_CHAR*) state->ptr));
322
102M
    return ptr - (SRE_CHAR*) state->ptr;
323
102M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
#define LASTMARK_SAVE()     \
354
531M
    do { \
355
531M
        ctx->lastmark = state->lastmark; \
356
531M
        ctx->lastindex = state->lastindex; \
357
531M
    } while (0)
358
#define LASTMARK_RESTORE()  \
359
312M
    do { \
360
312M
        state->lastmark = ctx->lastmark; \
361
312M
        state->lastindex = ctx->lastindex; \
362
312M
    } while (0)
363
364
#define LAST_PTR_PUSH()     \
365
197M
    do { \
366
197M
        TRACE(("push last_ptr: %zd", \
367
197M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
197M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
197M
    } while (0)
370
#define LAST_PTR_POP()  \
371
197M
    do { \
372
197M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
197M
        TRACE(("pop last_ptr: %zd", \
374
197M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
197M
    } while (0)
376
377
0
#define RETURN_ERROR(i) do { return i; } while(0)
378
809M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
550M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
#define RETURN_ON_ERROR(i) \
382
1.08G
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
#define RETURN_ON_SUCCESS(i) \
384
113M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
#define RETURN_ON_FAILURE(i) \
386
23.2M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.35G
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.35G
do { \
390
1.35G
    alloc_pos = state->data_stack_base; \
391
1.35G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.35G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.35G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
161M
        int j = data_stack_grow(state, sizeof(type)); \
395
161M
        if (j < 0) return j; \
396
161M
        if (ctx_pos != -1) \
397
161M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
161M
    } \
399
1.35G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.35G
    state->data_stack_base += sizeof(type); \
401
1.35G
} while (0)
402
403
1.46G
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.46G
do { \
405
1.46G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.46G
    ptr = (type*)(state->data_stack+pos); \
407
1.46G
} while (0)
408
409
483M
#define DATA_STACK_PUSH(state, data, size) \
410
483M
do { \
411
483M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
483M
           data, state->data_stack_base, size)); \
413
483M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
77.3k
        int j = data_stack_grow(state, size); \
415
77.3k
        if (j < 0) return j; \
416
77.3k
        if (ctx_pos != -1) \
417
77.3k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
77.3k
    } \
419
483M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
483M
    state->data_stack_base += size; \
421
483M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely casted to `void*`, see bpo-39943 for details. */
426
307M
#define DATA_STACK_POP(state, data, size, discard) \
427
307M
do { \
428
307M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
307M
           data, state->data_stack_base-size, size)); \
430
307M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
307M
    if (discard) \
432
307M
        state->data_stack_base -= size; \
433
307M
} while (0)
434
435
1.53G
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.53G
do { \
437
1.53G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.53G
           state->data_stack_base-size, size)); \
439
1.53G
    state->data_stack_base -= size; \
440
1.53G
} while(0)
441
442
#define DATA_PUSH(x) \
443
197M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
#define DATA_POP(x) \
445
197M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
#define DATA_POP_DISCARD(x) \
447
1.35G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
#define DATA_ALLOC(t,p) \
449
1.35G
    DATA_STACK_ALLOC(state, t, p)
450
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.46G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
#define MARK_PUSH(lastmark) \
472
390M
    do if (lastmark >= 0) { \
473
285M
        MARK_TRACE("push", (lastmark)); \
474
285M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
285M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
390M
    } while (0)
477
#define MARK_POP(lastmark) \
478
123M
    do if (lastmark >= 0) { \
479
108M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
108M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
108M
        MARK_TRACE("pop", (lastmark)); \
482
123M
    } while (0)
483
#define MARK_POP_KEEP(lastmark) \
484
1.67M
    do if (lastmark >= 0) { \
485
1.05M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.05M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.05M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.67M
    } while (0)
489
#define MARK_POP_DISCARD(lastmark) \
490
266M
    do if (lastmark >= 0) { \
491
177M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
177M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
177M
        MARK_TRACE("pop discard", (lastmark)); \
494
266M
    } while (0)
495
496
481M
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
197M
#define JUMP_MAX_UNTIL_2     2
499
113M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
113M
#define JUMP_REPEAT          7
504
13.2M
#define JUMP_REPEAT_ONE_1    8
505
225M
#define JUMP_REPEAT_ONE_2    9
506
63.5M
#define JUMP_MIN_REPEAT_ONE  10
507
102M
#define JUMP_BRANCH          11
508
23.2M
#define JUMP_ASSERT          12
509
25.5M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
878M
    ctx->pattern = pattern; \
516
878M
    ctx->ptr = ptr; \
517
878M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
878M
    nextctx->pattern = nextpattern; \
519
878M
    nextctx->toplevel = toplevel_; \
520
878M
    nextctx->jump = jumpvalue; \
521
878M
    nextctx->last_ctx_pos = ctx_pos; \
522
878M
    pattern = nextpattern; \
523
878M
    ctx_pos = alloc_pos; \
524
878M
    ctx = nextctx; \
525
878M
    goto entrance; \
526
878M
    jumplabel: \
527
878M
    pattern = ctx->pattern; \
528
878M
    ptr = ctx->ptr;
529
530
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
829M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
48.8M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
typedef struct {
537
    Py_ssize_t count;
538
    union {
539
        SRE_CODE chr;
540
        SRE_REPEAT* rep;
541
    } u;
542
    int lastmark;
543
    int lastindex;
544
    const SRE_CODE* pattern;
545
    const SRE_CHAR* ptr;
546
    int toplevel;
547
    int jump;
548
    Py_ssize_t last_ctx_pos;
549
} SRE(match_context);
550
551
#define _MAYBE_CHECK_SIGNALS                                       \
552
2.34G
    do {                                                           \
553
2.34G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
2.34G
    } while (0)
557
558
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
2.34G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
#if USE_COMPUTED_GOTOS
585
2.42G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
2.34G
        do {                               \
588
2.34G
            MAYBE_CHECK_SIGNALS;           \
589
2.34G
            goto *sre_targets[*pattern++]; \
590
2.34G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
481M
{
601
481M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
481M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
481M
    Py_ssize_t ret = 0;
604
481M
    int jump;
605
481M
    unsigned int sigcount = state->sigcount;
606
607
481M
    SRE(match_context)* ctx;
608
481M
    SRE(match_context)* nextctx;
609
481M
    INIT_TRACE(state);
610
611
481M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
481M
    DATA_ALLOC(SRE(match_context), ctx);
614
481M
    ctx->last_ctx_pos = -1;
615
481M
    ctx->jump = JUMP_NONE;
616
481M
    ctx->toplevel = toplevel;
617
481M
    ctx_pos = alloc_pos;
618
619
481M
#if USE_COMPUTED_GOTOS
620
481M
#include "sre_targets.h"
621
481M
#endif
622
623
1.35G
entrance:
624
625
1.35G
    ;  // Fashion statement.
626
1.35G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.35G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
54.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.54M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.54M
                   end - ptr, (size_t) pattern[3]));
634
3.54M
            RETURN_FAILURE;
635
3.54M
        }
636
50.8M
        pattern += pattern[1] + 1;
637
50.8M
    }
638
639
1.35G
#if USE_COMPUTED_GOTOS
640
1.35G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.35G
    {
647
648
1.35G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
579M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
579M
                   ptr, pattern[0]));
653
579M
            {
654
579M
                int i = pattern[0];
655
579M
                if (i & 1)
656
126M
                    state->lastindex = i/2 + 1;
657
579M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
573M
                    int j = state->lastmark + 1;
663
588M
                    while (j < i)
664
14.3M
                        state->mark[j++] = NULL;
665
573M
                    state->lastmark = i;
666
573M
                }
667
579M
                state->mark[i] = ptr;
668
579M
            }
669
579M
            pattern++;
670
579M
            DISPATCH;
671
672
579M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
138M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
138M
                   ptr, *pattern));
677
138M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
63.1M
                RETURN_FAILURE;
679
74.8M
            pattern++;
680
74.8M
            ptr++;
681
74.8M
            DISPATCH;
682
683
74.8M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
158M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
158M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
158M
            if (ctx->toplevel &&
698
38.0M
                ((state->match_all && ptr != state->end) ||
699
38.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
158M
            state->ptr = ptr;
704
158M
            RETURN_SUCCESS;
705
706
97.2M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
97.2M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
97.2M
            if (!SRE(at)(state, ptr, *pattern))
711
80.7M
                RETURN_FAILURE;
712
16.5M
            pattern++;
713
16.5M
            DISPATCH;
714
715
16.5M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
290M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
290M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
290M
            if (ptr >= end ||
749
288M
                !SRE(charset)(state, pattern + 1, *ptr))
750
94.7M
                RETURN_FAILURE;
751
195M
            pattern += pattern[0];
752
195M
            ptr++;
753
195M
            DISPATCH;
754
755
195M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
5.52M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
5.52M
                   pattern, ptr, pattern[0]));
758
5.52M
            if (ptr >= end ||
759
5.52M
                sre_lower_ascii(*ptr) != *pattern)
760
41.1k
                RETURN_FAILURE;
761
5.48M
            pattern++;
762
5.48M
            ptr++;
763
5.48M
            DISPATCH;
764
765
5.48M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
73.1M
        TARGET(SRE_OP_JUMP):
845
73.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
73.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
73.1M
                   ptr, pattern[0]));
850
73.1M
            pattern += pattern[0];
851
73.1M
            DISPATCH;
852
853
92.7M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
92.7M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
92.7M
            LASTMARK_SAVE();
858
92.7M
            if (state->repeat)
859
56.7M
                MARK_PUSH(ctx->lastmark);
860
206M
            for (; pattern[0]; pattern += pattern[0]) {
861
184M
                if (pattern[1] == SRE_OP_LITERAL &&
862
108M
                    (ptr >= end ||
863
107M
                     (SRE_CODE) *ptr != pattern[2]))
864
56.4M
                    continue;
865
128M
                if (pattern[1] == SRE_OP_IN &&
866
48.8M
                    (ptr >= end ||
867
48.7M
                     !SRE(charset)(state, pattern + 3,
868
48.7M
                                   (SRE_CODE) *ptr)))
869
26.0M
                    continue;
870
102M
                state->ptr = ptr;
871
102M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
102M
                if (ret) {
873
70.7M
                    if (state->repeat)
874
48.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
70.7M
                    RETURN_ON_ERROR(ret);
876
70.7M
                    RETURN_SUCCESS;
877
70.7M
                }
878
31.5M
                if (state->repeat)
879
12.0k
                    MARK_POP_KEEP(ctx->lastmark);
880
31.5M
                LASTMARK_RESTORE();
881
31.5M
            }
882
21.9M
            if (state->repeat)
883
8.12M
                MARK_POP_DISCARD(ctx->lastmark);
884
21.9M
            RETURN_FAILURE;
885
886
531M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
531M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
531M
                   pattern[1], pattern[2]));
898
899
531M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
886k
                RETURN_FAILURE; /* cannot match */
901
902
530M
            state->ptr = ptr;
903
904
530M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
530M
            RETURN_ON_ERROR(ret);
906
530M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
530M
            ctx->count = ret;
908
530M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
530M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
318M
                RETURN_FAILURE;
917
918
212M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.04M
                ptr == state->end &&
920
87.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
87.5k
            {
922
                /* tail is empty.  we're finished */
923
87.5k
                state->ptr = ptr;
924
87.5k
                RETURN_SUCCESS;
925
87.5k
            }
926
927
212M
            LASTMARK_SAVE();
928
212M
            if (state->repeat)
929
110M
                MARK_PUSH(ctx->lastmark);
930
931
212M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
22.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
22.7M
                for (;;) {
936
54.6M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
45.1M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
31.9M
                        ptr--;
939
31.9M
                        ctx->count--;
940
31.9M
                    }
941
22.7M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
9.44M
                        break;
943
13.2M
                    state->ptr = ptr;
944
13.2M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
13.2M
                            pattern+pattern[0]);
946
13.2M
                    if (ret) {
947
13.2M
                        if (state->repeat)
948
11.9M
                            MARK_POP_DISCARD(ctx->lastmark);
949
13.2M
                        RETURN_ON_ERROR(ret);
950
13.2M
                        RETURN_SUCCESS;
951
13.2M
                    }
952
779
                    if (state->repeat)
953
763
                        MARK_POP_KEEP(ctx->lastmark);
954
779
                    LASTMARK_RESTORE();
955
956
779
                    ptr--;
957
779
                    ctx->count--;
958
779
                }
959
9.44M
                if (state->repeat)
960
8.35M
                    MARK_POP_DISCARD(ctx->lastmark);
961
189M
            } else {
962
                /* general case */
963
287M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
225M
                    state->ptr = ptr;
965
225M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
225M
                            pattern+pattern[0]);
967
225M
                    if (ret) {
968
127M
                        if (state->repeat)
969
88.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
127M
                        RETURN_ON_ERROR(ret);
971
127M
                        RETURN_SUCCESS;
972
127M
                    }
973
97.8M
                    if (state->repeat)
974
1.65M
                        MARK_POP_KEEP(ctx->lastmark);
975
97.8M
                    LASTMARK_RESTORE();
976
977
97.8M
                    ptr--;
978
97.8M
                    ctx->count--;
979
97.8M
                }
980
61.8M
                if (state->repeat)
981
1.40M
                    MARK_POP_DISCARD(ctx->lastmark);
982
61.8M
            }
983
71.2M
            RETURN_FAILURE;
984
985
3.72M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
3.72M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
3.72M
                   pattern[1], pattern[2]));
997
998
3.72M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
3.72M
            state->ptr = ptr;
1002
1003
3.72M
            if (pattern[1] == 0)
1004
3.72M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
3.72M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
3.72M
            } else {
1028
                /* general case */
1029
3.72M
                LASTMARK_SAVE();
1030
3.72M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
63.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
63.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
63.5M
                    state->ptr = ptr;
1036
63.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
63.5M
                            pattern+pattern[0]);
1038
63.5M
                    if (ret) {
1039
3.72M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
3.72M
                        RETURN_ON_ERROR(ret);
1042
3.72M
                        RETURN_SUCCESS;
1043
3.72M
                    }
1044
59.8M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
59.8M
                    LASTMARK_RESTORE();
1047
1048
59.8M
                    state->ptr = ptr;
1049
59.8M
                    ret = SRE(count)(state, pattern+3, 1);
1050
59.8M
                    RETURN_ON_ERROR(ret);
1051
59.8M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
59.8M
                    if (ret == 0)
1053
16
                        break;
1054
59.8M
                    assert(ret == 1);
1055
59.8M
                    ptr++;
1056
59.8M
                    ctx->count++;
1057
59.8M
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
113M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
113M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
113M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
113M
            ctx->u.rep = repeat_pool_malloc(state);
1127
113M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
113M
            ctx->u.rep->count = -1;
1131
113M
            ctx->u.rep->pattern = pattern;
1132
113M
            ctx->u.rep->prev = state->repeat;
1133
113M
            ctx->u.rep->last_ptr = NULL;
1134
113M
            state->repeat = ctx->u.rep;
1135
1136
113M
            state->ptr = ptr;
1137
113M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
113M
            state->repeat = ctx->u.rep->prev;
1139
113M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
113M
            if (ret) {
1142
37.9M
                RETURN_ON_ERROR(ret);
1143
37.9M
                RETURN_SUCCESS;
1144
37.9M
            }
1145
75.4M
            RETURN_FAILURE;
1146
1147
213M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
213M
            ctx->u.rep = state->repeat;
1155
213M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
213M
            state->ptr = ptr;
1159
1160
213M
            ctx->count = ctx->u.rep->count+1;
1161
1162
213M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
213M
                   ptr, ctx->count));
1164
1165
213M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
213M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
15.9M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
197M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
197M
                ctx->u.rep->count = ctx->count;
1185
197M
                LASTMARK_SAVE();
1186
197M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
197M
                LAST_PTR_PUSH();
1189
197M
                ctx->u.rep->last_ptr = state->ptr;
1190
197M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
197M
                        ctx->u.rep->pattern+3);
1192
197M
                LAST_PTR_POP();
1193
197M
                if (ret) {
1194
99.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
99.9M
                    RETURN_ON_ERROR(ret);
1196
99.9M
                    RETURN_SUCCESS;
1197
99.9M
                }
1198
97.6M
                MARK_POP(ctx->lastmark);
1199
97.6M
                LASTMARK_RESTORE();
1200
97.6M
                ctx->u.rep->count = ctx->count-1;
1201
97.6M
                state->ptr = ptr;
1202
97.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
113M
            state->repeat = ctx->u.rep->prev;
1207
113M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
113M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
113M
            RETURN_ON_SUCCESS(ret);
1211
75.7M
            state->ptr = ptr;
1212
75.7M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
23.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
23.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
23.2M
                   ptr, pattern[1]));
1565
23.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
23.2M
            state->ptr = ptr - pattern[1];
1568
23.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
23.2M
            RETURN_ON_FAILURE(ret);
1570
19.5M
            pattern += pattern[0];
1571
19.5M
            DISPATCH;
1572
1573
25.5M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.5M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.5M
                   ptr, pattern[1]));
1578
25.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.5M
                state->ptr = ptr - pattern[1];
1580
25.5M
                LASTMARK_SAVE();
1581
25.5M
                if (state->repeat)
1582
25.5M
                    MARK_PUSH(ctx->lastmark);
1583
1584
51.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
51.0M
                if (ret) {
1586
6.67k
                    if (state->repeat)
1587
6.67k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
6.67k
                    RETURN_ON_ERROR(ret);
1589
6.67k
                    RETURN_FAILURE;
1590
6.67k
                }
1591
25.5M
                if (state->repeat)
1592
25.5M
                    MARK_POP(ctx->lastmark);
1593
25.5M
                LASTMARK_RESTORE();
1594
25.5M
            }
1595
25.5M
            pattern += pattern[0];
1596
25.5M
            DISPATCH;
1597
1598
25.5M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.35G
exit:
1620
1.35G
    ctx_pos = ctx->last_ctx_pos;
1621
1.35G
    jump = ctx->jump;
1622
1.35G
    DATA_POP_DISCARD(ctx);
1623
1.35G
    if (ctx_pos == -1) {
1624
481M
        state->sigcount = sigcount;
1625
481M
        return ret;
1626
481M
    }
1627
878M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
878M
    switch (jump) {
1630
197M
        case JUMP_MAX_UNTIL_2:
1631
197M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
197M
            goto jump_max_until_2;
1633
113M
        case JUMP_MAX_UNTIL_3:
1634
113M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
113M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
102M
        case JUMP_BRANCH:
1643
102M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
102M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
113M
        case JUMP_REPEAT:
1658
113M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
113M
            goto jump_repeat;
1660
13.2M
        case JUMP_REPEAT_ONE_1:
1661
13.2M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
13.2M
            goto jump_repeat_one_1;
1663
225M
        case JUMP_REPEAT_ONE_2:
1664
225M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
225M
            goto jump_repeat_one_2;
1666
63.5M
        case JUMP_MIN_REPEAT_ONE:
1667
63.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
63.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
23.2M
        case JUMP_ASSERT:
1673
23.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
23.2M
            goto jump_assert;
1675
25.5M
        case JUMP_ASSERT_NOT:
1676
25.5M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.5M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
878M
    }
1683
1684
0
    return ret; /* should never get here */
1685
878M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
171M
{
601
171M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
171M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
171M
    Py_ssize_t ret = 0;
604
171M
    int jump;
605
171M
    unsigned int sigcount = state->sigcount;
606
607
171M
    SRE(match_context)* ctx;
608
171M
    SRE(match_context)* nextctx;
609
171M
    INIT_TRACE(state);
610
611
171M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
171M
    DATA_ALLOC(SRE(match_context), ctx);
614
171M
    ctx->last_ctx_pos = -1;
615
171M
    ctx->jump = JUMP_NONE;
616
171M
    ctx->toplevel = toplevel;
617
171M
    ctx_pos = alloc_pos;
618
619
171M
#if USE_COMPUTED_GOTOS
620
171M
#include "sre_targets.h"
621
171M
#endif
622
623
432M
entrance:
624
625
432M
    ;  // Fashion statement.
626
432M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
432M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
34.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.43M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.43M
                   end - ptr, (size_t) pattern[3]));
634
3.43M
            RETURN_FAILURE;
635
3.43M
        }
636
30.6M
        pattern += pattern[1] + 1;
637
30.6M
    }
638
639
428M
#if USE_COMPUTED_GOTOS
640
428M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
428M
    {
647
648
428M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
177M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
177M
                   ptr, pattern[0]));
653
177M
            {
654
177M
                int i = pattern[0];
655
177M
                if (i & 1)
656
42.0M
                    state->lastindex = i/2 + 1;
657
177M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
174M
                    int j = state->lastmark + 1;
663
182M
                    while (j < i)
664
8.88M
                        state->mark[j++] = NULL;
665
174M
                    state->lastmark = i;
666
174M
                }
667
177M
                state->mark[i] = ptr;
668
177M
            }
669
177M
            pattern++;
670
177M
            DISPATCH;
671
672
177M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
78.4M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
78.4M
                   ptr, *pattern));
677
78.4M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
33.3M
                RETURN_FAILURE;
679
45.0M
            pattern++;
680
45.0M
            ptr++;
681
45.0M
            DISPATCH;
682
683
45.0M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
69.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
69.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
69.9M
            if (ctx->toplevel &&
698
23.3M
                ((state->match_all && ptr != state->end) ||
699
23.3M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
69.9M
            state->ptr = ptr;
704
69.9M
            RETURN_SUCCESS;
705
706
29.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
29.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
29.5M
            if (!SRE(at)(state, ptr, *pattern))
711
15.7M
                RETURN_FAILURE;
712
13.8M
            pattern++;
713
13.8M
            DISPATCH;
714
715
13.8M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
90.5M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
90.5M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
90.5M
            if (ptr >= end ||
749
90.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
17.4M
                RETURN_FAILURE;
751
73.1M
            pattern += pattern[0];
752
73.1M
            ptr++;
753
73.1M
            DISPATCH;
754
755
73.1M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
401k
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
401k
                   pattern, ptr, pattern[0]));
758
401k
            if (ptr >= end ||
759
401k
                sre_lower_ascii(*ptr) != *pattern)
760
4.71k
                RETURN_FAILURE;
761
397k
            pattern++;
762
397k
            ptr++;
763
397k
            DISPATCH;
764
765
397k
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
28
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
28
                   pattern, ptr, pattern[0]));
768
28
            if (ptr >= end ||
769
28
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
28
            pattern++;
772
28
            ptr++;
773
28
            DISPATCH;
774
775
28
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
28
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
28
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
28
            if (ptr >= end
828
20
                || !SRE(charset)(state, pattern+1,
829
20
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
16
                RETURN_FAILURE;
831
12
            pattern += pattern[0];
832
12
            ptr++;
833
12
            DISPATCH;
834
835
12
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
33.0M
        TARGET(SRE_OP_JUMP):
845
33.0M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
33.0M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
33.0M
                   ptr, pattern[0]));
850
33.0M
            pattern += pattern[0];
851
33.0M
            DISPATCH;
852
853
43.3M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
43.3M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
43.3M
            LASTMARK_SAVE();
858
43.3M
            if (state->repeat)
859
14.3M
                MARK_PUSH(ctx->lastmark);
860
103M
            for (; pattern[0]; pattern += pattern[0]) {
861
91.5M
                if (pattern[1] == SRE_OP_LITERAL &&
862
63.0M
                    (ptr >= end ||
863
62.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
26.5M
                    continue;
865
65.0M
                if (pattern[1] == SRE_OP_IN &&
866
14.1M
                    (ptr >= end ||
867
14.0M
                     !SRE(charset)(state, pattern + 3,
868
14.0M
                                   (SRE_CODE) *ptr)))
869
7.17M
                    continue;
870
57.8M
                state->ptr = ptr;
871
57.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
57.8M
                if (ret) {
873
31.5M
                    if (state->repeat)
874
13.5M
                        MARK_POP_DISCARD(ctx->lastmark);
875
31.5M
                    RETURN_ON_ERROR(ret);
876
31.5M
                    RETURN_SUCCESS;
877
31.5M
                }
878
26.3M
                if (state->repeat)
879
5.61k
                    MARK_POP_KEEP(ctx->lastmark);
880
26.3M
                LASTMARK_RESTORE();
881
26.3M
            }
882
11.7M
            if (state->repeat)
883
778k
                MARK_POP_DISCARD(ctx->lastmark);
884
11.7M
            RETURN_FAILURE;
885
886
159M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
159M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
159M
                   pattern[1], pattern[2]));
898
899
159M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
719k
                RETURN_FAILURE; /* cannot match */
901
902
158M
            state->ptr = ptr;
903
904
158M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
158M
            RETURN_ON_ERROR(ret);
906
158M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
158M
            ctx->count = ret;
908
158M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
158M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
90.4M
                RETURN_FAILURE;
917
918
67.9M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
547k
                ptr == state->end &&
920
61.2k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
61.2k
            {
922
                /* tail is empty.  we're finished */
923
61.2k
                state->ptr = ptr;
924
61.2k
                RETURN_SUCCESS;
925
61.2k
            }
926
927
67.8M
            LASTMARK_SAVE();
928
67.8M
            if (state->repeat)
929
41.3M
                MARK_PUSH(ctx->lastmark);
930
931
67.8M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.00M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.00M
                for (;;) {
936
15.9M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
9.92M
                        ptr--;
939
9.92M
                        ctx->count--;
940
9.92M
                    }
941
6.00M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.84M
                        break;
943
4.16M
                    state->ptr = ptr;
944
4.16M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
4.16M
                            pattern+pattern[0]);
946
4.16M
                    if (ret) {
947
4.15M
                        if (state->repeat)
948
2.89M
                            MARK_POP_DISCARD(ctx->lastmark);
949
4.15M
                        RETURN_ON_ERROR(ret);
950
4.15M
                        RETURN_SUCCESS;
951
4.15M
                    }
952
216
                    if (state->repeat)
953
200
                        MARK_POP_KEEP(ctx->lastmark);
954
216
                    LASTMARK_RESTORE();
955
956
216
                    ptr--;
957
216
                    ctx->count--;
958
216
                }
959
1.84M
                if (state->repeat)
960
764k
                    MARK_POP_DISCARD(ctx->lastmark);
961
61.8M
            } else {
962
                /* general case */
963
82.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
71.1M
                    state->ptr = ptr;
965
71.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
71.1M
                            pattern+pattern[0]);
967
71.1M
                    if (ret) {
968
50.0M
                        if (state->repeat)
969
36.5M
                            MARK_POP_DISCARD(ctx->lastmark);
970
50.0M
                        RETURN_ON_ERROR(ret);
971
50.0M
                        RETURN_SUCCESS;
972
50.0M
                    }
973
21.0M
                    if (state->repeat)
974
1.32M
                        MARK_POP_KEEP(ctx->lastmark);
975
21.0M
                    LASTMARK_RESTORE();
976
977
21.0M
                    ptr--;
978
21.0M
                    ctx->count--;
979
21.0M
                }
980
11.8M
                if (state->repeat)
981
1.18M
                    MARK_POP_DISCARD(ctx->lastmark);
982
11.8M
            }
983
13.6M
            RETURN_FAILURE;
984
985
2.44M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
2.44M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
2.44M
                   pattern[1], pattern[2]));
997
998
2.44M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
2.44M
            state->ptr = ptr;
1002
1003
2.44M
            if (pattern[1] == 0)
1004
2.44M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
2.44M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
2.44M
            } else {
1028
                /* general case */
1029
2.44M
                LASTMARK_SAVE();
1030
2.44M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
12.5M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
12.5M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
12.5M
                    state->ptr = ptr;
1036
12.5M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
12.5M
                            pattern+pattern[0]);
1038
12.5M
                    if (ret) {
1039
2.44M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
2.44M
                        RETURN_ON_ERROR(ret);
1042
2.44M
                        RETURN_SUCCESS;
1043
2.44M
                    }
1044
10.1M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
10.1M
                    LASTMARK_RESTORE();
1047
1048
10.1M
                    state->ptr = ptr;
1049
10.1M
                    ret = SRE(count)(state, pattern+3, 1);
1050
10.1M
                    RETURN_ON_ERROR(ret);
1051
10.1M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
10.1M
                    if (ret == 0)
1053
16
                        break;
1054
10.1M
                    assert(ret == 1);
1055
10.1M
                    ptr++;
1056
10.1M
                    ctx->count++;
1057
10.1M
                }
1058
16
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
16
            }
1061
16
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
24.5M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
24.5M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
24.5M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
24.5M
            ctx->u.rep = repeat_pool_malloc(state);
1127
24.5M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
24.5M
            ctx->u.rep->count = -1;
1131
24.5M
            ctx->u.rep->pattern = pattern;
1132
24.5M
            ctx->u.rep->prev = state->repeat;
1133
24.5M
            ctx->u.rep->last_ptr = NULL;
1134
24.5M
            state->repeat = ctx->u.rep;
1135
1136
24.5M
            state->ptr = ptr;
1137
24.5M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
24.5M
            state->repeat = ctx->u.rep->prev;
1139
24.5M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
24.5M
            if (ret) {
1142
11.5M
                RETURN_ON_ERROR(ret);
1143
11.5M
                RETURN_SUCCESS;
1144
11.5M
            }
1145
12.9M
            RETURN_FAILURE;
1146
1147
62.7M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
62.7M
            ctx->u.rep = state->repeat;
1155
62.7M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
62.7M
            state->ptr = ptr;
1159
1160
62.7M
            ctx->count = ctx->u.rep->count+1;
1161
1162
62.7M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
62.7M
                   ptr, ctx->count));
1164
1165
62.7M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
62.7M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
7.73M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
55.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
55.0M
                ctx->u.rep->count = ctx->count;
1185
55.0M
                LASTMARK_SAVE();
1186
55.0M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
55.0M
                LAST_PTR_PUSH();
1189
55.0M
                ctx->u.rep->last_ptr = state->ptr;
1190
55.0M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
55.0M
                        ctx->u.rep->pattern+3);
1192
55.0M
                LAST_PTR_POP();
1193
55.0M
                if (ret) {
1194
38.0M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
38.0M
                    RETURN_ON_ERROR(ret);
1196
38.0M
                    RETURN_SUCCESS;
1197
38.0M
                }
1198
17.0M
                MARK_POP(ctx->lastmark);
1199
17.0M
                LASTMARK_RESTORE();
1200
17.0M
                ctx->u.rep->count = ctx->count-1;
1201
17.0M
                state->ptr = ptr;
1202
17.0M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
24.7M
            state->repeat = ctx->u.rep->prev;
1207
24.7M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
24.7M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
24.7M
            RETURN_ON_SUCCESS(ret);
1211
13.1M
            state->ptr = ptr;
1212
13.1M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.52M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.52M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.52M
                   ptr, pattern[1]));
1565
3.52M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.52M
            state->ptr = ptr - pattern[1];
1568
3.52M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.52M
            RETURN_ON_FAILURE(ret);
1570
3.39M
            pattern += pattern[0];
1571
3.39M
            DISPATCH;
1572
1573
7.14M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.14M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.14M
                   ptr, pattern[1]));
1578
7.14M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.14M
                state->ptr = ptr - pattern[1];
1580
7.14M
                LASTMARK_SAVE();
1581
7.14M
                if (state->repeat)
1582
7.14M
                    MARK_PUSH(ctx->lastmark);
1583
1584
14.2M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
14.2M
                if (ret) {
1586
913
                    if (state->repeat)
1587
913
                        MARK_POP_DISCARD(ctx->lastmark);
1588
913
                    RETURN_ON_ERROR(ret);
1589
913
                    RETURN_FAILURE;
1590
913
                }
1591
7.14M
                if (state->repeat)
1592
7.14M
                    MARK_POP(ctx->lastmark);
1593
7.14M
                LASTMARK_RESTORE();
1594
7.14M
            }
1595
7.14M
            pattern += pattern[0];
1596
7.14M
            DISPATCH;
1597
1598
7.14M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
432M
exit:
1620
432M
    ctx_pos = ctx->last_ctx_pos;
1621
432M
    jump = ctx->jump;
1622
432M
    DATA_POP_DISCARD(ctx);
1623
432M
    if (ctx_pos == -1) {
1624
171M
        state->sigcount = sigcount;
1625
171M
        return ret;
1626
171M
    }
1627
260M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
260M
    switch (jump) {
1630
55.0M
        case JUMP_MAX_UNTIL_2:
1631
55.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
55.0M
            goto jump_max_until_2;
1633
24.7M
        case JUMP_MAX_UNTIL_3:
1634
24.7M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
24.7M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
57.8M
        case JUMP_BRANCH:
1643
57.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
57.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
24.5M
        case JUMP_REPEAT:
1658
24.5M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
24.5M
            goto jump_repeat;
1660
4.16M
        case JUMP_REPEAT_ONE_1:
1661
4.16M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
4.16M
            goto jump_repeat_one_1;
1663
71.1M
        case JUMP_REPEAT_ONE_2:
1664
71.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
71.1M
            goto jump_repeat_one_2;
1666
12.5M
        case JUMP_MIN_REPEAT_ONE:
1667
12.5M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
12.5M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.52M
        case JUMP_ASSERT:
1673
3.52M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.52M
            goto jump_assert;
1675
7.14M
        case JUMP_ASSERT_NOT:
1676
7.14M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.14M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
260M
    }
1683
1684
0
    return ret; /* should never get here */
1685
260M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
258M
{
601
258M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
258M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
258M
    Py_ssize_t ret = 0;
604
258M
    int jump;
605
258M
    unsigned int sigcount = state->sigcount;
606
607
258M
    SRE(match_context)* ctx;
608
258M
    SRE(match_context)* nextctx;
609
258M
    INIT_TRACE(state);
610
611
258M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
258M
    DATA_ALLOC(SRE(match_context), ctx);
614
258M
    ctx->last_ctx_pos = -1;
615
258M
    ctx->jump = JUMP_NONE;
616
258M
    ctx->toplevel = toplevel;
617
258M
    ctx_pos = alloc_pos;
618
619
258M
#if USE_COMPUTED_GOTOS
620
258M
#include "sre_targets.h"
621
258M
#endif
622
623
659M
entrance:
624
625
659M
    ;  // Fashion statement.
626
659M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
659M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
110k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
110k
                   end - ptr, (size_t) pattern[3]));
634
110k
            RETURN_FAILURE;
635
110k
        }
636
13.0M
        pattern += pattern[1] + 1;
637
13.0M
    }
638
639
659M
#if USE_COMPUTED_GOTOS
640
659M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
659M
    {
647
648
659M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
318M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
318M
                   ptr, pattern[0]));
653
318M
            {
654
318M
                int i = pattern[0];
655
318M
                if (i & 1)
656
60.2M
                    state->lastindex = i/2 + 1;
657
318M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
318M
                    int j = state->lastmark + 1;
663
321M
                    while (j < i)
664
3.24M
                        state->mark[j++] = NULL;
665
318M
                    state->lastmark = i;
666
318M
                }
667
318M
                state->mark[i] = ptr;
668
318M
            }
669
318M
            pattern++;
670
318M
            DISPATCH;
671
672
318M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
31.5M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
31.5M
                   ptr, *pattern));
677
31.5M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
17.0M
                RETURN_FAILURE;
679
14.4M
            pattern++;
680
14.4M
            ptr++;
681
14.4M
            DISPATCH;
682
683
14.4M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
65.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
65.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
65.9M
            if (ctx->toplevel &&
698
8.69M
                ((state->match_all && ptr != state->end) ||
699
8.69M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
65.9M
            state->ptr = ptr;
704
65.9M
            RETURN_SUCCESS;
705
706
59.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
59.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
59.1M
            if (!SRE(at)(state, ptr, *pattern))
711
56.4M
                RETURN_FAILURE;
712
2.70M
            pattern++;
713
2.70M
            DISPATCH;
714
715
2.70M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
141M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
141M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
141M
            if (ptr >= end ||
749
140M
                !SRE(charset)(state, pattern + 1, *ptr))
750
63.4M
                RETURN_FAILURE;
751
77.7M
            pattern += pattern[0];
752
77.7M
            ptr++;
753
77.7M
            DISPATCH;
754
755
77.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
3.43M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
3.43M
                   pattern, ptr, pattern[0]));
758
3.43M
            if (ptr >= end ||
759
3.43M
                sre_lower_ascii(*ptr) != *pattern)
760
20.6k
                RETURN_FAILURE;
761
3.41M
            pattern++;
762
3.41M
            ptr++;
763
3.41M
            DISPATCH;
764
765
3.41M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
17.2M
        TARGET(SRE_OP_JUMP):
845
17.2M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
17.2M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
17.2M
                   ptr, pattern[0]));
850
17.2M
            pattern += pattern[0];
851
17.2M
            DISPATCH;
852
853
21.5M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
21.5M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
21.5M
            LASTMARK_SAVE();
858
21.5M
            if (state->repeat)
859
17.1M
                MARK_PUSH(ctx->lastmark);
860
44.2M
            for (; pattern[0]; pattern += pattern[0]) {
861
39.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
17.7M
                    (ptr >= end ||
863
17.7M
                     (SRE_CODE) *ptr != pattern[2]))
864
10.2M
                    continue;
865
29.3M
                if (pattern[1] == SRE_OP_IN &&
866
14.8M
                    (ptr >= end ||
867
14.8M
                     !SRE(charset)(state, pattern + 3,
868
14.8M
                                   (SRE_CODE) *ptr)))
869
8.12M
                    continue;
870
21.2M
                state->ptr = ptr;
871
21.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
21.2M
                if (ret) {
873
16.8M
                    if (state->repeat)
874
14.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
16.8M
                    RETURN_ON_ERROR(ret);
876
16.8M
                    RETURN_SUCCESS;
877
16.8M
                }
878
4.39M
                if (state->repeat)
879
2.35k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.39M
                LASTMARK_RESTORE();
881
4.39M
            }
882
4.66M
            if (state->repeat)
883
2.45M
                MARK_POP_DISCARD(ctx->lastmark);
884
4.66M
            RETURN_FAILURE;
885
886
274M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
274M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
274M
                   pattern[1], pattern[2]));
898
899
274M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
155k
                RETURN_FAILURE; /* cannot match */
901
902
274M
            state->ptr = ptr;
903
904
274M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
274M
            RETURN_ON_ERROR(ret);
906
274M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
274M
            ctx->count = ret;
908
274M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
274M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
190M
                RETURN_FAILURE;
917
918
84.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
3.64M
                ptr == state->end &&
920
20.8k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
20.8k
            {
922
                /* tail is empty.  we're finished */
923
20.8k
                state->ptr = ptr;
924
20.8k
                RETURN_SUCCESS;
925
20.8k
            }
926
927
84.6M
            LASTMARK_SAVE();
928
84.6M
            if (state->repeat)
929
24.7M
                MARK_PUSH(ctx->lastmark);
930
931
84.6M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
4.68M
                ctx->u.chr = pattern[pattern[0]+1];
935
4.68M
                for (;;) {
936
9.49M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
7.58M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
4.80M
                        ptr--;
939
4.80M
                        ctx->count--;
940
4.80M
                    }
941
4.68M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
1.90M
                        break;
943
2.77M
                    state->ptr = ptr;
944
2.77M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
2.77M
                            pattern+pattern[0]);
946
2.77M
                    if (ret) {
947
2.77M
                        if (state->repeat)
948
2.73M
                            MARK_POP_DISCARD(ctx->lastmark);
949
2.77M
                        RETURN_ON_ERROR(ret);
950
2.77M
                        RETURN_SUCCESS;
951
2.77M
                    }
952
254
                    if (state->repeat)
953
254
                        MARK_POP_KEEP(ctx->lastmark);
954
254
                    LASTMARK_RESTORE();
955
956
254
                    ptr--;
957
254
                    ctx->count--;
958
254
                }
959
1.90M
                if (state->repeat)
960
1.89M
                    MARK_POP_DISCARD(ctx->lastmark);
961
79.9M
            } else {
962
                /* general case */
963
142M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
97.5M
                    state->ptr = ptr;
965
97.5M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
97.5M
                            pattern+pattern[0]);
967
97.5M
                    if (ret) {
968
35.0M
                        if (state->repeat)
969
19.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
35.0M
                        RETURN_ON_ERROR(ret);
971
35.0M
                        RETURN_SUCCESS;
972
35.0M
                    }
973
62.4M
                    if (state->repeat)
974
229k
                        MARK_POP_KEEP(ctx->lastmark);
975
62.4M
                    LASTMARK_RESTORE();
976
977
62.4M
                    ptr--;
978
62.4M
                    ctx->count--;
979
62.4M
                }
980
44.8M
                if (state->repeat)
981
157k
                    MARK_POP_DISCARD(ctx->lastmark);
982
44.8M
            }
983
46.7M
            RETURN_FAILURE;
984
985
1.26M
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
1.26M
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
1.26M
                   pattern[1], pattern[2]));
997
998
1.26M
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
1.26M
            state->ptr = ptr;
1002
1003
1.26M
            if (pattern[1] == 0)
1004
1.26M
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
1.26M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
1.26M
            } else {
1028
                /* general case */
1029
1.26M
                LASTMARK_SAVE();
1030
1.26M
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
45.9M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
45.9M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
45.9M
                    state->ptr = ptr;
1036
45.9M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
45.9M
                            pattern+pattern[0]);
1038
45.9M
                    if (ret) {
1039
1.26M
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
1.26M
                        RETURN_ON_ERROR(ret);
1042
1.26M
                        RETURN_SUCCESS;
1043
1.26M
                    }
1044
44.6M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
44.6M
                    LASTMARK_RESTORE();
1047
1048
44.6M
                    state->ptr = ptr;
1049
44.6M
                    ret = SRE(count)(state, pattern+3, 1);
1050
44.6M
                    RETURN_ON_ERROR(ret);
1051
44.6M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
44.6M
                    if (ret == 0)
1053
0
                        break;
1054
44.6M
                    assert(ret == 1);
1055
44.6M
                    ptr++;
1056
44.6M
                    ctx->count++;
1057
44.6M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
64.8M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
64.8M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
64.8M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
64.8M
            ctx->u.rep = repeat_pool_malloc(state);
1127
64.8M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
64.8M
            ctx->u.rep->count = -1;
1131
64.8M
            ctx->u.rep->pattern = pattern;
1132
64.8M
            ctx->u.rep->prev = state->repeat;
1133
64.8M
            ctx->u.rep->last_ptr = NULL;
1134
64.8M
            state->repeat = ctx->u.rep;
1135
1136
64.8M
            state->ptr = ptr;
1137
64.8M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
64.8M
            state->repeat = ctx->u.rep->prev;
1139
64.8M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
64.8M
            if (ret) {
1142
9.78M
                RETURN_ON_ERROR(ret);
1143
9.78M
                RETURN_SUCCESS;
1144
9.78M
            }
1145
55.0M
            RETURN_FAILURE;
1146
1147
91.3M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
91.3M
            ctx->u.rep = state->repeat;
1155
91.3M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
91.3M
            state->ptr = ptr;
1159
1160
91.3M
            ctx->count = ctx->u.rep->count+1;
1161
1162
91.3M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
91.3M
                   ptr, ctx->count));
1164
1165
91.3M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
91.3M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
2.68M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
88.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
88.6M
                ctx->u.rep->count = ctx->count;
1185
88.6M
                LASTMARK_SAVE();
1186
88.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
88.6M
                LAST_PTR_PUSH();
1189
88.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
88.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
88.6M
                        ctx->u.rep->pattern+3);
1192
88.6M
                LAST_PTR_POP();
1193
88.6M
                if (ret) {
1194
26.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
26.4M
                    RETURN_ON_ERROR(ret);
1196
26.4M
                    RETURN_SUCCESS;
1197
26.4M
                }
1198
62.1M
                MARK_POP(ctx->lastmark);
1199
62.1M
                LASTMARK_RESTORE();
1200
62.1M
                ctx->u.rep->count = ctx->count-1;
1201
62.1M
                state->ptr = ptr;
1202
62.1M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
64.8M
            state->repeat = ctx->u.rep->prev;
1207
64.8M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
64.8M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
64.8M
            RETURN_ON_SUCCESS(ret);
1211
55.0M
            state->ptr = ptr;
1212
55.0M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
7.57M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
7.57M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
7.57M
                   ptr, pattern[1]));
1565
7.57M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
7.57M
            state->ptr = ptr - pattern[1];
1568
7.57M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
7.57M
            RETURN_ON_FAILURE(ret);
1570
4.80M
            pattern += pattern[0];
1571
4.80M
            DISPATCH;
1572
1573
7.86M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.86M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.86M
                   ptr, pattern[1]));
1578
7.86M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.86M
                state->ptr = ptr - pattern[1];
1580
7.86M
                LASTMARK_SAVE();
1581
7.86M
                if (state->repeat)
1582
7.86M
                    MARK_PUSH(ctx->lastmark);
1583
1584
15.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
15.7M
                if (ret) {
1586
2.03k
                    if (state->repeat)
1587
2.03k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.03k
                    RETURN_ON_ERROR(ret);
1589
2.03k
                    RETURN_FAILURE;
1590
2.03k
                }
1591
7.86M
                if (state->repeat)
1592
7.86M
                    MARK_POP(ctx->lastmark);
1593
7.86M
                LASTMARK_RESTORE();
1594
7.86M
            }
1595
7.86M
            pattern += pattern[0];
1596
7.86M
            DISPATCH;
1597
1598
7.86M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
659M
exit:
1620
659M
    ctx_pos = ctx->last_ctx_pos;
1621
659M
    jump = ctx->jump;
1622
659M
    DATA_POP_DISCARD(ctx);
1623
659M
    if (ctx_pos == -1) {
1624
258M
        state->sigcount = sigcount;
1625
258M
        return ret;
1626
258M
    }
1627
401M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
401M
    switch (jump) {
1630
88.6M
        case JUMP_MAX_UNTIL_2:
1631
88.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
88.6M
            goto jump_max_until_2;
1633
64.8M
        case JUMP_MAX_UNTIL_3:
1634
64.8M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
64.8M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
21.2M
        case JUMP_BRANCH:
1643
21.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
21.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
64.8M
        case JUMP_REPEAT:
1658
64.8M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
64.8M
            goto jump_repeat;
1660
2.77M
        case JUMP_REPEAT_ONE_1:
1661
2.77M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
2.77M
            goto jump_repeat_one_1;
1663
97.5M
        case JUMP_REPEAT_ONE_2:
1664
97.5M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
97.5M
            goto jump_repeat_one_2;
1666
45.9M
        case JUMP_MIN_REPEAT_ONE:
1667
45.9M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
45.9M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
7.57M
        case JUMP_ASSERT:
1673
7.57M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
7.57M
            goto jump_assert;
1675
7.86M
        case JUMP_ASSERT_NOT:
1676
7.86M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.86M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
401M
    }
1683
1684
0
    return ret; /* should never get here */
1685
401M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
51.9M
{
601
51.9M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
51.9M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
51.9M
    Py_ssize_t ret = 0;
604
51.9M
    int jump;
605
51.9M
    unsigned int sigcount = state->sigcount;
606
607
51.9M
    SRE(match_context)* ctx;
608
51.9M
    SRE(match_context)* nextctx;
609
51.9M
    INIT_TRACE(state);
610
611
51.9M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
51.9M
    DATA_ALLOC(SRE(match_context), ctx);
614
51.9M
    ctx->last_ctx_pos = -1;
615
51.9M
    ctx->jump = JUMP_NONE;
616
51.9M
    ctx->toplevel = toplevel;
617
51.9M
    ctx_pos = alloc_pos;
618
619
51.9M
#if USE_COMPUTED_GOTOS
620
51.9M
#include "sre_targets.h"
621
51.9M
#endif
622
623
268M
entrance:
624
625
268M
    ;  // Fashion statement.
626
268M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
268M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
7.12M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
3.78k
            TRACE(("reject (got %tu chars, need %zu)\n",
633
3.78k
                   end - ptr, (size_t) pattern[3]));
634
3.78k
            RETURN_FAILURE;
635
3.78k
        }
636
7.12M
        pattern += pattern[1] + 1;
637
7.12M
    }
638
639
268M
#if USE_COMPUTED_GOTOS
640
268M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
268M
    {
647
648
268M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
83.7M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
83.7M
                   ptr, pattern[0]));
653
83.7M
            {
654
83.7M
                int i = pattern[0];
655
83.7M
                if (i & 1)
656
23.8M
                    state->lastindex = i/2 + 1;
657
83.7M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
81.5M
                    int j = state->lastmark + 1;
663
83.7M
                    while (j < i)
664
2.18M
                        state->mark[j++] = NULL;
665
81.5M
                    state->lastmark = i;
666
81.5M
                }
667
83.7M
                state->mark[i] = ptr;
668
83.7M
            }
669
83.7M
            pattern++;
670
83.7M
            DISPATCH;
671
672
83.7M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
28.0M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
28.0M
                   ptr, *pattern));
677
28.0M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
12.7M
                RETURN_FAILURE;
679
15.3M
            pattern++;
680
15.3M
            ptr++;
681
15.3M
            DISPATCH;
682
683
15.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
22.9M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
22.9M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
22.9M
            if (ctx->toplevel &&
698
5.97M
                ((state->match_all && ptr != state->end) ||
699
5.97M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
22.9M
            state->ptr = ptr;
704
22.9M
            RETURN_SUCCESS;
705
706
8.55M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
8.55M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
8.55M
            if (!SRE(at)(state, ptr, *pattern))
711
8.52M
                RETURN_FAILURE;
712
28.4k
            pattern++;
713
28.4k
            DISPATCH;
714
715
28.4k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
58.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
58.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
58.6M
            if (ptr >= end ||
749
58.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
13.8M
                RETURN_FAILURE;
751
44.7M
            pattern += pattern[0];
752
44.7M
            ptr++;
753
44.7M
            DISPATCH;
754
755
44.7M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.69M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.69M
                   pattern, ptr, pattern[0]));
758
1.69M
            if (ptr >= end ||
759
1.69M
                sre_lower_ascii(*ptr) != *pattern)
760
15.7k
                RETURN_FAILURE;
761
1.67M
            pattern++;
762
1.67M
            ptr++;
763
1.67M
            DISPATCH;
764
765
1.67M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
22.9M
        TARGET(SRE_OP_JUMP):
845
22.9M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
22.9M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
22.9M
                   ptr, pattern[0]));
850
22.9M
            pattern += pattern[0];
851
22.9M
            DISPATCH;
852
853
27.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
27.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
27.9M
            LASTMARK_SAVE();
858
27.9M
            if (state->repeat)
859
25.2M
                MARK_PUSH(ctx->lastmark);
860
59.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
53.6M
                if (pattern[1] == SRE_OP_LITERAL &&
862
27.3M
                    (ptr >= end ||
863
27.3M
                     (SRE_CODE) *ptr != pattern[2]))
864
19.7M
                    continue;
865
33.9M
                if (pattern[1] == SRE_OP_IN &&
866
19.8M
                    (ptr >= end ||
867
19.8M
                     !SRE(charset)(state, pattern + 3,
868
19.8M
                                   (SRE_CODE) *ptr)))
869
10.7M
                    continue;
870
23.2M
                state->ptr = ptr;
871
23.2M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.2M
                if (ret) {
873
22.3M
                    if (state->repeat)
874
20.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
22.3M
                    RETURN_ON_ERROR(ret);
876
22.3M
                    RETURN_SUCCESS;
877
22.3M
                }
878
833k
                if (state->repeat)
879
4.08k
                    MARK_POP_KEEP(ctx->lastmark);
880
833k
                LASTMARK_RESTORE();
881
833k
            }
882
5.53M
            if (state->repeat)
883
4.88M
                MARK_POP_DISCARD(ctx->lastmark);
884
5.53M
            RETURN_FAILURE;
885
886
97.6M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
97.6M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
97.6M
                   pattern[1], pattern[2]));
898
899
97.6M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
12.0k
                RETURN_FAILURE; /* cannot match */
901
902
97.6M
            state->ptr = ptr;
903
904
97.6M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
97.6M
            RETURN_ON_ERROR(ret);
906
97.6M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
97.6M
            ctx->count = ret;
908
97.6M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
97.6M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
37.9M
                RETURN_FAILURE;
917
918
59.6M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
861k
                ptr == state->end &&
920
5.37k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
5.37k
            {
922
                /* tail is empty.  we're finished */
923
5.37k
                state->ptr = ptr;
924
5.37k
                RETURN_SUCCESS;
925
5.37k
            }
926
927
59.6M
            LASTMARK_SAVE();
928
59.6M
            if (state->repeat)
929
44.1M
                MARK_PUSH(ctx->lastmark);
930
931
59.6M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
12.0M
                ctx->u.chr = pattern[pattern[0]+1];
935
12.0M
                for (;;) {
936
29.1M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
23.4M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
17.1M
                        ptr--;
939
17.1M
                        ctx->count--;
940
17.1M
                    }
941
12.0M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
5.69M
                        break;
943
6.31M
                    state->ptr = ptr;
944
6.31M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
6.31M
                            pattern+pattern[0]);
946
6.31M
                    if (ret) {
947
6.31M
                        if (state->repeat)
948
6.31M
                            MARK_POP_DISCARD(ctx->lastmark);
949
6.31M
                        RETURN_ON_ERROR(ret);
950
6.31M
                        RETURN_SUCCESS;
951
6.31M
                    }
952
309
                    if (state->repeat)
953
309
                        MARK_POP_KEEP(ctx->lastmark);
954
309
                    LASTMARK_RESTORE();
955
956
309
                    ptr--;
957
309
                    ctx->count--;
958
309
                }
959
5.69M
                if (state->repeat)
960
5.69M
                    MARK_POP_DISCARD(ctx->lastmark);
961
47.6M
            } else {
962
                /* general case */
963
61.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
56.8M
                    state->ptr = ptr;
965
56.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
56.8M
                            pattern+pattern[0]);
967
56.8M
                    if (ret) {
968
42.4M
                        if (state->repeat)
969
32.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
42.4M
                        RETURN_ON_ERROR(ret);
971
42.4M
                        RETURN_SUCCESS;
972
42.4M
                    }
973
14.3M
                    if (state->repeat)
974
100k
                        MARK_POP_KEEP(ctx->lastmark);
975
14.3M
                    LASTMARK_RESTORE();
976
977
14.3M
                    ptr--;
978
14.3M
                    ctx->count--;
979
14.3M
                }
980
5.15M
                if (state->repeat)
981
66.2k
                    MARK_POP_DISCARD(ctx->lastmark);
982
5.15M
            }
983
10.8M
            RETURN_FAILURE;
984
985
10.5k
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
10.5k
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
10.5k
                   pattern[1], pattern[2]));
997
998
10.5k
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
10.5k
            state->ptr = ptr;
1002
1003
10.5k
            if (pattern[1] == 0)
1004
10.5k
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
10.5k
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
10.5k
            } else {
1028
                /* general case */
1029
10.5k
                LASTMARK_SAVE();
1030
10.5k
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
5.08M
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
5.08M
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
5.08M
                    state->ptr = ptr;
1036
5.08M
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
5.08M
                            pattern+pattern[0]);
1038
5.08M
                    if (ret) {
1039
10.5k
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
10.5k
                        RETURN_ON_ERROR(ret);
1042
10.5k
                        RETURN_SUCCESS;
1043
10.5k
                    }
1044
5.07M
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
5.07M
                    LASTMARK_RESTORE();
1047
1048
5.07M
                    state->ptr = ptr;
1049
5.07M
                    ret = SRE(count)(state, pattern+3, 1);
1050
5.07M
                    RETURN_ON_ERROR(ret);
1051
5.07M
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
5.07M
                    if (ret == 0)
1053
0
                        break;
1054
5.07M
                    assert(ret == 1);
1055
5.07M
                    ptr++;
1056
5.07M
                    ctx->count++;
1057
5.07M
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
23.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
23.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
23.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
23.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
23.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
23.9M
            ctx->u.rep->count = -1;
1131
23.9M
            ctx->u.rep->pattern = pattern;
1132
23.9M
            ctx->u.rep->prev = state->repeat;
1133
23.9M
            ctx->u.rep->last_ptr = NULL;
1134
23.9M
            state->repeat = ctx->u.rep;
1135
1136
23.9M
            state->ptr = ptr;
1137
23.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
23.9M
            state->repeat = ctx->u.rep->prev;
1139
23.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
23.9M
            if (ret) {
1142
16.5M
                RETURN_ON_ERROR(ret);
1143
16.5M
                RETURN_SUCCESS;
1144
16.5M
            }
1145
7.41M
            RETURN_FAILURE;
1146
1147
59.5M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
59.5M
            ctx->u.rep = state->repeat;
1155
59.5M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
59.5M
            state->ptr = ptr;
1159
1160
59.5M
            ctx->count = ctx->u.rep->count+1;
1161
1162
59.5M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
59.5M
                   ptr, ctx->count));
1164
1165
59.5M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
59.5M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
5.56M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
53.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
53.9M
                ctx->u.rep->count = ctx->count;
1185
53.9M
                LASTMARK_SAVE();
1186
53.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
53.9M
                LAST_PTR_PUSH();
1189
53.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
53.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
53.9M
                        ctx->u.rep->pattern+3);
1192
53.9M
                LAST_PTR_POP();
1193
53.9M
                if (ret) {
1194
35.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
35.4M
                    RETURN_ON_ERROR(ret);
1196
35.4M
                    RETURN_SUCCESS;
1197
35.4M
                }
1198
18.4M
                MARK_POP(ctx->lastmark);
1199
18.4M
                LASTMARK_RESTORE();
1200
18.4M
                ctx->u.rep->count = ctx->count-1;
1201
18.4M
                state->ptr = ptr;
1202
18.4M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
24.0M
            state->repeat = ctx->u.rep->prev;
1207
24.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
24.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
24.0M
            RETURN_ON_SUCCESS(ret);
1211
7.44M
            state->ptr = ptr;
1212
7.44M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximin matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
12.2M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
12.2M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
12.2M
                   ptr, pattern[1]));
1565
12.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
12.2M
            state->ptr = ptr - pattern[1];
1568
12.2M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
12.2M
            RETURN_ON_FAILURE(ret);
1570
11.3M
            pattern += pattern[0];
1571
11.3M
            DISPATCH;
1572
1573
11.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
10.5M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
10.5M
                   ptr, pattern[1]));
1578
10.5M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
10.5M
                state->ptr = ptr - pattern[1];
1580
10.5M
                LASTMARK_SAVE();
1581
10.5M
                if (state->repeat)
1582
10.5M
                    MARK_PUSH(ctx->lastmark);
1583
1584
21.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
21.0M
                if (ret) {
1586
3.73k
                    if (state->repeat)
1587
3.73k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
3.73k
                    RETURN_ON_ERROR(ret);
1589
3.73k
                    RETURN_FAILURE;
1590
3.73k
                }
1591
10.5M
                if (state->repeat)
1592
10.5M
                    MARK_POP(ctx->lastmark);
1593
10.5M
                LASTMARK_RESTORE();
1594
10.5M
            }
1595
10.5M
            pattern += pattern[0];
1596
10.5M
            DISPATCH;
1597
1598
10.5M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
268M
exit:
1620
268M
    ctx_pos = ctx->last_ctx_pos;
1621
268M
    jump = ctx->jump;
1622
268M
    DATA_POP_DISCARD(ctx);
1623
268M
    if (ctx_pos == -1) {
1624
51.9M
        state->sigcount = sigcount;
1625
51.9M
        return ret;
1626
51.9M
    }
1627
216M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
216M
    switch (jump) {
1630
53.9M
        case JUMP_MAX_UNTIL_2:
1631
53.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
53.9M
            goto jump_max_until_2;
1633
24.0M
        case JUMP_MAX_UNTIL_3:
1634
24.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
24.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.2M
        case JUMP_BRANCH:
1643
23.2M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.2M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
23.9M
        case JUMP_REPEAT:
1658
23.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
23.9M
            goto jump_repeat;
1660
6.31M
        case JUMP_REPEAT_ONE_1:
1661
6.31M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
6.31M
            goto jump_repeat_one_1;
1663
56.8M
        case JUMP_REPEAT_ONE_2:
1664
56.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
56.8M
            goto jump_repeat_one_2;
1666
5.08M
        case JUMP_MIN_REPEAT_ONE:
1667
5.08M
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
5.08M
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
12.2M
        case JUMP_ASSERT:
1673
12.2M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
12.2M
            goto jump_assert;
1675
10.5M
        case JUMP_ASSERT_NOT:
1676
10.5M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
10.5M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
216M
    }
1683
1684
0
    return ret; /* should never get here */
1685
216M
}
1686
1687
/* need to reset capturing groups between two SRE(match) callings in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
321M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
115M
{
1694
115M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
115M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
115M
    Py_ssize_t status = 0;
1697
115M
    Py_ssize_t prefix_len = 0;
1698
115M
    Py_ssize_t prefix_skip = 0;
1699
115M
    SRE_CODE* prefix = NULL;
1700
115M
    SRE_CODE* charset = NULL;
1701
115M
    SRE_CODE* overlap = NULL;
1702
115M
    int flags = 0;
1703
115M
    INIT_TRACE(state);
1704
1705
115M
    if (ptr > end)
1706
0
        return 0;
1707
1708
115M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
115M
        flags = pattern[2];
1713
1714
115M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.68M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.68M
                   end - ptr, (size_t) pattern[3]));
1717
5.68M
            return 0;
1718
5.68M
        }
1719
110M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
6.81M
            end -= pattern[3] - 1;
1723
6.81M
            if (end <= ptr)
1724
0
                end = ptr;
1725
6.81M
        }
1726
1727
110M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
6.81M
            prefix_len = pattern[5];
1731
6.81M
            prefix_skip = pattern[6];
1732
6.81M
            prefix = pattern + 7;
1733
6.81M
            overlap = prefix + prefix_len - 1;
1734
103M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
95.1M
            charset = pattern + 5;
1738
1739
110M
        pattern += 1 + pattern[1];
1740
110M
    }
1741
1742
110M
    TRACE(("prefix = %p %zd %zd\n",
1743
110M
           prefix, prefix_len, prefix_skip));
1744
110M
    TRACE(("charset = %p\n", charset));
1745
1746
110M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
6.17M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.19M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.19M
#endif
1753
4.19M
        end = (SRE_CHAR *)state->end;
1754
4.19M
        state->must_advance = 0;
1755
6.77M
        while (ptr < end) {
1756
103M
            while (*ptr != c) {
1757
97.6M
                if (++ptr >= end)
1758
476k
                    return 0;
1759
97.6M
            }
1760
6.15M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
6.15M
            state->start = ptr;
1762
6.15M
            state->ptr = ptr + prefix_skip;
1763
6.15M
            if (flags & SRE_INFO_LITERAL)
1764
6.79k
                return 1; /* we got all of it */
1765
6.14M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
6.14M
            if (status != 0)
1767
5.54M
                return status;
1768
602k
            ++ptr;
1769
602k
            RESET_CAPTURE_GROUP();
1770
602k
        }
1771
142k
        return 0;
1772
4.19M
    }
1773
1774
103M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
648k
        Py_ssize_t i = 0;
1778
1779
648k
        end = (SRE_CHAR *)state->end;
1780
648k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
1.57M
        for (i = 0; i < prefix_len; i++)
1784
1.05M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
525k
#endif
1787
1.27M
        while (ptr < end) {
1788
1.27M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
13.2M
            while (*ptr++ != c) {
1790
11.9M
                if (ptr >= end)
1791
311
                    return 0;
1792
11.9M
            }
1793
1.27M
            if (ptr >= end)
1794
63
                return 0;
1795
1796
1.27M
            i = 1;
1797
1.27M
            state->must_advance = 0;
1798
1.27M
            do {
1799
1.27M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.21M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.21M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.21M
                    state->start = ptr - (prefix_len - 1);
1808
1.21M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.21M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.21M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.21M
                    if (status != 0)
1813
647k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
571k
                    if (++ptr >= end)
1816
67
                        return 0;
1817
571k
                    RESET_CAPTURE_GROUP();
1818
571k
                }
1819
629k
                i = overlap[i];
1820
629k
            } while (i != 0);
1821
1.27M
        }
1822
0
        return 0;
1823
648k
    }
1824
1825
103M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
95.1M
        end = (SRE_CHAR *)state->end;
1828
95.1M
        state->must_advance = 0;
1829
97.5M
        for (;;) {
1830
389M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
291M
                ptr++;
1832
97.5M
            if (ptr >= end)
1833
3.49M
                return 0;
1834
94.0M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
94.0M
            state->start = ptr;
1836
94.0M
            state->ptr = ptr;
1837
94.0M
            status = SRE(match)(state, pattern, 0);
1838
94.0M
            if (status != 0)
1839
91.6M
                break;
1840
2.36M
            ptr++;
1841
2.36M
            RESET_CAPTURE_GROUP();
1842
2.36M
        }
1843
95.1M
    } else {
1844
        /* general case */
1845
8.09M
        assert(ptr <= end);
1846
8.09M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
8.09M
        state->start = state->ptr = ptr;
1848
8.09M
        status = SRE(match)(state, pattern, 1);
1849
8.09M
        state->must_advance = 0;
1850
8.09M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
4.05M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
67
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
4.05M
        {
1854
4.05M
            state->start = state->ptr = ptr = end;
1855
4.05M
            return 0;
1856
4.05M
        }
1857
321M
        while (status == 0 && ptr < end) {
1858
317M
            ptr++;
1859
317M
            RESET_CAPTURE_GROUP();
1860
317M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
317M
            state->start = state->ptr = ptr;
1862
317M
            status = SRE(match)(state, pattern, 0);
1863
317M
        }
1864
4.04M
    }
1865
1866
95.7M
    return status;
1867
103M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
54.9M
{
1694
54.9M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
54.9M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
54.9M
    Py_ssize_t status = 0;
1697
54.9M
    Py_ssize_t prefix_len = 0;
1698
54.9M
    Py_ssize_t prefix_skip = 0;
1699
54.9M
    SRE_CODE* prefix = NULL;
1700
54.9M
    SRE_CODE* charset = NULL;
1701
54.9M
    SRE_CODE* overlap = NULL;
1702
54.9M
    int flags = 0;
1703
54.9M
    INIT_TRACE(state);
1704
1705
54.9M
    if (ptr > end)
1706
0
        return 0;
1707
1708
54.9M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
54.9M
        flags = pattern[2];
1713
1714
54.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
5.58M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
5.58M
                   end - ptr, (size_t) pattern[3]));
1717
5.58M
            return 0;
1718
5.58M
        }
1719
49.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.19M
            end -= pattern[3] - 1;
1723
2.19M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.19M
        }
1726
1727
49.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.19M
            prefix_len = pattern[5];
1731
2.19M
            prefix_skip = pattern[6];
1732
2.19M
            prefix = pattern + 7;
1733
2.19M
            overlap = prefix + prefix_len - 1;
1734
47.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
41.6M
            charset = pattern + 5;
1738
1739
49.3M
        pattern += 1 + pattern[1];
1740
49.3M
    }
1741
1742
49.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
49.3M
           prefix, prefix_len, prefix_skip));
1744
49.3M
    TRACE(("charset = %p\n", charset));
1745
1746
49.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.12M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.12M
#if SIZEOF_SRE_CHAR < 4
1750
2.12M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.12M
#endif
1753
2.12M
        end = (SRE_CHAR *)state->end;
1754
2.12M
        state->must_advance = 0;
1755
2.55M
        while (ptr < end) {
1756
26.4M
            while (*ptr != c) {
1757
24.4M
                if (++ptr >= end)
1758
402k
                    return 0;
1759
24.4M
            }
1760
2.01M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.01M
            state->start = ptr;
1762
2.01M
            state->ptr = ptr + prefix_skip;
1763
2.01M
            if (flags & SRE_INFO_LITERAL)
1764
615
                return 1; /* we got all of it */
1765
2.01M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.01M
            if (status != 0)
1767
1.58M
                return status;
1768
427k
            ++ptr;
1769
427k
            RESET_CAPTURE_GROUP();
1770
427k
        }
1771
137k
        return 0;
1772
2.12M
    }
1773
1774
47.1M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
73.5k
        Py_ssize_t i = 0;
1778
1779
73.5k
        end = (SRE_CHAR *)state->end;
1780
73.5k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
73.5k
#if SIZEOF_SRE_CHAR < 4
1783
220k
        for (i = 0; i < prefix_len; i++)
1784
147k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
73.5k
#endif
1787
147k
        while (ptr < end) {
1788
147k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
1.84M
            while (*ptr++ != c) {
1790
1.70M
                if (ptr >= end)
1791
60
                    return 0;
1792
1.70M
            }
1793
147k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
147k
            i = 1;
1797
147k
            state->must_advance = 0;
1798
148k
            do {
1799
148k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
139k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
139k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
139k
                    state->start = ptr - (prefix_len - 1);
1808
139k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
139k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
139k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
139k
                    if (status != 0)
1813
73.4k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
66.3k
                    if (++ptr >= end)
1816
28
                        return 0;
1817
66.3k
                    RESET_CAPTURE_GROUP();
1818
66.3k
                }
1819
74.6k
                i = overlap[i];
1820
74.6k
            } while (i != 0);
1821
147k
        }
1822
0
        return 0;
1823
73.5k
    }
1824
1825
47.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
41.6M
        end = (SRE_CHAR *)state->end;
1828
41.6M
        state->must_advance = 0;
1829
43.1M
        for (;;) {
1830
109M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
65.8M
                ptr++;
1832
43.1M
            if (ptr >= end)
1833
2.41M
                return 0;
1834
40.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
40.7M
            state->start = ptr;
1836
40.7M
            state->ptr = ptr;
1837
40.7M
            status = SRE(match)(state, pattern, 0);
1838
40.7M
            if (status != 0)
1839
39.2M
                break;
1840
1.48M
            ptr++;
1841
1.48M
            RESET_CAPTURE_GROUP();
1842
1.48M
        }
1843
41.6M
    } else {
1844
        /* general case */
1845
5.45M
        assert(ptr <= end);
1846
5.45M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.45M
        state->start = state->ptr = ptr;
1848
5.45M
        status = SRE(match)(state, pattern, 1);
1849
5.45M
        state->must_advance = 0;
1850
5.45M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
2.63M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
25
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
2.63M
        {
1854
2.63M
            state->start = state->ptr = ptr = end;
1855
2.63M
            return 0;
1856
2.63M
        }
1857
91.8M
        while (status == 0 && ptr < end) {
1858
89.0M
            ptr++;
1859
89.0M
            RESET_CAPTURE_GROUP();
1860
89.0M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
89.0M
            state->start = state->ptr = ptr;
1862
89.0M
            status = SRE(match)(state, pattern, 0);
1863
89.0M
        }
1864
2.82M
    }
1865
1866
42.0M
    return status;
1867
47.1M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
55.1M
{
1694
55.1M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
55.1M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
55.1M
    Py_ssize_t status = 0;
1697
55.1M
    Py_ssize_t prefix_len = 0;
1698
55.1M
    Py_ssize_t prefix_skip = 0;
1699
55.1M
    SRE_CODE* prefix = NULL;
1700
55.1M
    SRE_CODE* charset = NULL;
1701
55.1M
    SRE_CODE* overlap = NULL;
1702
55.1M
    int flags = 0;
1703
55.1M
    INIT_TRACE(state);
1704
1705
55.1M
    if (ptr > end)
1706
0
        return 0;
1707
1708
55.1M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
55.1M
        flags = pattern[2];
1713
1714
55.1M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
94.5k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
94.5k
                   end - ptr, (size_t) pattern[3]));
1717
94.5k
            return 0;
1718
94.5k
        }
1719
55.0M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.52M
            end -= pattern[3] - 1;
1723
2.52M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.52M
        }
1726
1727
55.0M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.52M
            prefix_len = pattern[5];
1731
2.52M
            prefix_skip = pattern[6];
1732
2.52M
            prefix = pattern + 7;
1733
2.52M
            overlap = prefix + prefix_len - 1;
1734
52.5M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
50.0M
            charset = pattern + 5;
1738
1739
55.0M
        pattern += 1 + pattern[1];
1740
55.0M
    }
1741
1742
55.0M
    TRACE(("prefix = %p %zd %zd\n",
1743
55.0M
           prefix, prefix_len, prefix_skip));
1744
55.0M
    TRACE(("charset = %p\n", charset));
1745
1746
55.0M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.07M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.07M
#if SIZEOF_SRE_CHAR < 4
1750
2.07M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.07M
#endif
1753
2.07M
        end = (SRE_CHAR *)state->end;
1754
2.07M
        state->must_advance = 0;
1755
2.18M
        while (ptr < end) {
1756
53.2M
            while (*ptr != c) {
1757
51.0M
                if (++ptr >= end)
1758
69.5k
                    return 0;
1759
51.0M
            }
1760
2.10M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.10M
            state->start = ptr;
1762
2.10M
            state->ptr = ptr + prefix_skip;
1763
2.10M
            if (flags & SRE_INFO_LITERAL)
1764
4.47k
                return 1; /* we got all of it */
1765
2.10M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.10M
            if (status != 0)
1767
1.99M
                return status;
1768
107k
            ++ptr;
1769
107k
            RESET_CAPTURE_GROUP();
1770
107k
        }
1771
3.43k
        return 0;
1772
2.07M
    }
1773
1774
53.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
452k
        Py_ssize_t i = 0;
1778
1779
452k
        end = (SRE_CHAR *)state->end;
1780
452k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
452k
#if SIZEOF_SRE_CHAR < 4
1783
1.35M
        for (i = 0; i < prefix_len; i++)
1784
904k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
452k
#endif
1787
753k
        while (ptr < end) {
1788
753k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.73M
            while (*ptr++ != c) {
1790
3.97M
                if (ptr >= end)
1791
122
                    return 0;
1792
3.97M
            }
1793
753k
            if (ptr >= end)
1794
22
                return 0;
1795
1796
753k
            i = 1;
1797
753k
            state->must_advance = 0;
1798
754k
            do {
1799
754k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
733k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
733k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
733k
                    state->start = ptr - (prefix_len - 1);
1808
733k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
733k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
733k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
733k
                    if (status != 0)
1813
451k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
281k
                    if (++ptr >= end)
1816
19
                        return 0;
1817
281k
                    RESET_CAPTURE_GROUP();
1818
281k
                }
1819
302k
                i = overlap[i];
1820
302k
            } while (i != 0);
1821
753k
        }
1822
0
        return 0;
1823
452k
    }
1824
1825
52.5M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
50.0M
        end = (SRE_CHAR *)state->end;
1828
50.0M
        state->must_advance = 0;
1829
50.3M
        for (;;) {
1830
211M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
161M
                ptr++;
1832
50.3M
            if (ptr >= end)
1833
1.03M
                return 0;
1834
49.3M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
49.3M
            state->start = ptr;
1836
49.3M
            state->ptr = ptr;
1837
49.3M
            status = SRE(match)(state, pattern, 0);
1838
49.3M
            if (status != 0)
1839
49.0M
                break;
1840
340k
            ptr++;
1841
340k
            RESET_CAPTURE_GROUP();
1842
340k
        }
1843
50.0M
    } else {
1844
        /* general case */
1845
2.49M
        assert(ptr <= end);
1846
2.49M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
2.49M
        state->start = state->ptr = ptr;
1848
2.49M
        status = SRE(match)(state, pattern, 1);
1849
2.49M
        state->must_advance = 0;
1850
2.49M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
1.40M
            (pattern[1] == SRE_AT_BEGINNING ||
1852
23
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
1.40M
        {
1854
1.40M
            state->start = state->ptr = ptr = end;
1855
1.40M
            return 0;
1856
1.40M
        }
1857
191M
        while (status == 0 && ptr < end) {
1858
190M
            ptr++;
1859
190M
            RESET_CAPTURE_GROUP();
1860
190M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
190M
            state->start = state->ptr = ptr;
1862
190M
            status = SRE(match)(state, pattern, 0);
1863
190M
        }
1864
1.09M
    }
1865
1866
50.1M
    return status;
1867
52.5M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
5.69M
{
1694
5.69M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
5.69M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
5.69M
    Py_ssize_t status = 0;
1697
5.69M
    Py_ssize_t prefix_len = 0;
1698
5.69M
    Py_ssize_t prefix_skip = 0;
1699
5.69M
    SRE_CODE* prefix = NULL;
1700
5.69M
    SRE_CODE* charset = NULL;
1701
5.69M
    SRE_CODE* overlap = NULL;
1702
5.69M
    int flags = 0;
1703
5.69M
    INIT_TRACE(state);
1704
1705
5.69M
    if (ptr > end)
1706
0
        return 0;
1707
1708
5.69M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
5.69M
        flags = pattern[2];
1713
1714
5.69M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
4.69k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
4.69k
                   end - ptr, (size_t) pattern[3]));
1717
4.69k
            return 0;
1718
4.69k
        }
1719
5.68M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.09M
            end -= pattern[3] - 1;
1723
2.09M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.09M
        }
1726
1727
5.68M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.09M
            prefix_len = pattern[5];
1731
2.09M
            prefix_skip = pattern[6];
1732
2.09M
            prefix = pattern + 7;
1733
2.09M
            overlap = prefix + prefix_len - 1;
1734
3.59M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.44M
            charset = pattern + 5;
1738
1739
5.68M
        pattern += 1 + pattern[1];
1740
5.68M
    }
1741
1742
5.68M
    TRACE(("prefix = %p %zd %zd\n",
1743
5.68M
           prefix, prefix_len, prefix_skip));
1744
5.68M
    TRACE(("charset = %p\n", charset));
1745
1746
5.68M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
1.97M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
1.97M
        end = (SRE_CHAR *)state->end;
1754
1.97M
        state->must_advance = 0;
1755
2.04M
        while (ptr < end) {
1756
24.1M
            while (*ptr != c) {
1757
22.1M
                if (++ptr >= end)
1758
4.47k
                    return 0;
1759
22.1M
            }
1760
2.03M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.03M
            state->start = ptr;
1762
2.03M
            state->ptr = ptr + prefix_skip;
1763
2.03M
            if (flags & SRE_INFO_LITERAL)
1764
1.70k
                return 1; /* we got all of it */
1765
2.03M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.03M
            if (status != 0)
1767
1.96M
                return status;
1768
67.6k
            ++ptr;
1769
67.6k
            RESET_CAPTURE_GROUP();
1770
67.6k
        }
1771
818
        return 0;
1772
1.97M
    }
1773
1774
3.71M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
122k
        Py_ssize_t i = 0;
1778
1779
122k
        end = (SRE_CHAR *)state->end;
1780
122k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
374k
        while (ptr < end) {
1788
374k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
6.63M
            while (*ptr++ != c) {
1790
6.26M
                if (ptr >= end)
1791
129
                    return 0;
1792
6.26M
            }
1793
373k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
373k
            i = 1;
1797
373k
            state->must_advance = 0;
1798
374k
            do {
1799
374k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
346k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
346k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
346k
                    state->start = ptr - (prefix_len - 1);
1808
346k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
346k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
346k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
346k
                    if (status != 0)
1813
122k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
224k
                    if (++ptr >= end)
1816
20
                        return 0;
1817
224k
                    RESET_CAPTURE_GROUP();
1818
224k
                }
1819
251k
                i = overlap[i];
1820
251k
            } while (i != 0);
1821
373k
        }
1822
0
        return 0;
1823
122k
    }
1824
1825
3.59M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.44M
        end = (SRE_CHAR *)state->end;
1828
3.44M
        state->must_advance = 0;
1829
3.98M
        for (;;) {
1830
68.6M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
64.6M
                ptr++;
1832
3.98M
            if (ptr >= end)
1833
49.5k
                return 0;
1834
3.93M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.93M
            state->start = ptr;
1836
3.93M
            state->ptr = ptr;
1837
3.93M
            status = SRE(match)(state, pattern, 0);
1838
3.93M
            if (status != 0)
1839
3.39M
                break;
1840
540k
            ptr++;
1841
540k
            RESET_CAPTURE_GROUP();
1842
540k
        }
1843
3.44M
    } else {
1844
        /* general case */
1845
144k
        assert(ptr <= end);
1846
144k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
144k
        state->start = state->ptr = ptr;
1848
144k
        status = SRE(match)(state, pattern, 1);
1849
144k
        state->must_advance = 0;
1850
144k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
13.8k
            (pattern[1] == SRE_AT_BEGINNING ||
1852
19
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
13.8k
        {
1854
13.8k
            state->start = state->ptr = ptr = end;
1855
13.8k
            return 0;
1856
13.8k
        }
1857
38.4M
        while (status == 0 && ptr < end) {
1858
38.3M
            ptr++;
1859
38.3M
            RESET_CAPTURE_GROUP();
1860
38.3M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
38.3M
            state->start = state->ptr = ptr;
1862
38.3M
            status = SRE(match)(state, pattern, 0);
1863
38.3M
        }
1864
130k
    }
1865
1866
3.52M
    return status;
1867
3.59M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/