Coverage Report

Created: 2025-11-09 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_sre/sre_lib.h
Line
Count
Source
1
/*
2
 * Secret Labs' Regular Expression Engine
3
 *
4
 * regular expression matching engine
5
 *
6
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7
 *
8
 * See the sre.c file for information on usage and redistribution.
9
 */
10
11
/* String matching engine */
12
13
/* This file is included three times, with different character settings */
14
15
LOCAL(int)
16
SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at)
17
14.1M
{
18
    /* check if pointer is at given position */
19
20
14.1M
    Py_ssize_t thisp, thatp;
21
22
14.1M
    switch (at) {
23
24
6.52M
    case SRE_AT_BEGINNING:
25
6.52M
    case SRE_AT_BEGINNING_STRING:
26
6.52M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.65M
    case SRE_AT_END:
33
4.65M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
26.7k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.65M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
3.02M
    case SRE_AT_END_STRING:
42
3.02M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
14.1M
    }
87
88
0
    return 0;
89
14.1M
}
sre.c:sre_ucs1_at
Line
Count
Source
17
12.5M
{
18
    /* check if pointer is at given position */
19
20
12.5M
    Py_ssize_t thisp, thatp;
21
22
12.5M
    switch (at) {
23
24
6.48M
    case SRE_AT_BEGINNING:
25
6.48M
    case SRE_AT_BEGINNING_STRING:
26
6.48M
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
4.30M
    case SRE_AT_END:
33
4.30M
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
26.4k
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
4.30M
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
1.75M
    case SRE_AT_END_STRING:
42
1.75M
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
12.5M
    }
87
88
0
    return 0;
89
12.5M
}
sre.c:sre_ucs2_at
Line
Count
Source
17
990k
{
18
    /* check if pointer is at given position */
19
20
990k
    Py_ssize_t thisp, thatp;
21
22
990k
    switch (at) {
23
24
32.0k
    case SRE_AT_BEGINNING:
25
32.0k
    case SRE_AT_BEGINNING_STRING:
26
32.0k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
261k
    case SRE_AT_END:
33
261k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
86
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
261k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
697k
    case SRE_AT_END_STRING:
42
697k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
990k
    }
87
88
0
    return 0;
89
990k
}
sre.c:sre_ucs4_at
Line
Count
Source
17
660k
{
18
    /* check if pointer is at given position */
19
20
660k
    Py_ssize_t thisp, thatp;
21
22
660k
    switch (at) {
23
24
3.50k
    case SRE_AT_BEGINNING:
25
3.50k
    case SRE_AT_BEGINNING_STRING:
26
3.50k
        return ((void*) ptr == state->beginning);
27
28
0
    case SRE_AT_BEGINNING_LINE:
29
0
        return ((void*) ptr == state->beginning ||
30
0
                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32
81.5k
    case SRE_AT_END:
33
81.5k
        return (((SRE_CHAR *)state->end - ptr == 1 &&
34
218
                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35
81.5k
                ((void*) ptr == state->end));
36
37
0
    case SRE_AT_END_LINE:
38
0
        return ((void*) ptr == state->end ||
39
0
                SRE_IS_LINEBREAK((int) ptr[0]));
40
41
575k
    case SRE_AT_END_STRING:
42
575k
        return ((void*) ptr == state->end);
43
44
0
    case SRE_AT_BOUNDARY:
45
0
        thatp = ((void*) ptr > state->beginning) ?
46
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
47
0
        thisp = ((void*) ptr < state->end) ?
48
0
            SRE_IS_WORD((int) ptr[0]) : 0;
49
0
        return thisp != thatp;
50
51
0
    case SRE_AT_NON_BOUNDARY:
52
0
        thatp = ((void*) ptr > state->beginning) ?
53
0
            SRE_IS_WORD((int) ptr[-1]) : 0;
54
0
        thisp = ((void*) ptr < state->end) ?
55
0
            SRE_IS_WORD((int) ptr[0]) : 0;
56
0
        return thisp == thatp;
57
58
0
    case SRE_AT_LOC_BOUNDARY:
59
0
        thatp = ((void*) ptr > state->beginning) ?
60
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
61
0
        thisp = ((void*) ptr < state->end) ?
62
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
63
0
        return thisp != thatp;
64
65
0
    case SRE_AT_LOC_NON_BOUNDARY:
66
0
        thatp = ((void*) ptr > state->beginning) ?
67
0
            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
68
0
        thisp = ((void*) ptr < state->end) ?
69
0
            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
70
0
        return thisp == thatp;
71
72
0
    case SRE_AT_UNI_BOUNDARY:
73
0
        thatp = ((void*) ptr > state->beginning) ?
74
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
75
0
        thisp = ((void*) ptr < state->end) ?
76
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
77
0
        return thisp != thatp;
78
79
0
    case SRE_AT_UNI_NON_BOUNDARY:
80
0
        thatp = ((void*) ptr > state->beginning) ?
81
0
            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
82
0
        thisp = ((void*) ptr < state->end) ?
83
0
            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
84
0
        return thisp == thatp;
85
86
660k
    }
87
88
0
    return 0;
89
660k
}
90
91
LOCAL(int)
92
SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
93
1.39G
{
94
    /* check if character is a member of the given set */
95
96
1.39G
    int ok = 1;
97
98
3.18G
    for (;;) {
99
3.18G
        switch (*set++) {
100
101
925M
        case SRE_OP_FAILURE:
102
925M
            return !ok;
103
104
1.02G
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
1.02G
            if (ch == set[0])
107
4.75M
                return ok;
108
1.01G
            set++;
109
1.01G
            break;
110
111
12.5M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
12.5M
            if (sre_category(set[0], (int) ch))
114
8.74M
                return ok;
115
3.80M
            set++;
116
3.80M
            break;
117
118
586M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
586M
            if (ch < 256 &&
121
544M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
242M
                return ok;
123
344M
            set += 256/SRE_CODE_BITS;
124
344M
            break;
125
126
352M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
352M
            if (set[0] <= ch && ch <= set[1])
129
217M
                return ok;
130
134M
            set += 2;
131
134M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
283M
        case SRE_OP_NEGATE:
148
283M
            ok = !ok;
149
283M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
3.18G
        }
175
3.18G
    }
176
1.39G
}
sre.c:sre_ucs1_charset
Line
Count
Source
93
337M
{
94
    /* check if character is a member of the given set */
95
96
337M
    int ok = 1;
97
98
681M
    for (;;) {
99
681M
        switch (*set++) {
100
101
179M
        case SRE_OP_FAILURE:
102
179M
            return !ok;
103
104
184M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
184M
            if (ch == set[0])
107
2.42M
                return ok;
108
181M
            set++;
109
181M
            break;
110
111
11.9M
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
11.9M
            if (sre_category(set[0], (int) ch))
114
8.19M
                return ok;
115
3.79M
            set++;
116
3.79M
            break;
117
118
95.7M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
95.7M
            if (ch < 256 &&
121
95.7M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
46.4M
                return ok;
123
49.2M
            set += 256/SRE_CODE_BITS;
124
49.2M
            break;
125
126
168M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
168M
            if (set[0] <= ch && ch <= set[1])
129
100M
                return ok;
130
67.5M
            set += 2;
131
67.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
41.7M
        case SRE_OP_NEGATE:
148
41.7M
            ok = !ok;
149
41.7M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
681M
        }
175
681M
    }
176
337M
}
sre.c:sre_ucs2_charset
Line
Count
Source
93
623M
{
94
    /* check if character is a member of the given set */
95
96
623M
    int ok = 1;
97
98
1.50G
    for (;;) {
99
1.50G
        switch (*set++) {
100
101
450M
        case SRE_OP_FAILURE:
102
450M
            return !ok;
103
104
566M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
566M
            if (ch == set[0])
107
1.42M
                return ok;
108
565M
            set++;
109
565M
            break;
110
111
120k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
120k
            if (sre_category(set[0], (int) ch))
114
112k
                return ok;
115
8.13k
            set++;
116
8.13k
            break;
117
118
205M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
205M
            if (ch < 256 &&
121
191M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
67.2M
                return ok;
123
138M
            set += 256/SRE_CODE_BITS;
124
138M
            break;
125
126
162M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
162M
            if (set[0] <= ch && ch <= set[1])
129
104M
                return ok;
130
58.5M
            set += 2;
131
58.5M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
119M
        case SRE_OP_NEGATE:
148
119M
            ok = !ok;
149
119M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
1.50G
        }
175
1.50G
    }
176
623M
}
sre.c:sre_ucs4_charset
Line
Count
Source
93
437M
{
94
    /* check if character is a member of the given set */
95
96
437M
    int ok = 1;
97
98
993M
    for (;;) {
99
993M
        switch (*set++) {
100
101
295M
        case SRE_OP_FAILURE:
102
295M
            return !ok;
103
104
269M
        case SRE_OP_LITERAL:
105
            /* <LITERAL> <code> */
106
269M
            if (ch == set[0])
107
902k
                return ok;
108
268M
            set++;
109
268M
            break;
110
111
436k
        case SRE_OP_CATEGORY:
112
            /* <CATEGORY> <code> */
113
436k
            if (sre_category(set[0], (int) ch))
114
436k
                return ok;
115
522
            set++;
116
522
            break;
117
118
285M
        case SRE_OP_CHARSET:
119
            /* <CHARSET> <bitmap> */
120
285M
            if (ch < 256 &&
121
257M
                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
122
128M
                return ok;
123
156M
            set += 256/SRE_CODE_BITS;
124
156M
            break;
125
126
21.2M
        case SRE_OP_RANGE:
127
            /* <RANGE> <lower> <upper> */
128
21.2M
            if (set[0] <= ch && ch <= set[1])
129
12.5M
                return ok;
130
8.65M
            set += 2;
131
8.65M
            break;
132
133
0
        case SRE_OP_RANGE_UNI_IGNORE:
134
            /* <RANGE_UNI_IGNORE> <lower> <upper> */
135
0
        {
136
0
            SRE_CODE uch;
137
            /* ch is already lower cased */
138
0
            if (set[0] <= ch && ch <= set[1])
139
0
                return ok;
140
0
            uch = sre_upper_unicode(ch);
141
0
            if (set[0] <= uch && uch <= set[1])
142
0
                return ok;
143
0
            set += 2;
144
0
            break;
145
0
        }
146
147
121M
        case SRE_OP_NEGATE:
148
121M
            ok = !ok;
149
121M
            break;
150
151
0
        case SRE_OP_BIGCHARSET:
152
            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
153
0
        {
154
0
            Py_ssize_t count, block;
155
0
            count = *(set++);
156
157
0
            if (ch < 0x10000u)
158
0
                block = ((unsigned char*)set)[ch >> 8];
159
0
            else
160
0
                block = -1;
161
0
            set += 256/sizeof(SRE_CODE);
162
0
            if (block >=0 &&
163
0
                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
164
0
                    (1u << (ch & (SRE_CODE_BITS-1)))))
165
0
                return ok;
166
0
            set += count * (256/SRE_CODE_BITS);
167
0
            break;
168
0
        }
169
170
0
        default:
171
            /* internal error -- there's not much we can do about it
172
               here, so let's just pretend it didn't match... */
173
0
            return 0;
174
993M
        }
175
993M
    }
176
437M
}
177
178
LOCAL(int)
179
SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch)
180
0
{
181
0
    SRE_CODE lo, up;
182
0
    lo = sre_lower_locale(ch);
183
0
    if (SRE(charset)(state, set, lo))
184
0
       return 1;
185
186
0
    up = sre_upper_locale(ch);
187
0
    return up != lo && SRE(charset)(state, set, up);
188
0
}
Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore
Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore
189
190
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel);
191
192
LOCAL(Py_ssize_t)
193
SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount)
194
518M
{
195
518M
    SRE_CODE chr;
196
518M
    SRE_CHAR c;
197
518M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
518M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
518M
    Py_ssize_t i;
200
518M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
518M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
18.6M
        end = ptr + maxcount;
205
206
518M
    switch (pattern[0]) {
207
208
436M
    case SRE_OP_IN:
209
        /* repeated set */
210
436M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
806M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
370M
            ptr++;
213
436M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
75.9M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
75.9M
        chr = pattern[1];
232
75.9M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
75.9M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
71.7M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
71.7M
        else
238
71.7M
#endif
239
82.0M
        while (ptr < end && *ptr == c)
240
6.14M
            ptr++;
241
75.9M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
5.99M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
5.99M
        chr = pattern[1];
270
5.99M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
5.99M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
3.20M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
3.20M
        else
276
3.20M
#endif
277
55.4M
        while (ptr < end && *ptr != c)
278
49.4M
            ptr++;
279
5.99M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
518M
    }
319
320
518M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
518M
           ptr - (SRE_CHAR*) state->ptr));
322
518M
    return ptr - (SRE_CHAR*) state->ptr;
323
518M
}
sre.c:sre_ucs1_count
Line
Count
Source
194
157M
{
195
157M
    SRE_CODE chr;
196
157M
    SRE_CHAR c;
197
157M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
157M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
157M
    Py_ssize_t i;
200
157M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
157M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
4.03M
        end = ptr + maxcount;
205
206
157M
    switch (pattern[0]) {
207
208
93.4M
    case SRE_OP_IN:
209
        /* repeated set */
210
93.4M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
213M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
120M
            ptr++;
213
93.4M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
63.0M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
63.0M
        chr = pattern[1];
232
63.0M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
63.0M
        c = (SRE_CHAR) chr;
234
63.0M
#if SIZEOF_SRE_CHAR < 4
235
63.0M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
63.0M
        else
238
63.0M
#endif
239
65.0M
        while (ptr < end && *ptr == c)
240
1.91M
            ptr++;
241
63.0M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
942k
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
942k
        chr = pattern[1];
270
942k
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
942k
        c = (SRE_CHAR) chr;
272
942k
#if SIZEOF_SRE_CHAR < 4
273
942k
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
942k
        else
276
942k
#endif
277
12.9M
        while (ptr < end && *ptr != c)
278
11.9M
            ptr++;
279
942k
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
157M
    }
319
320
157M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
157M
           ptr - (SRE_CHAR*) state->ptr));
322
157M
    return ptr - (SRE_CHAR*) state->ptr;
323
157M
}
sre.c:sre_ucs2_count
Line
Count
Source
194
210M
{
195
210M
    SRE_CODE chr;
196
210M
    SRE_CHAR c;
197
210M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
210M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
210M
    Py_ssize_t i;
200
210M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
210M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
6.81M
        end = ptr + maxcount;
205
206
210M
    switch (pattern[0]) {
207
208
199M
    case SRE_OP_IN:
209
        /* repeated set */
210
199M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
327M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
127M
            ptr++;
213
199M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
8.68M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
8.68M
        chr = pattern[1];
232
8.68M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
8.68M
        c = (SRE_CHAR) chr;
234
8.68M
#if SIZEOF_SRE_CHAR < 4
235
8.68M
        if ((SRE_CODE) c != chr)
236
0
            ; /* literal can't match: doesn't fit in char width */
237
8.68M
        else
238
8.68M
#endif
239
12.2M
        while (ptr < end && *ptr == c)
240
3.55M
            ptr++;
241
8.68M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.26M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.26M
        chr = pattern[1];
270
2.26M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.26M
        c = (SRE_CHAR) chr;
272
2.26M
#if SIZEOF_SRE_CHAR < 4
273
2.26M
        if ((SRE_CODE) c != chr)
274
0
            ptr = end; /* literal can't match: doesn't fit in char width */
275
2.26M
        else
276
2.26M
#endif
277
18.4M
        while (ptr < end && *ptr != c)
278
16.1M
            ptr++;
279
2.26M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
210M
    }
319
320
210M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
210M
           ptr - (SRE_CHAR*) state->ptr));
322
210M
    return ptr - (SRE_CHAR*) state->ptr;
323
210M
}
sre.c:sre_ucs4_count
Line
Count
Source
194
149M
{
195
149M
    SRE_CODE chr;
196
149M
    SRE_CHAR c;
197
149M
    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr;
198
149M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
199
149M
    Py_ssize_t i;
200
149M
    INIT_TRACE(state);
201
202
    /* adjust end */
203
149M
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
204
7.81M
        end = ptr + maxcount;
205
206
149M
    switch (pattern[0]) {
207
208
143M
    case SRE_OP_IN:
209
        /* repeated set */
210
143M
        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
211
265M
        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
212
122M
            ptr++;
213
143M
        break;
214
215
0
    case SRE_OP_ANY:
216
        /* repeated dot wildcard. */
217
0
        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
218
0
        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
219
0
            ptr++;
220
0
        break;
221
222
0
    case SRE_OP_ANY_ALL:
223
        /* repeated dot wildcard.  skip to the end of the target
224
           string, and backtrack from there */
225
0
        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
226
0
        ptr = end;
227
0
        break;
228
229
4.14M
    case SRE_OP_LITERAL:
230
        /* repeated literal */
231
4.14M
        chr = pattern[1];
232
4.14M
        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
233
4.14M
        c = (SRE_CHAR) chr;
234
#if SIZEOF_SRE_CHAR < 4
235
        if ((SRE_CODE) c != chr)
236
            ; /* literal can't match: doesn't fit in char width */
237
        else
238
#endif
239
4.81M
        while (ptr < end && *ptr == c)
240
670k
            ptr++;
241
4.14M
        break;
242
243
0
    case SRE_OP_LITERAL_IGNORE:
244
        /* repeated literal */
245
0
        chr = pattern[1];
246
0
        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
247
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
248
0
            ptr++;
249
0
        break;
250
251
0
    case SRE_OP_LITERAL_UNI_IGNORE:
252
        /* repeated literal */
253
0
        chr = pattern[1];
254
0
        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
255
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
256
0
            ptr++;
257
0
        break;
258
259
0
    case SRE_OP_LITERAL_LOC_IGNORE:
260
        /* repeated literal */
261
0
        chr = pattern[1];
262
0
        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
263
0
        while (ptr < end && char_loc_ignore(chr, *ptr))
264
0
            ptr++;
265
0
        break;
266
267
2.78M
    case SRE_OP_NOT_LITERAL:
268
        /* repeated non-literal */
269
2.78M
        chr = pattern[1];
270
2.78M
        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
271
2.78M
        c = (SRE_CHAR) chr;
272
#if SIZEOF_SRE_CHAR < 4
273
        if ((SRE_CODE) c != chr)
274
            ptr = end; /* literal can't match: doesn't fit in char width */
275
        else
276
#endif
277
24.1M
        while (ptr < end && *ptr != c)
278
21.3M
            ptr++;
279
2.78M
        break;
280
281
0
    case SRE_OP_NOT_LITERAL_IGNORE:
282
        /* repeated non-literal */
283
0
        chr = pattern[1];
284
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
285
0
        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
286
0
            ptr++;
287
0
        break;
288
289
0
    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
290
        /* repeated non-literal */
291
0
        chr = pattern[1];
292
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
293
0
        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
294
0
            ptr++;
295
0
        break;
296
297
0
    case SRE_OP_NOT_LITERAL_LOC_IGNORE:
298
        /* repeated non-literal */
299
0
        chr = pattern[1];
300
0
        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
301
0
        while (ptr < end && !char_loc_ignore(chr, *ptr))
302
0
            ptr++;
303
0
        break;
304
305
0
    default:
306
        /* repeated single character pattern */
307
0
        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
308
0
        while ((SRE_CHAR*) state->ptr < end) {
309
0
            i = SRE(match)(state, pattern, 0);
310
0
            if (i < 0)
311
0
                return i;
312
0
            if (!i)
313
0
                break;
314
0
        }
315
0
        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
316
0
               (SRE_CHAR*) state->ptr - ptr));
317
0
        return (SRE_CHAR*) state->ptr - ptr;
318
149M
    }
319
320
149M
    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr,
321
149M
           ptr - (SRE_CHAR*) state->ptr));
322
149M
    return ptr - (SRE_CHAR*) state->ptr;
323
149M
}
324
325
/* The macros below should be used to protect recursive SRE(match)()
326
 * calls that *failed* and do *not* return immediately (IOW, those
327
 * that will backtrack). Explaining:
328
 *
329
 * - Recursive SRE(match)() returned true: that's usually a success
330
 *   (besides atypical cases like ASSERT_NOT), therefore there's no
331
 *   reason to restore lastmark;
332
 *
333
 * - Recursive SRE(match)() returned false but the current SRE(match)()
334
 *   is returning to the caller: If the current SRE(match)() is the
335
 *   top function of the recursion, returning false will be a matching
336
 *   failure, and it doesn't matter where lastmark is pointing to.
337
 *   If it's *not* the top function, it will be a recursive SRE(match)()
338
 *   failure by itself, and the calling SRE(match)() will have to deal
339
 *   with the failure by the same rules explained here (it will restore
340
 *   lastmark by itself if necessary);
341
 *
342
 * - Recursive SRE(match)() returned false, and will continue the
343
 *   outside 'for' loop: must be protected when breaking, since the next
344
 *   OP could potentially depend on lastmark;
345
 *
346
 * - Recursive SRE(match)() returned false, and will be called again
347
 *   inside a local for/while loop: must be protected between each
348
 *   loop iteration, since the recursive SRE(match)() could do anything,
349
 *   and could potentially depend on lastmark.
350
 *
351
 * For more information, check the discussion at SF patch #712900.
352
 */
353
/* Snapshot state->lastmark and state->lastindex into the current match
   context (ctx) so that LASTMARK_RESTORE() can roll them back later.
   Expects `ctx` and `state` to be in scope at the expansion site. */
#define LASTMARK_SAVE()     \
354
441M
    do { \
355
441M
        ctx->lastmark = state->lastmark; \
356
441M
        ctx->lastindex = state->lastindex; \
357
441M
    } while (0)
358
/* Inverse of LASTMARK_SAVE(): copy the lastmark/lastindex values stored in
   ctx back into state. */
#define LASTMARK_RESTORE()  \
359
159M
    do { \
360
159M
        state->lastmark = ctx->lastmark; \
361
159M
        state->lastindex = ctx->lastindex; \
362
159M
    } while (0)
363
364
/* Trace the current repeat's last_ptr (as a character index) and then push
   its value onto the data stack via DATA_PUSH().  Reads ctx->u.rep, so the
   context's union must currently hold a repeat pointer. */
#define LAST_PTR_PUSH()     \
365
128M
    do { \
366
128M
        TRACE(("push last_ptr: %zd", \
367
128M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
368
128M
        DATA_PUSH(&ctx->u.rep->last_ptr); \
369
128M
    } while (0)
370
/* Counterpart of LAST_PTR_PUSH(): pop the saved value from the data stack
   back into ctx->u.rep->last_ptr, then trace the restored index. */
#define LAST_PTR_POP()  \
371
128M
    do { \
372
128M
        DATA_POP(&ctx->u.rep->last_ptr); \
373
128M
        TRACE(("pop last_ptr: %zd", \
374
128M
                PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
375
128M
    } while (0)
376
377
0
/* Result helpers for the matcher.
   RETURN_ERROR(i) returns i straight out of the enclosing function.
   RETURN_FAILURE / RETURN_SUCCESS set the local `ret` to 0 / 1 and jump to
   the enclosing function's `exit` label (defined outside this excerpt). */
#define RETURN_ERROR(i) do { return i; } while(0)
378
509M
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
379
578M
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
380
381
/* Propagate a negative (error) result i to the caller; otherwise fall
   through.  Note that i is evaluated more than once. */
#define RETURN_ON_ERROR(i) \
382
972M
    do { if (i < 0) RETURN_ERROR(i); } while (0)
383
/* Propagate errors; additionally finish with success when i > 0. */
#define RETURN_ON_SUCCESS(i) \
384
60.3M
    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
385
/* Propagate errors; additionally finish with failure when i == 0. */
#define RETURN_ON_FAILURE(i) \
386
42.0M
    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
387
388
1.08G
/* Reserve sizeof(type) bytes at the top of state's data stack and make
   `ptr` point at them.
   - Stores the offset of the new object in the local `alloc_pos`.
   - If the remaining capacity is too small, calls data_stack_grow(); a
     negative result is returned directly from the enclosing function.
   - After growing, `ctx` is looked up again from `ctx_pos` (unless ctx_pos
     is -1).  NOTE(review): presumably because growing can relocate
     state->data_stack and invalidate pointers into it -- confirm against
     data_stack_grow().
   Requires `alloc_pos`, `ctx_pos` and `ctx` in scope at the expansion site. */
#define DATA_STACK_ALLOC(state, type, ptr) \
389
1.08G
do { \
390
1.08G
    alloc_pos = state->data_stack_base; \
391
1.08G
    TRACE(("allocating %s in %zd (%zd)\n", \
392
1.08G
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
393
1.08G
    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
394
150M
        int j = data_stack_grow(state, sizeof(type)); \
395
150M
        if (j < 0) return j; \
396
150M
        if (ctx_pos != -1) \
397
150M
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
398
150M
    } \
399
1.08G
    ptr = (type*)(state->data_stack+alloc_pos); \
400
1.08G
    state->data_stack_base += sizeof(type); \
401
1.08G
} while (0)
402
403
1.13G
/* Set `ptr` to the object of the given `type` located `pos` bytes from the
   start of state->data_stack.  Only computes the address; the stack base
   is not modified. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
404
1.13G
do { \
405
1.13G
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
406
1.13G
    ptr = (type*)(state->data_stack+pos); \
407
1.13G
} while (0)
408
409
338M
/* Copy `size` bytes from `data` onto the top of state's data stack and
   advance data_stack_base by `size`.
   If the stack lacks room, data_stack_grow() is called first; a negative
   result is returned from the enclosing function, and on success `ctx` is
   looked up again from `ctx_pos` (unless ctx_pos is -1).
   `size` is evaluated several times, so pass a side-effect-free
   expression. */
#define DATA_STACK_PUSH(state, data, size) \
410
338M
do { \
411
338M
    TRACE(("copy data in %p to %zd (%zd)\n", \
412
338M
           data, state->data_stack_base, size)); \
413
338M
    if (size > state->data_stack_size - state->data_stack_base) { \
414
77.1k
        int j = data_stack_grow(state, size); \
415
77.1k
        if (j < 0) return j; \
416
77.1k
        if (ctx_pos != -1) \
417
77.1k
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
418
77.1k
    } \
419
338M
    memcpy(state->data_stack+state->data_stack_base, data, size); \
420
338M
    state->data_stack_base += size; \
421
338M
} while (0)
422
423
/* We add an explicit cast to memcpy here because MSVC has a bug when
424
   compiling C code where it believes that `const void**` cannot be
425
   safely cast to `void*`, see bpo-39943 for details. */
426
184M
/* Copy the topmost `size` bytes of state's data stack into `data`.  When
   `discard` is non-zero the bytes are also removed from the stack
   (data_stack_base is decreased by `size`); when it is zero they are left
   in place. */
#define DATA_STACK_POP(state, data, size, discard) \
427
184M
do { \
428
184M
    TRACE(("copy data to %p from %zd (%zd)\n", \
429
184M
           data, state->data_stack_base-size, size)); \
430
184M
    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \
431
184M
    if (discard) \
432
184M
        state->data_stack_base -= size; \
433
184M
} while (0)
434
435
1.24G
/* Drop the topmost `size` bytes from state's data stack without copying
   them anywhere: only data_stack_base is decreased. */
#define DATA_STACK_POP_DISCARD(state, size) \
436
1.24G
do { \
437
1.24G
    TRACE(("discard data from %zd (%zd)\n", \
438
1.24G
           state->data_stack_base-size, size)); \
439
1.24G
    state->data_stack_base -= size; \
440
1.24G
} while(0)
441
442
/* Shorthands for the DATA_STACK_* macros that implicitly use the local
   variable `state`.  For the push/pop forms, x is a pointer to the object
   and the byte count is derived from sizeof(*(x)). */

/* Push a copy of the object x points to. */
#define DATA_PUSH(x) \
443
128M
    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
444
/* Pop the top of the stack into *x and remove it (discard = 1). */
#define DATA_POP(x) \
445
128M
    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
446
/* Remove sizeof(*(x)) bytes from the stack without copying them. */
#define DATA_POP_DISCARD(x) \
447
1.08G
    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
448
/* Allocate an object of type t on the stack and point p at it. */
#define DATA_ALLOC(t,p) \
449
1.08G
    DATA_STACK_ALLOC(state, t, p)
450
/* Point p at the object of type t stored at byte offset pos. */
#define DATA_LOOKUP_AT(t,p,pos) \
451
1.13G
    DATA_STACK_LOOKUP_AT(state,t,p,pos)
452
453
/* Convert a pointer into the subject string to a character index: the byte
   distance from state->beginning divided by state->charsize.  A NULL
   pointer yields -1.  Used by the trace output in this file. */
#define PTR_TO_INDEX(ptr) \
454
    ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1)
455
456
/* MARK_TRACE(label, lastmark): debugging aid.
   With VERBOSE enabled and DO_TRACE true, prints `label`, the number of
   marks (lastmark + 1) and the index of every entry state->mark[0..lastmark],
   inserting an extra space before each even-numbered entry other than the
   first so that the marks are visually grouped in pairs.
   Without VERBOSE it expands to nothing. */
#if VERBOSE
457
#  define MARK_TRACE(label, lastmark) \
458
    do if (DO_TRACE) { \
459
        TRACE(("%s %d marks:", (label), (lastmark)+1)); \
460
        for (int j = 0; j <= (lastmark); j++) { \
461
            if (j && (j & 1) == 0) { \
462
                TRACE((" ")); \
463
            } \
464
            TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \
465
        } \
466
        TRACE(("\n")); \
467
    } while (0)
468
#else
469
#  define MARK_TRACE(label, lastmark)
470
#endif
471
/* Save/restore helpers for the state->mark array.  Each one is a no-op
   when lastmark < 0; otherwise it operates on the first (lastmark + 1)
   pointer-sized entries of state->mark.  The `lastmark` argument is used
   unparenthesized and more than once, so pass a simple expression. */

/* Push a copy of state->mark[0..lastmark] onto the data stack. */
#define MARK_PUSH(lastmark) \
472
327M
    do if (lastmark >= 0) { \
473
210M
        MARK_TRACE("push", (lastmark)); \
474
210M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
475
210M
        DATA_STACK_PUSH(state, state->mark, _marks_size); \
476
327M
    } while (0)
477
/* Restore state->mark from the top of the data stack and remove the saved
   copy from the stack. */
#define MARK_POP(lastmark) \
478
73.4M
    do if (lastmark >= 0) { \
479
53.5M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
480
53.5M
        DATA_STACK_POP(state, state->mark, _marks_size, 1); \
481
53.5M
        MARK_TRACE("pop", (lastmark)); \
482
73.4M
    } while (0)
483
/* Restore state->mark from the top of the data stack but leave the saved
   copy on the stack (discard = 0), so it can be restored again. */
#define MARK_POP_KEEP(lastmark) \
484
1.89M
    do if (lastmark >= 0) { \
485
1.89M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
486
1.89M
        DATA_STACK_POP(state, state->mark, _marks_size, 0); \
487
1.89M
        MARK_TRACE("pop keep", (lastmark)); \
488
1.89M
    } while (0)
489
/* Remove the saved copy from the data stack without touching state->mark. */
#define MARK_POP_DISCARD(lastmark) \
490
254M
    do if (lastmark >= 0) { \
491
156M
        size_t _marks_size = (lastmark+1) * sizeof(void*); \
492
156M
        DATA_STACK_POP_DISCARD(state, _marks_size); \
493
156M
        MARK_TRACE("pop discard", (lastmark)); \
494
254M
    } while (0)
495
496
473M
/* Identifiers stored in a match context's `jump` field.  DO_JUMPX() writes
   one of these into the newly allocated context; SRE(match)() initializes
   its first context with JUMP_NONE.
   NOTE(review): presumably the value selects which jump label to resume at
   once the nested context finishes -- the code that consumes it lies
   outside this excerpt. */
#define JUMP_NONE            0
497
0
#define JUMP_MAX_UNTIL_1     1
498
128M
#define JUMP_MAX_UNTIL_2     2
499
60.3M
#define JUMP_MAX_UNTIL_3     3
500
0
#define JUMP_MIN_UNTIL_1     4
501
0
#define JUMP_MIN_UNTIL_2     5
502
0
#define JUMP_MIN_UNTIL_3     6
503
59.3M
#define JUMP_REPEAT          7
504
16.0M
#define JUMP_REPEAT_ONE_1    8
505
116M
#define JUMP_REPEAT_ONE_2    9
506
0
#define JUMP_MIN_REPEAT_ONE  10
507
164M
#define JUMP_BRANCH          11
508
42.0M
#define JUMP_ASSERT          12
509
25.8M
#define JUMP_ASSERT_NOT      13
510
0
#define JUMP_POSS_REPEAT_1   14
511
0
#define JUMP_POSS_REPEAT_2   15
512
0
#define JUMP_ATOMIC_GROUP    16
513
514
/* Start matching `nextpattern` in a fresh match context without a C-level
   recursive call.
   Steps, in order:
     1. save the current pattern and ptr into ctx;
     2. allocate a new context on the data stack and fill in its pattern,
        toplevel flag (toplevel_), jump id (jumpvalue) and the position of
        the current context (last_ctx_pos = ctx_pos);
     3. make the new context current (pattern, ctx_pos, ctx) and jump to the
        `entrance` label;
     4. `jumplabel:` is the resume point: pattern and ptr are reloaded from
        ctx.  NOTE(review): this relies on ctx having been switched back to
        the saved context before control reaches the label -- that code is
        outside this excerpt.
   This is a plain statement sequence (not wrapped in do/while), because it
   has to define a label; use it only as a full statement inside a block. */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
515
613M
    ctx->pattern = pattern; \
516
613M
    ctx->ptr = ptr; \
517
613M
    DATA_ALLOC(SRE(match_context), nextctx); \
518
613M
    nextctx->pattern = nextpattern; \
519
613M
    nextctx->toplevel = toplevel_; \
520
613M
    nextctx->jump = jumpvalue; \
521
613M
    nextctx->last_ctx_pos = ctx_pos; \
522
613M
    pattern = nextpattern; \
523
613M
    ctx_pos = alloc_pos; \
524
613M
    ctx = nextctx; \
525
613M
    goto entrance; \
526
613M
    jumplabel: \
527
613M
    pattern = ctx->pattern; \
528
613M
    ptr = ctx->ptr;
529
530
/* DO_JUMPX() variant in which the new context inherits the current
   context's toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
531
545M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
532
533
/* DO_JUMPX() variant in which the new context's toplevel flag is 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
534
67.9M
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535
536
/* Per-invocation frame of SRE(match)(), allocated on state's data stack.
   A new frame is created on entry to SRE(match)() and by every DO_JUMPX(). */
typedef struct {
537
    Py_ssize_t count;  /* NOTE(review): not referenced in this excerpt; presumably a repeat counter -- confirm */
538
    union {
539
        SRE_CODE chr;  /* NOTE(review): not referenced in this excerpt */
540
        SRE_REPEAT* rep;  /* repeat whose last_ptr is saved by LAST_PTR_PUSH()/LAST_PTR_POP() */
541
    } u;
542
    int lastmark;  /* state->lastmark as captured by LASTMARK_SAVE() */
543
    int lastindex;  /* state->lastindex as captured by LASTMARK_SAVE() */
544
    const SRE_CODE* pattern;  /* pattern position; saved by DO_JUMPX() and reloaded at its jump label */
545
    const SRE_CHAR* ptr;  /* subject position; saved by DO_JUMPX() and reloaded at its jump label */
546
    int toplevel;  /* toplevel flag, from SRE(match)()'s argument or DO_JUMPX()'s toplevel_ */
547
    int jump;  /* one of the JUMP_* ids; JUMP_NONE for the initial frame */
548
    Py_ssize_t last_ctx_pos;  /* data-stack offset of the frame that created this one; -1 for the initial frame */
549
} SRE(match_context);
550
551
/* Increment the local `sigcount` and, whenever its low 12 bits become zero
   (i.e. once every 4096 increments), call PyErr_CheckSignals().  If that
   call reports a non-zero result, leave the enclosing function with
   SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS                                       \
552
1.96G
    do {                                                           \
553
1.96G
        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \
554
0
            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \
555
0
        }                                                          \
556
1.96G
    } while (0)
557
558
/* MAYBE_CHECK_SIGNALS is what the dispatch code uses.
   In Py_DEBUG builds it additionally implements a countdown: while
   state->fail_after_count is >= 0 it is decremented on every use, and when
   it was 0 before the decrement the exception state->fail_after_exc is set
   and the enclosing function returns SRE_ERROR_INTERRUPTED.
   In other builds it is just _MAYBE_CHECK_SIGNALS. */
#ifdef Py_DEBUG
559
# define MAYBE_CHECK_SIGNALS                                       \
560
    do {                                                           \
561
        _MAYBE_CHECK_SIGNALS;                                      \
562
        if (state->fail_after_count >= 0) {                        \
563
            if (state->fail_after_count-- == 0) {                  \
564
                PyErr_SetNone(state->fail_after_exc);              \
565
                RETURN_ERROR(SRE_ERROR_INTERRUPTED);               \
566
            }                                                      \
567
        }                                                          \
568
    } while (0)
569
#else
570
1.96G
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
571
#endif /* Py_DEBUG */
572
573
/* Normalize USE_COMPUTED_GOTOS to a defined 0/1 value:
   - HAVE_COMPUTED_GOTOS defined: default to 1 unless USE_COMPUTED_GOTOS was
     already defined (an explicit setting is kept as is);
   - HAVE_COMPUTED_GOTOS not defined but USE_COMPUTED_GOTOS requested with a
     non-zero value: compile-time error;
   - otherwise: force USE_COMPUTED_GOTOS to 0. */
#ifdef HAVE_COMPUTED_GOTOS
574
    #ifndef USE_COMPUTED_GOTOS
575
    #define USE_COMPUTED_GOTOS 1
576
    #endif
577
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
578
    #error "Computed gotos are not supported on this compiler."
579
#else
580
    #undef USE_COMPUTED_GOTOS
581
    #define USE_COMPUTED_GOTOS 0
582
#endif
583
584
/* Opcode dispatch primitives used by SRE(match)().
   With computed gotos: TARGET(OP) names the label TARGET_<OP>, and DISPATCH
   runs MAYBE_CHECK_SIGNALS, fetches the next opcode (advancing `pattern`)
   and jumps through the sre_targets table (presumably supplied by the
   "sre_targets.h" include inside SRE(match)()).
   Without them: TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label, where the signal check and the switch live. */
#if USE_COMPUTED_GOTOS
585
2.04G
    #define TARGET(OP) TARGET_ ## OP
586
    #define DISPATCH                       \
587
1.96G
        do {                               \
588
1.96G
            MAYBE_CHECK_SIGNALS;           \
589
1.96G
            goto *sre_targets[*pattern++]; \
590
1.96G
        } while (0)
591
#else
592
    #define TARGET(OP) case OP
593
    #define DISPATCH goto dispatch
594
#endif
595
596
/* check if string matches the given pattern.  returns <0 for
597
   error, 0 for failure, and 1 for success */
598
LOCAL(Py_ssize_t)
599
SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
600
473M
{
601
473M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
473M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
473M
    Py_ssize_t ret = 0;
604
473M
    int jump;
605
473M
    unsigned int sigcount = state->sigcount;
606
607
473M
    SRE(match_context)* ctx;
608
473M
    SRE(match_context)* nextctx;
609
473M
    INIT_TRACE(state);
610
611
473M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
473M
    DATA_ALLOC(SRE(match_context), ctx);
614
473M
    ctx->last_ctx_pos = -1;
615
473M
    ctx->jump = JUMP_NONE;
616
473M
    ctx->toplevel = toplevel;
617
473M
    ctx_pos = alloc_pos;
618
619
473M
#if USE_COMPUTED_GOTOS
620
473M
#include "sre_targets.h"
621
473M
#endif
622
623
1.08G
entrance:
624
625
1.08G
    ;  // Fashion statement.
626
1.08G
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
1.08G
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
65.0M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2.90M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2.90M
                   end - ptr, (size_t) pattern[3]));
634
2.90M
            RETURN_FAILURE;
635
2.90M
        }
636
62.1M
        pattern += pattern[1] + 1;
637
62.1M
    }
638
639
1.08G
#if USE_COMPUTED_GOTOS
640
1.08G
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
1.08G
    {
647
648
1.08G
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
444M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
444M
                   ptr, pattern[0]));
653
444M
            {
654
444M
                int i = pattern[0];
655
444M
                if (i & 1)
656
54.9M
                    state->lastindex = i/2 + 1;
657
444M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
436M
                    int j = state->lastmark + 1;
663
444M
                    while (j < i)
664
7.42M
                        state->mark[j++] = NULL;
665
436M
                    state->lastmark = i;
666
436M
                }
667
444M
                state->mark[i] = ptr;
668
444M
            }
669
444M
            pattern++;
670
444M
            DISPATCH;
671
672
444M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
133M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
133M
                   ptr, *pattern));
677
133M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
40.9M
                RETURN_FAILURE;
679
92.7M
            pattern++;
680
92.7M
            ptr++;
681
92.7M
            DISPATCH;
682
683
92.7M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
167M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
167M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
167M
            if (ctx->toplevel &&
698
49.2M
                ((state->match_all && ptr != state->end) ||
699
49.2M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
167M
            state->ptr = ptr;
704
167M
            RETURN_SUCCESS;
705
706
14.1M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
14.1M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
14.1M
            if (!SRE(at)(state, ptr, *pattern))
711
4.05M
                RETURN_FAILURE;
712
10.1M
            pattern++;
713
10.1M
            DISPATCH;
714
715
10.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
192M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
192M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
192M
            if (ptr >= end ||
749
192M
                !SRE(charset)(state, pattern + 1, *ptr))
750
14.0M
                RETURN_FAILURE;
751
178M
            pattern += pattern[0];
752
178M
            ptr++;
753
178M
            DISPATCH;
754
755
178M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
8.26M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
8.26M
                   pattern, ptr, pattern[0]));
758
8.26M
            if (ptr >= end ||
759
8.26M
                sre_lower_ascii(*ptr) != *pattern)
760
238k
                RETURN_FAILURE;
761
8.02M
            pattern++;
762
8.02M
            ptr++;
763
8.02M
            DISPATCH;
764
765
8.02M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
84.8M
        TARGET(SRE_OP_JUMP):
845
84.8M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
84.8M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
84.8M
                   ptr, pattern[0]));
850
84.8M
            pattern += pattern[0];
851
84.8M
            DISPATCH;
852
853
130M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
130M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
130M
            LASTMARK_SAVE();
858
130M
            if (state->repeat)
859
76.8M
                MARK_PUSH(ctx->lastmark);
860
316M
            for (; pattern[0]; pattern += pattern[0]) {
861
268M
                if (pattern[1] == SRE_OP_LITERAL &&
862
125M
                    (ptr >= end ||
863
125M
                     (SRE_CODE) *ptr != pattern[2]))
864
61.2M
                    continue;
865
206M
                if (pattern[1] == SRE_OP_IN &&
866
69.6M
                    (ptr >= end ||
867
69.6M
                     !SRE(charset)(state, pattern + 3,
868
69.6M
                                   (SRE_CODE) *ptr)))
869
42.6M
                    continue;
870
164M
                state->ptr = ptr;
871
164M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
164M
                if (ret) {
873
82.2M
                    if (state->repeat)
874
58.7M
                        MARK_POP_DISCARD(ctx->lastmark);
875
82.2M
                    RETURN_ON_ERROR(ret);
876
82.2M
                    RETURN_SUCCESS;
877
82.2M
                }
878
82.0M
                if (state->repeat)
879
26.6k
                    MARK_POP_KEEP(ctx->lastmark);
880
82.0M
                LASTMARK_RESTORE();
881
82.0M
            }
882
47.8M
            if (state->repeat)
883
18.1M
                MARK_POP_DISCARD(ctx->lastmark);
884
47.8M
            RETURN_FAILURE;
885
886
519M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
519M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
519M
                   pattern[1], pattern[2]));
898
899
519M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.34M
                RETURN_FAILURE; /* cannot match */
901
902
518M
            state->ptr = ptr;
903
904
518M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
518M
            RETURN_ON_ERROR(ret);
906
518M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
518M
            ctx->count = ret;
908
518M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
518M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
361M
                RETURN_FAILURE;
917
918
156M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
7.47M
                ptr == state->end &&
920
79.5k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
79.5k
            {
922
                /* tail is empty.  we're finished */
923
79.5k
                state->ptr = ptr;
924
79.5k
                RETURN_SUCCESS;
925
79.5k
            }
926
927
156M
            LASTMARK_SAVE();
928
156M
            if (state->repeat)
929
96.5M
                MARK_PUSH(ctx->lastmark);
930
931
156M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
42.0M
                ctx->u.chr = pattern[pattern[0]+1];
935
42.0M
                for (;;) {
936
98.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
72.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
56.6M
                        ptr--;
939
56.6M
                        ctx->count--;
940
56.6M
                    }
941
42.0M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
25.9M
                        break;
943
16.0M
                    state->ptr = ptr;
944
16.0M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
16.0M
                            pattern+pattern[0]);
946
16.0M
                    if (ret) {
947
16.0M
                        if (state->repeat)
948
14.8M
                            MARK_POP_DISCARD(ctx->lastmark);
949
16.0M
                        RETURN_ON_ERROR(ret);
950
16.0M
                        RETURN_SUCCESS;
951
16.0M
                    }
952
628
                    if (state->repeat)
953
628
                        MARK_POP_KEEP(ctx->lastmark);
954
628
                    LASTMARK_RESTORE();
955
956
628
                    ptr--;
957
628
                    ctx->count--;
958
628
                }
959
25.9M
                if (state->repeat)
960
24.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
114M
            } else {
962
                /* general case */
963
117M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
116M
                    state->ptr = ptr;
965
116M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
116M
                            pattern+pattern[0]);
967
116M
                    if (ret) {
968
113M
                        if (state->repeat)
969
56.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
113M
                        RETURN_ON_ERROR(ret);
971
113M
                        RETURN_SUCCESS;
972
113M
                    }
973
3.49M
                    if (state->repeat)
974
1.86M
                        MARK_POP_KEEP(ctx->lastmark);
975
3.49M
                    LASTMARK_RESTORE();
976
977
3.49M
                    ptr--;
978
3.49M
                    ctx->count--;
979
3.49M
                }
980
1.20M
                if (state->repeat)
981
984k
                    MARK_POP_DISCARD(ctx->lastmark);
982
1.20M
            }
983
27.1M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
59.3M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
59.3M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
59.3M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
59.3M
            ctx->u.rep = repeat_pool_malloc(state);
1127
59.3M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
59.3M
            ctx->u.rep->count = -1;
1131
59.3M
            ctx->u.rep->pattern = pattern;
1132
59.3M
            ctx->u.rep->prev = state->repeat;
1133
59.3M
            ctx->u.rep->last_ptr = NULL;
1134
59.3M
            state->repeat = ctx->u.rep;
1135
1136
59.3M
            state->ptr = ptr;
1137
59.3M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
59.3M
            state->repeat = ctx->u.rep->prev;
1139
59.3M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
59.3M
            if (ret) {
1142
59.2M
                RETURN_ON_ERROR(ret);
1143
59.2M
                RETURN_SUCCESS;
1144
59.2M
            }
1145
107k
            RETURN_FAILURE;
1146
1147
141M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
141M
            ctx->u.rep = state->repeat;
1155
141M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
141M
            state->ptr = ptr;
1159
1160
141M
            ctx->count = ctx->u.rep->count+1;
1161
1162
141M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
141M
                   ptr, ctx->count));
1164
1165
141M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
141M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
12.7M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
128M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
128M
                ctx->u.rep->count = ctx->count;
1185
128M
                LASTMARK_SAVE();
1186
128M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
128M
                LAST_PTR_PUSH();
1189
128M
                ctx->u.rep->last_ptr = state->ptr;
1190
128M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
128M
                        ctx->u.rep->pattern+3);
1192
128M
                LAST_PTR_POP();
1193
128M
                if (ret) {
1194
80.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
80.9M
                    RETURN_ON_ERROR(ret);
1196
80.9M
                    RETURN_SUCCESS;
1197
80.9M
                }
1198
47.6M
                MARK_POP(ctx->lastmark);
1199
47.6M
                LASTMARK_RESTORE();
1200
47.6M
                ctx->u.rep->count = ctx->count-1;
1201
47.6M
                state->ptr = ptr;
1202
47.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
60.3M
            state->repeat = ctx->u.rep->prev;
1207
60.3M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
60.3M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
60.3M
            RETURN_ON_SUCCESS(ret);
1211
1.04M
            state->ptr = ptr;
1212
1.04M
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
42.0M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
42.0M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
42.0M
                   ptr, pattern[1]));
1565
42.0M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
42.0M
            state->ptr = ptr - pattern[1];
1568
42.0M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
42.0M
            RETURN_ON_FAILURE(ret);
1570
34.3M
            pattern += pattern[0];
1571
34.3M
            DISPATCH;
1572
1573
34.3M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
25.8M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
25.8M
                   ptr, pattern[1]));
1578
25.8M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
25.8M
                state->ptr = ptr - pattern[1];
1580
25.8M
                LASTMARK_SAVE();
1581
25.8M
                if (state->repeat)
1582
25.8M
                    MARK_PUSH(ctx->lastmark);
1583
1584
51.7M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
51.7M
                if (ret) {
1586
19.5k
                    if (state->repeat)
1587
19.5k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
19.5k
                    RETURN_ON_ERROR(ret);
1589
19.5k
                    RETURN_FAILURE;
1590
19.5k
                }
1591
25.8M
                if (state->repeat)
1592
25.8M
                    MARK_POP(ctx->lastmark);
1593
25.8M
                LASTMARK_RESTORE();
1594
25.8M
            }
1595
25.8M
            pattern += pattern[0];
1596
25.8M
            DISPATCH;
1597
1598
25.8M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
1.08G
exit:
1620
1.08G
    ctx_pos = ctx->last_ctx_pos;
1621
1.08G
    jump = ctx->jump;
1622
1.08G
    DATA_POP_DISCARD(ctx);
1623
1.08G
    if (ctx_pos == -1) {
1624
473M
        state->sigcount = sigcount;
1625
473M
        return ret;
1626
473M
    }
1627
613M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
613M
    switch (jump) {
1630
128M
        case JUMP_MAX_UNTIL_2:
1631
128M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
128M
            goto jump_max_until_2;
1633
60.3M
        case JUMP_MAX_UNTIL_3:
1634
60.3M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
60.3M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
164M
        case JUMP_BRANCH:
1643
164M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
164M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
59.3M
        case JUMP_REPEAT:
1658
59.3M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
59.3M
            goto jump_repeat;
1660
16.0M
        case JUMP_REPEAT_ONE_1:
1661
16.0M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
16.0M
            goto jump_repeat_one_1;
1663
116M
        case JUMP_REPEAT_ONE_2:
1664
116M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
116M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
42.0M
        case JUMP_ASSERT:
1673
42.0M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
42.0M
            goto jump_assert;
1675
25.8M
        case JUMP_ASSERT_NOT:
1676
25.8M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
25.8M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
613M
    }
1683
1684
0
    return ret; /* should never get here */
1685
613M
}
sre.c:sre_ucs1_match
Line
Count
Source
600
148M
{
601
148M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
148M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
148M
    Py_ssize_t ret = 0;
604
148M
    int jump;
605
148M
    unsigned int sigcount = state->sigcount;
606
607
148M
    SRE(match_context)* ctx;
608
148M
    SRE(match_context)* nextctx;
609
148M
    INIT_TRACE(state);
610
611
148M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
148M
    DATA_ALLOC(SRE(match_context), ctx);
614
148M
    ctx->last_ctx_pos = -1;
615
148M
    ctx->jump = JUMP_NONE;
616
148M
    ctx->toplevel = toplevel;
617
148M
    ctx_pos = alloc_pos;
618
619
148M
#if USE_COMPUTED_GOTOS
620
148M
#include "sre_targets.h"
621
148M
#endif
622
623
331M
entrance:
624
625
331M
    ;  // Empty statement: before C23 a label cannot directly precede a declaration.
626
331M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
331M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
30.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
2.90M
            TRACE(("reject (got %tu chars, need %zu)\n",
633
2.90M
                   end - ptr, (size_t) pattern[3]));
634
2.90M
            RETURN_FAILURE;
635
2.90M
        }
636
27.7M
        pattern += pattern[1] + 1;
637
27.7M
    }
638
639
328M
#if USE_COMPUTED_GOTOS
640
328M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
328M
    {
647
648
328M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
129M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
129M
                   ptr, pattern[0]));
653
129M
            {
654
129M
                int i = pattern[0];
655
129M
                if (i & 1)
656
21.2M
                    state->lastindex = i/2 + 1;
657
129M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
125M
                    int j = state->lastmark + 1;
663
129M
                    while (j < i)
664
3.92M
                        state->mark[j++] = NULL;
665
125M
                    state->lastmark = i;
666
125M
                }
667
129M
                state->mark[i] = ptr;
668
129M
            }
669
129M
            pattern++;
670
129M
            DISPATCH;
671
672
129M
        TARGET(SRE_OP_LITERAL):
673
            /* match a single literal character */
674
            /* <LITERAL> <code> */
675
68.5M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
68.5M
                   ptr, *pattern));
677
68.5M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
22.1M
                RETURN_FAILURE;
679
46.3M
            pattern++;
680
46.3M
            ptr++;
681
46.3M
            DISPATCH;
682
683
46.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
53.3M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
53.3M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
53.3M
            if (ctx->toplevel &&
698
21.0M
                ((state->match_all && ptr != state->end) ||
699
21.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
53.3M
            state->ptr = ptr;
704
53.3M
            RETURN_SUCCESS;
705
706
12.5M
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
12.5M
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
12.5M
            if (!SRE(at)(state, ptr, *pattern))
711
2.44M
                RETURN_FAILURE;
712
10.1M
            pattern++;
713
10.1M
            DISPATCH;
714
715
10.1M
        TARGET(SRE_OP_CATEGORY):
716
            /* match a character in the given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
37.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
37.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
37.2M
            if (ptr >= end ||
749
37.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
845k
                RETURN_FAILURE;
751
36.4M
            pattern += pattern[0];
752
36.4M
            ptr++;
753
36.4M
            DISPATCH;
754
755
36.4M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
1.17M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
1.17M
                   pattern, ptr, pattern[0]));
758
1.17M
            if (ptr >= end ||
759
1.17M
                sre_lower_ascii(*ptr) != *pattern)
760
160k
                RETURN_FAILURE;
761
1.01M
            pattern++;
762
1.01M
            ptr++;
763
1.01M
            DISPATCH;
764
765
1.01M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
31.3M
        TARGET(SRE_OP_JUMP):
845
31.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
31.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
31.3M
                   ptr, pattern[0]));
850
31.3M
            pattern += pattern[0];
851
31.3M
            DISPATCH;
852
853
57.4M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
57.4M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
57.4M
            LASTMARK_SAVE();
858
57.4M
            if (state->repeat)
859
12.2M
                MARK_PUSH(ctx->lastmark);
860
166M
            for (; pattern[0]; pattern += pattern[0]) {
861
139M
                if (pattern[1] == SRE_OP_LITERAL &&
862
64.1M
                    (ptr >= end ||
863
64.0M
                     (SRE_CODE) *ptr != pattern[2]))
864
25.4M
                    continue;
865
113M
                if (pattern[1] == SRE_OP_IN &&
866
12.5M
                    (ptr >= end ||
867
12.5M
                     !SRE(charset)(state, pattern + 3,
868
12.5M
                                   (SRE_CODE) *ptr)))
869
7.03M
                    continue;
870
106M
                state->ptr = ptr;
871
106M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
106M
                if (ret) {
873
29.6M
                    if (state->repeat)
874
11.8M
                        MARK_POP_DISCARD(ctx->lastmark);
875
29.6M
                    RETURN_ON_ERROR(ret);
876
29.6M
                    RETURN_SUCCESS;
877
29.6M
                }
878
77.0M
                if (state->repeat)
879
7.72k
                    MARK_POP_KEEP(ctx->lastmark);
880
77.0M
                LASTMARK_RESTORE();
881
77.0M
            }
882
27.8M
            if (state->repeat)
883
447k
                MARK_POP_DISCARD(ctx->lastmark);
884
27.8M
            RETURN_FAILURE;
885
886
158M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
158M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
158M
                   pattern[1], pattern[2]));
898
899
158M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
1.13M
                RETURN_FAILURE; /* cannot match */
901
902
157M
            state->ptr = ptr;
903
904
157M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
157M
            RETURN_ON_ERROR(ret);
906
157M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
157M
            ctx->count = ret;
908
157M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
157M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
124M
                RETURN_FAILURE;
917
918
33.1M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
763k
                ptr == state->end &&
920
56.7k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
56.7k
            {
922
                /* tail is empty.  we're finished */
923
56.7k
                state->ptr = ptr;
924
56.7k
                RETURN_SUCCESS;
925
56.7k
            }
926
927
33.1M
            LASTMARK_SAVE();
928
33.1M
            if (state->repeat)
929
17.7M
                MARK_PUSH(ctx->lastmark);
930
931
33.1M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
6.80M
                ctx->u.chr = pattern[pattern[0]+1];
935
6.80M
                for (;;) {
936
17.4M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
14.0M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.6M
                        ptr--;
939
10.6M
                        ctx->count--;
940
10.6M
                    }
941
6.80M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
3.42M
                        break;
943
3.37M
                    state->ptr = ptr;
944
3.37M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
3.37M
                            pattern+pattern[0]);
946
3.37M
                    if (ret) {
947
3.37M
                        if (state->repeat)
948
2.12M
                            MARK_POP_DISCARD(ctx->lastmark);
949
3.37M
                        RETURN_ON_ERROR(ret);
950
3.37M
                        RETURN_SUCCESS;
951
3.37M
                    }
952
131
                    if (state->repeat)
953
131
                        MARK_POP_KEEP(ctx->lastmark);
954
131
                    LASTMARK_RESTORE();
955
956
131
                    ptr--;
957
131
                    ctx->count--;
958
131
                }
959
3.42M
                if (state->repeat)
960
2.00M
                    MARK_POP_DISCARD(ctx->lastmark);
961
26.3M
            } else {
962
                /* general case */
963
28.5M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
27.7M
                    state->ptr = ptr;
965
27.7M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
27.7M
                            pattern+pattern[0]);
967
27.7M
                    if (ret) {
968
25.4M
                        if (state->repeat)
969
12.9M
                            MARK_POP_DISCARD(ctx->lastmark);
970
25.4M
                        RETURN_ON_ERROR(ret);
971
25.4M
                        RETURN_SUCCESS;
972
25.4M
                    }
973
2.26M
                    if (state->repeat)
974
1.18M
                        MARK_POP_KEEP(ctx->lastmark);
975
2.26M
                    LASTMARK_RESTORE();
976
977
2.26M
                    ptr--;
978
2.26M
                    ctx->count--;
979
2.26M
                }
980
861k
                if (state->repeat)
981
642k
                    MARK_POP_DISCARD(ctx->lastmark);
982
861k
            }
983
4.29M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the POSSESSIVE_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
7.50M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
7.50M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
7.50M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
7.50M
            ctx->u.rep = repeat_pool_malloc(state);
1127
7.50M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
7.50M
            ctx->u.rep->count = -1;
1131
7.50M
            ctx->u.rep->pattern = pattern;
1132
7.50M
            ctx->u.rep->prev = state->repeat;
1133
7.50M
            ctx->u.rep->last_ptr = NULL;
1134
7.50M
            state->repeat = ctx->u.rep;
1135
1136
7.50M
            state->ptr = ptr;
1137
7.50M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
7.50M
            state->repeat = ctx->u.rep->prev;
1139
7.50M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
7.50M
            if (ret) {
1142
7.40M
                RETURN_ON_ERROR(ret);
1143
7.40M
                RETURN_SUCCESS;
1144
7.40M
            }
1145
106k
            RETURN_FAILURE;
1146
1147
25.6M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
25.6M
            ctx->u.rep = state->repeat;
1155
25.6M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
25.6M
            state->ptr = ptr;
1159
1160
25.6M
            ctx->count = ctx->u.rep->count+1;
1161
1162
25.6M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
25.6M
                   ptr, ctx->count));
1164
1165
25.6M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
25.6M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.72M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
20.9M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
20.9M
                ctx->u.rep->count = ctx->count;
1185
20.9M
                LASTMARK_SAVE();
1186
20.9M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
20.9M
                LAST_PTR_PUSH();
1189
20.9M
                ctx->u.rep->last_ptr = state->ptr;
1190
20.9M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
20.9M
                        ctx->u.rep->pattern+3);
1192
20.9M
                LAST_PTR_POP();
1193
20.9M
                if (ret) {
1194
17.5M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
17.5M
                    RETURN_ON_ERROR(ret);
1196
17.5M
                    RETURN_SUCCESS;
1197
17.5M
                }
1198
3.38M
                MARK_POP(ctx->lastmark);
1199
3.38M
                LASTMARK_RESTORE();
1200
3.38M
                ctx->u.rep->count = ctx->count-1;
1201
3.38M
                state->ptr = ptr;
1202
3.38M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
8.10M
            state->repeat = ctx->u.rep->prev;
1207
8.10M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
8.10M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
8.10M
            RETURN_ON_SUCCESS(ret);
1211
704k
            state->ptr = ptr;
1212
704k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
3.46M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
3.46M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
3.46M
                   ptr, pattern[1]));
1565
3.46M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
3.46M
            state->ptr = ptr - pattern[1];
1568
3.46M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
3.46M
            RETURN_ON_FAILURE(ret);
1570
3.25M
            pattern += pattern[0];
1571
3.25M
            DISPATCH;
1572
1573
5.50M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
5.50M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
5.50M
                   ptr, pattern[1]));
1578
5.50M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
5.50M
                state->ptr = ptr - pattern[1];
1580
5.50M
                LASTMARK_SAVE();
1581
5.50M
                if (state->repeat)
1582
5.50M
                    MARK_PUSH(ctx->lastmark);
1583
1584
11.0M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
11.0M
                if (ret) {
1586
1.25k
                    if (state->repeat)
1587
1.25k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
1.25k
                    RETURN_ON_ERROR(ret);
1589
1.25k
                    RETURN_FAILURE;
1590
1.25k
                }
1591
5.50M
                if (state->repeat)
1592
5.50M
                    MARK_POP(ctx->lastmark);
1593
5.50M
                LASTMARK_RESTORE();
1594
5.50M
            }
1595
5.50M
            pattern += pattern[0];
1596
5.50M
            DISPATCH;
1597
1598
5.50M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
331M
exit:
1620
331M
    ctx_pos = ctx->last_ctx_pos;
1621
331M
    jump = ctx->jump;
1622
331M
    DATA_POP_DISCARD(ctx);
1623
331M
    if (ctx_pos == -1) {
1624
148M
        state->sigcount = sigcount;
1625
148M
        return ret;
1626
148M
    }
1627
183M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
183M
    switch (jump) {
1630
20.9M
        case JUMP_MAX_UNTIL_2:
1631
20.9M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
20.9M
            goto jump_max_until_2;
1633
8.10M
        case JUMP_MAX_UNTIL_3:
1634
8.10M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
8.10M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
106M
        case JUMP_BRANCH:
1643
106M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
106M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
7.50M
        case JUMP_REPEAT:
1658
7.50M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
7.50M
            goto jump_repeat;
1660
3.37M
        case JUMP_REPEAT_ONE_1:
1661
3.37M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
3.37M
            goto jump_repeat_one_1;
1663
27.7M
        case JUMP_REPEAT_ONE_2:
1664
27.7M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
27.7M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
3.46M
        case JUMP_ASSERT:
1673
3.46M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
3.46M
            goto jump_assert;
1675
5.50M
        case JUMP_ASSERT_NOT:
1676
5.50M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
5.50M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
183M
    }
1683
1684
0
    return ret; /* should never get here */
1685
183M
}
sre.c:sre_ucs2_match
Line
Count
Source
600
230M
{
601
230M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
230M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
230M
    Py_ssize_t ret = 0;
604
230M
    int jump;
605
230M
    unsigned int sigcount = state->sigcount;
606
607
230M
    SRE(match_context)* ctx;
608
230M
    SRE(match_context)* nextctx;
609
230M
    INIT_TRACE(state);
610
611
230M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
230M
    DATA_ALLOC(SRE(match_context), ctx);
614
230M
    ctx->last_ctx_pos = -1;
615
230M
    ctx->jump = JUMP_NONE;
616
230M
    ctx->toplevel = toplevel;
617
230M
    ctx_pos = alloc_pos;
618
619
230M
#if USE_COMPUTED_GOTOS
620
230M
#include "sre_targets.h"
621
230M
#endif
622
623
388M
entrance:
624
625
388M
    ;  // Fashion statement.
626
388M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
388M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
20.4M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
875
            TRACE(("reject (got %tu chars, need %zu)\n",
633
875
                   end - ptr, (size_t) pattern[3]));
634
875
            RETURN_FAILURE;
635
875
        }
636
20.4M
        pattern += pattern[1] + 1;
637
20.4M
    }
638
639
388M
#if USE_COMPUTED_GOTOS
640
388M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
388M
    {
647
648
388M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
192M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
192M
                   ptr, pattern[0]));
653
192M
            {
654
192M
                int i = pattern[0];
655
192M
                if (i & 1)
656
13.2M
                    state->lastindex = i/2 + 1;
657
192M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
190M
                    int j = state->lastmark + 1;
663
192M
                    while (j < i)
664
1.43M
                        state->mark[j++] = NULL;
665
190M
                    state->lastmark = i;
666
190M
                }
667
192M
                state->mark[i] = ptr;
668
192M
            }
669
192M
            pattern++;
670
192M
            DISPATCH;
671
672
192M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
28.8M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
28.8M
                   ptr, *pattern));
677
28.8M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
6.56M
                RETURN_FAILURE;
679
22.3M
            pattern++;
680
22.3M
            ptr++;
681
22.3M
            DISPATCH;
682
683
22.3M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
70.2M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
70.2M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
70.2M
            if (ctx->toplevel &&
698
15.1M
                ((state->match_all && ptr != state->end) ||
699
15.1M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
70.2M
            state->ptr = ptr;
704
70.2M
            RETURN_SUCCESS;
705
706
990k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
990k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
990k
            if (!SRE(at)(state, ptr, *pattern))
711
957k
                RETURN_FAILURE;
712
33.3k
            pattern++;
713
33.3k
            DISPATCH;
714
715
33.3k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
86.6M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
86.6M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
86.6M
            if (ptr >= end ||
749
86.6M
                !SRE(charset)(state, pattern + 1, *ptr))
750
9.81M
                RETURN_FAILURE;
751
76.8M
            pattern += pattern[0];
752
76.8M
            ptr++;
753
76.8M
            DISPATCH;
754
755
76.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
4.61M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
4.61M
                   pattern, ptr, pattern[0]));
758
4.61M
            if (ptr >= end ||
759
4.61M
                sre_lower_ascii(*ptr) != *pattern)
760
43.2k
                RETURN_FAILURE;
761
4.56M
            pattern++;
762
4.56M
            ptr++;
763
4.56M
            DISPATCH;
764
765
4.56M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
20.1M
        TARGET(SRE_OP_JUMP):
845
20.1M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
20.1M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
20.1M
                   ptr, pattern[0]));
850
20.1M
            pattern += pattern[0];
851
20.1M
            DISPATCH;
852
853
27.6M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
27.6M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
27.6M
            LASTMARK_SAVE();
858
27.6M
            if (state->repeat)
859
23.9M
                MARK_PUSH(ctx->lastmark);
860
55.9M
            for (; pattern[0]; pattern += pattern[0]) {
861
47.9M
                if (pattern[1] == SRE_OP_LITERAL &&
862
21.8M
                    (ptr >= end ||
863
21.8M
                     (SRE_CODE) *ptr != pattern[2]))
864
11.7M
                    continue;
865
36.2M
                if (pattern[1] == SRE_OP_IN &&
866
20.6M
                    (ptr >= end ||
867
20.6M
                     !SRE(charset)(state, pattern + 3,
868
20.6M
                                   (SRE_CODE) *ptr)))
869
12.4M
                    continue;
870
23.7M
                state->ptr = ptr;
871
23.7M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
23.7M
                if (ret) {
873
19.7M
                    if (state->repeat)
874
17.6M
                        MARK_POP_DISCARD(ctx->lastmark);
875
19.7M
                    RETURN_ON_ERROR(ret);
876
19.7M
                    RETURN_SUCCESS;
877
19.7M
                }
878
4.05M
                if (state->repeat)
879
2.62k
                    MARK_POP_KEEP(ctx->lastmark);
880
4.05M
                LASTMARK_RESTORE();
881
4.05M
            }
882
7.95M
            if (state->repeat)
883
6.29M
                MARK_POP_DISCARD(ctx->lastmark);
884
7.95M
            RETURN_FAILURE;
885
886
210M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
210M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
210M
                   pattern[1], pattern[2]));
898
899
210M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
188k
                RETURN_FAILURE; /* cannot match */
901
902
210M
            state->ptr = ptr;
903
904
210M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
210M
            RETURN_ON_ERROR(ret);
906
210M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
210M
            ctx->count = ret;
908
210M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
210M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
164M
                RETURN_FAILURE;
917
918
46.4M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
5.54M
                ptr == state->end &&
920
18.9k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
18.9k
            {
922
                /* tail is empty.  we're finished */
923
18.9k
                state->ptr = ptr;
924
18.9k
                RETURN_SUCCESS;
925
18.9k
            }
926
927
46.4M
            LASTMARK_SAVE();
928
46.4M
            if (state->repeat)
929
22.8M
                MARK_PUSH(ctx->lastmark);
930
931
46.4M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
9.60M
                ctx->u.chr = pattern[pattern[0]+1];
935
9.60M
                for (;;) {
936
19.7M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
15.7M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
10.1M
                        ptr--;
939
10.1M
                        ctx->count--;
940
10.1M
                    }
941
9.60M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
4.02M
                        break;
943
5.58M
                    state->ptr = ptr;
944
5.58M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
5.58M
                            pattern+pattern[0]);
946
5.58M
                    if (ret) {
947
5.58M
                        if (state->repeat)
948
5.55M
                            MARK_POP_DISCARD(ctx->lastmark);
949
5.58M
                        RETURN_ON_ERROR(ret);
950
5.58M
                        RETURN_SUCCESS;
951
5.58M
                    }
952
216
                    if (state->repeat)
953
216
                        MARK_POP_KEEP(ctx->lastmark);
954
216
                    LASTMARK_RESTORE();
955
956
216
                    ptr--;
957
216
                    ctx->count--;
958
216
                }
959
4.02M
                if (state->repeat)
960
4.01M
                    MARK_POP_DISCARD(ctx->lastmark);
961
36.7M
            } else {
962
                /* general case */
963
37.4M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
37.1M
                    state->ptr = ptr;
965
37.1M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
37.1M
                            pattern+pattern[0]);
967
37.1M
                    if (ret) {
968
36.5M
                        if (state->repeat)
969
13.0M
                            MARK_POP_DISCARD(ctx->lastmark);
970
36.5M
                        RETURN_ON_ERROR(ret);
971
36.5M
                        RETURN_SUCCESS;
972
36.5M
                    }
973
632k
                    if (state->repeat)
974
520k
                        MARK_POP_KEEP(ctx->lastmark);
975
632k
                    LASTMARK_RESTORE();
976
977
632k
                    ptr--;
978
632k
                    ctx->count--;
979
632k
                }
980
262k
                if (state->repeat)
981
260k
                    MARK_POP_DISCARD(ctx->lastmark);
982
262k
            }
983
4.28M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
16.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
16.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
16.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
16.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
16.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
16.9M
            ctx->u.rep->count = -1;
1131
16.9M
            ctx->u.rep->pattern = pattern;
1132
16.9M
            ctx->u.rep->prev = state->repeat;
1133
16.9M
            ctx->u.rep->last_ptr = NULL;
1134
16.9M
            state->repeat = ctx->u.rep;
1135
1136
16.9M
            state->ptr = ptr;
1137
16.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
16.9M
            state->repeat = ctx->u.rep->prev;
1139
16.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
16.9M
            if (ret) {
1142
16.9M
                RETURN_ON_ERROR(ret);
1143
16.9M
                RETURN_SUCCESS;
1144
16.9M
            }
1145
1.15k
            RETURN_FAILURE;
1146
1147
39.1M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
39.1M
            ctx->u.rep = state->repeat;
1155
39.1M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
39.1M
            state->ptr = ptr;
1159
1160
39.1M
            ctx->count = ctx->u.rep->count+1;
1161
1162
39.1M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
39.1M
                   ptr, ctx->count));
1164
1165
39.1M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
39.1M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
3.55M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
35.6M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
35.6M
                ctx->u.rep->count = ctx->count;
1185
35.6M
                LASTMARK_SAVE();
1186
35.6M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
35.6M
                LAST_PTR_PUSH();
1189
35.6M
                ctx->u.rep->last_ptr = state->ptr;
1190
35.6M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
35.6M
                        ctx->u.rep->pattern+3);
1192
35.6M
                LAST_PTR_POP();
1193
35.6M
                if (ret) {
1194
21.9M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
21.9M
                    RETURN_ON_ERROR(ret);
1196
21.9M
                    RETURN_SUCCESS;
1197
21.9M
                }
1198
13.6M
                MARK_POP(ctx->lastmark);
1199
13.6M
                LASTMARK_RESTORE();
1200
13.6M
                ctx->u.rep->count = ctx->count-1;
1201
13.6M
                state->ptr = ptr;
1202
13.6M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
17.1M
            state->repeat = ctx->u.rep->prev;
1207
17.1M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
17.1M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
17.1M
            RETURN_ON_SUCCESS(ret);
1211
261k
            state->ptr = ptr;
1212
261k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the
1351
                   Input Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
14.1M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
14.1M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
14.1M
                   ptr, pattern[1]));
1565
14.1M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
14.1M
            state->ptr = ptr - pattern[1];
1568
14.1M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
14.1M
            RETURN_ON_FAILURE(ret);
1570
7.98M
            pattern += pattern[0];
1571
7.98M
            DISPATCH;
1572
1573
7.98M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
7.16M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
7.16M
                   ptr, pattern[1]));
1578
7.16M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
7.16M
                state->ptr = ptr - pattern[1];
1580
7.16M
                LASTMARK_SAVE();
1581
7.16M
                if (state->repeat)
1582
7.16M
                    MARK_PUSH(ctx->lastmark);
1583
1584
14.3M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
14.3M
                if (ret) {
1586
2.37k
                    if (state->repeat)
1587
2.37k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
2.37k
                    RETURN_ON_ERROR(ret);
1589
2.37k
                    RETURN_FAILURE;
1590
2.37k
                }
1591
7.16M
                if (state->repeat)
1592
7.16M
                    MARK_POP(ctx->lastmark);
1593
7.16M
                LASTMARK_RESTORE();
1594
7.16M
            }
1595
7.16M
            pattern += pattern[0];
1596
7.16M
            DISPATCH;
1597
1598
7.16M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
388M
exit:
1620
388M
    ctx_pos = ctx->last_ctx_pos;
1621
388M
    jump = ctx->jump;
1622
388M
    DATA_POP_DISCARD(ctx);
1623
388M
    if (ctx_pos == -1) {
1624
230M
        state->sigcount = sigcount;
1625
230M
        return ret;
1626
230M
    }
1627
157M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
157M
    switch (jump) {
1630
35.6M
        case JUMP_MAX_UNTIL_2:
1631
35.6M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
35.6M
            goto jump_max_until_2;
1633
17.1M
        case JUMP_MAX_UNTIL_3:
1634
17.1M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
17.1M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
23.7M
        case JUMP_BRANCH:
1643
23.7M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
23.7M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
16.9M
        case JUMP_REPEAT:
1658
16.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
16.9M
            goto jump_repeat;
1660
5.58M
        case JUMP_REPEAT_ONE_1:
1661
5.58M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
5.58M
            goto jump_repeat_one_1;
1663
37.1M
        case JUMP_REPEAT_ONE_2:
1664
37.1M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
37.1M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
14.1M
        case JUMP_ASSERT:
1673
14.1M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
14.1M
            goto jump_assert;
1675
7.16M
        case JUMP_ASSERT_NOT:
1676
7.16M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
7.16M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
157M
    }
1683
1684
0
    return ret; /* should never get here */
1685
157M
}
sre.c:sre_ucs4_match
Line
Count
Source
600
94.9M
{
601
94.9M
    const SRE_CHAR* end = (const SRE_CHAR *)state->end;
602
94.9M
    Py_ssize_t alloc_pos, ctx_pos = -1;
603
94.9M
    Py_ssize_t ret = 0;
604
94.9M
    int jump;
605
94.9M
    unsigned int sigcount = state->sigcount;
606
607
94.9M
    SRE(match_context)* ctx;
608
94.9M
    SRE(match_context)* nextctx;
609
94.9M
    INIT_TRACE(state);
610
611
94.9M
    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
612
613
94.9M
    DATA_ALLOC(SRE(match_context), ctx);
614
94.9M
    ctx->last_ctx_pos = -1;
615
94.9M
    ctx->jump = JUMP_NONE;
616
94.9M
    ctx->toplevel = toplevel;
617
94.9M
    ctx_pos = alloc_pos;
618
619
94.9M
#if USE_COMPUTED_GOTOS
620
94.9M
#include "sre_targets.h"
621
94.9M
#endif
622
623
367M
entrance:
624
625
367M
    ;  // Fashion statement.
626
367M
    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr;
627
628
367M
    if (pattern[0] == SRE_OP_INFO) {
629
        /* optimization info block */
630
        /* <INFO> <1=skip> <2=flags> <3=min> ... */
631
13.9M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
632
382
            TRACE(("reject (got %tu chars, need %zu)\n",
633
382
                   end - ptr, (size_t) pattern[3]));
634
382
            RETURN_FAILURE;
635
382
        }
636
13.9M
        pattern += pattern[1] + 1;
637
13.9M
    }
638
639
367M
#if USE_COMPUTED_GOTOS
640
367M
    DISPATCH;
641
#else
642
dispatch:
643
    MAYBE_CHECK_SIGNALS;
644
    switch (*pattern++)
645
#endif
646
367M
    {
647
648
367M
        TARGET(SRE_OP_MARK):
649
            /* set mark */
650
            /* <MARK> <gid> */
651
122M
            TRACE(("|%p|%p|MARK %d\n", pattern,
652
122M
                   ptr, pattern[0]));
653
122M
            {
654
122M
                int i = pattern[0];
655
122M
                if (i & 1)
656
20.4M
                    state->lastindex = i/2 + 1;
657
122M
                if (i > state->lastmark) {
658
                    /* state->lastmark is the highest valid index in the
659
                       state->mark array.  If it is increased by more than 1,
660
                       the intervening marks must be set to NULL to signal
661
                       that these marks have not been encountered. */
662
119M
                    int j = state->lastmark + 1;
663
122M
                    while (j < i)
664
2.06M
                        state->mark[j++] = NULL;
665
119M
                    state->lastmark = i;
666
119M
                }
667
122M
                state->mark[i] = ptr;
668
122M
            }
669
122M
            pattern++;
670
122M
            DISPATCH;
671
672
122M
        TARGET(SRE_OP_LITERAL):
673
            /* match literal string */
674
            /* <LITERAL> <code> */
675
36.2M
            TRACE(("|%p|%p|LITERAL %d\n", pattern,
676
36.2M
                   ptr, *pattern));
677
36.2M
            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
678
12.1M
                RETURN_FAILURE;
679
24.1M
            pattern++;
680
24.1M
            ptr++;
681
24.1M
            DISPATCH;
682
683
24.1M
        TARGET(SRE_OP_NOT_LITERAL):
684
            /* match anything that is not literal character */
685
            /* <NOT_LITERAL> <code> */
686
0
            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern,
687
0
                   ptr, *pattern));
688
0
            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
689
0
                RETURN_FAILURE;
690
0
            pattern++;
691
0
            ptr++;
692
0
            DISPATCH;
693
694
43.5M
        TARGET(SRE_OP_SUCCESS):
695
            /* end of pattern */
696
43.5M
            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr));
697
43.5M
            if (ctx->toplevel &&
698
13.0M
                ((state->match_all && ptr != state->end) ||
699
13.0M
                 (state->must_advance && ptr == state->start)))
700
0
            {
701
0
                RETURN_FAILURE;
702
0
            }
703
43.5M
            state->ptr = ptr;
704
43.5M
            RETURN_SUCCESS;
705
706
660k
        TARGET(SRE_OP_AT):
707
            /* match at given position */
708
            /* <AT> <code> */
709
660k
            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern));
710
660k
            if (!SRE(at)(state, ptr, *pattern))
711
656k
                RETURN_FAILURE;
712
3.92k
            pattern++;
713
3.92k
            DISPATCH;
714
715
3.92k
        TARGET(SRE_OP_CATEGORY):
716
            /* match at given category */
717
            /* <CATEGORY> <code> */
718
0
            TRACE(("|%p|%p|CATEGORY %d\n", pattern,
719
0
                   ptr, *pattern));
720
0
            if (ptr >= end || !sre_category(pattern[0], ptr[0]))
721
0
                RETURN_FAILURE;
722
0
            pattern++;
723
0
            ptr++;
724
0
            DISPATCH;
725
726
0
        TARGET(SRE_OP_ANY):
727
            /* match anything (except a newline) */
728
            /* <ANY> */
729
0
            TRACE(("|%p|%p|ANY\n", pattern, ptr));
730
0
            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0]))
731
0
                RETURN_FAILURE;
732
0
            ptr++;
733
0
            DISPATCH;
734
735
0
        TARGET(SRE_OP_ANY_ALL):
736
            /* match anything */
737
            /* <ANY_ALL> */
738
0
            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr));
739
0
            if (ptr >= end)
740
0
                RETURN_FAILURE;
741
0
            ptr++;
742
0
            DISPATCH;
743
744
68.2M
        TARGET(SRE_OP_IN):
745
            /* match set member (or non_member) */
746
            /* <IN> <skip> <set> */
747
68.2M
            TRACE(("|%p|%p|IN\n", pattern, ptr));
748
68.2M
            if (ptr >= end ||
749
68.2M
                !SRE(charset)(state, pattern + 1, *ptr))
750
3.40M
                RETURN_FAILURE;
751
64.8M
            pattern += pattern[0];
752
64.8M
            ptr++;
753
64.8M
            DISPATCH;
754
755
64.8M
        TARGET(SRE_OP_LITERAL_IGNORE):
756
2.48M
            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
757
2.48M
                   pattern, ptr, pattern[0]));
758
2.48M
            if (ptr >= end ||
759
2.48M
                sre_lower_ascii(*ptr) != *pattern)
760
34.7k
                RETURN_FAILURE;
761
2.44M
            pattern++;
762
2.44M
            ptr++;
763
2.44M
            DISPATCH;
764
765
2.44M
        TARGET(SRE_OP_LITERAL_UNI_IGNORE):
766
0
            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
767
0
                   pattern, ptr, pattern[0]));
768
0
            if (ptr >= end ||
769
0
                sre_lower_unicode(*ptr) != *pattern)
770
0
                RETURN_FAILURE;
771
0
            pattern++;
772
0
            ptr++;
773
0
            DISPATCH;
774
775
0
        TARGET(SRE_OP_LITERAL_LOC_IGNORE):
776
0
            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
777
0
                   pattern, ptr, pattern[0]));
778
0
            if (ptr >= end
779
0
                || !char_loc_ignore(*pattern, *ptr))
780
0
                RETURN_FAILURE;
781
0
            pattern++;
782
0
            ptr++;
783
0
            DISPATCH;
784
785
0
        TARGET(SRE_OP_NOT_LITERAL_IGNORE):
786
0
            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
787
0
                   pattern, ptr, *pattern));
788
0
            if (ptr >= end ||
789
0
                sre_lower_ascii(*ptr) == *pattern)
790
0
                RETURN_FAILURE;
791
0
            pattern++;
792
0
            ptr++;
793
0
            DISPATCH;
794
795
0
        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE):
796
0
            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
797
0
                   pattern, ptr, *pattern));
798
0
            if (ptr >= end ||
799
0
                sre_lower_unicode(*ptr) == *pattern)
800
0
                RETURN_FAILURE;
801
0
            pattern++;
802
0
            ptr++;
803
0
            DISPATCH;
804
805
0
        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE):
806
0
            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
807
0
                   pattern, ptr, *pattern));
808
0
            if (ptr >= end
809
0
                || char_loc_ignore(*pattern, *ptr))
810
0
                RETURN_FAILURE;
811
0
            pattern++;
812
0
            ptr++;
813
0
            DISPATCH;
814
815
0
        TARGET(SRE_OP_IN_IGNORE):
816
0
            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr));
817
0
            if (ptr >= end
818
0
                || !SRE(charset)(state, pattern+1,
819
0
                                 (SRE_CODE)sre_lower_ascii(*ptr)))
820
0
                RETURN_FAILURE;
821
0
            pattern += pattern[0];
822
0
            ptr++;
823
0
            DISPATCH;
824
825
0
        TARGET(SRE_OP_IN_UNI_IGNORE):
826
0
            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr));
827
0
            if (ptr >= end
828
0
                || !SRE(charset)(state, pattern+1,
829
0
                                 (SRE_CODE)sre_lower_unicode(*ptr)))
830
0
                RETURN_FAILURE;
831
0
            pattern += pattern[0];
832
0
            ptr++;
833
0
            DISPATCH;
834
835
0
        TARGET(SRE_OP_IN_LOC_IGNORE):
836
0
            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr));
837
0
            if (ptr >= end
838
0
                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr))
839
0
                RETURN_FAILURE;
840
0
            pattern += pattern[0];
841
0
            ptr++;
842
0
            DISPATCH;
843
844
33.3M
        TARGET(SRE_OP_JUMP):
845
33.3M
        TARGET(SRE_OP_INFO):
846
            /* jump forward */
847
            /* <JUMP> <offset> */
848
33.3M
            TRACE(("|%p|%p|JUMP %d\n", pattern,
849
33.3M
                   ptr, pattern[0]));
850
33.3M
            pattern += pattern[0];
851
33.3M
            DISPATCH;
852
853
44.9M
        TARGET(SRE_OP_BRANCH):
854
            /* alternation */
855
            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
856
44.9M
            TRACE(("|%p|%p|BRANCH\n", pattern, ptr));
857
44.9M
            LASTMARK_SAVE();
858
44.9M
            if (state->repeat)
859
40.7M
                MARK_PUSH(ctx->lastmark);
860
93.1M
            for (; pattern[0]; pattern += pattern[0]) {
861
81.0M
                if (pattern[1] == SRE_OP_LITERAL &&
862
39.5M
                    (ptr >= end ||
863
39.5M
                     (SRE_CODE) *ptr != pattern[2]))
864
24.0M
                    continue;
865
56.9M
                if (pattern[1] == SRE_OP_IN &&
866
36.4M
                    (ptr >= end ||
867
36.4M
                     !SRE(charset)(state, pattern + 3,
868
36.4M
                                   (SRE_CODE) *ptr)))
869
23.1M
                    continue;
870
33.8M
                state->ptr = ptr;
871
33.8M
                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1);
872
33.8M
                if (ret) {
873
32.8M
                    if (state->repeat)
874
29.3M
                        MARK_POP_DISCARD(ctx->lastmark);
875
32.8M
                    RETURN_ON_ERROR(ret);
876
32.8M
                    RETURN_SUCCESS;
877
32.8M
                }
878
955k
                if (state->repeat)
879
16.2k
                    MARK_POP_KEEP(ctx->lastmark);
880
955k
                LASTMARK_RESTORE();
881
955k
            }
882
12.0M
            if (state->repeat)
883
11.4M
                MARK_POP_DISCARD(ctx->lastmark);
884
12.0M
            RETURN_FAILURE;
885
886
149M
        TARGET(SRE_OP_REPEAT_ONE):
887
            /* match repeated sequence (maximizing regexp) */
888
889
            /* this operator only works if the repeated item is
890
               exactly one character wide, and we're not already
891
               collecting backtracking points.  for other cases,
892
               use the MAX_REPEAT operator */
893
894
            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
895
896
149M
            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr,
897
149M
                   pattern[1], pattern[2]));
898
899
149M
            if ((Py_ssize_t) pattern[1] > end - ptr)
900
21.6k
                RETURN_FAILURE; /* cannot match */
901
902
149M
            state->ptr = ptr;
903
904
149M
            ret = SRE(count)(state, pattern+3, pattern[2]);
905
149M
            RETURN_ON_ERROR(ret);
906
149M
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
907
149M
            ctx->count = ret;
908
149M
            ptr += ctx->count;
909
910
            /* when we arrive here, count contains the number of
911
               matches, and ptr points to the tail of the target
912
               string.  check if the rest of the pattern matches,
913
               and backtrack if not. */
914
915
149M
            if (ctx->count < (Py_ssize_t) pattern[1])
916
72.9M
                RETURN_FAILURE;
917
918
77.0M
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
919
1.16M
                ptr == state->end &&
920
3.79k
                !(ctx->toplevel && state->must_advance && ptr == state->start))
921
3.79k
            {
922
                /* tail is empty.  we're finished */
923
3.79k
                state->ptr = ptr;
924
3.79k
                RETURN_SUCCESS;
925
3.79k
            }
926
927
77.0M
            LASTMARK_SAVE();
928
77.0M
            if (state->repeat)
929
55.9M
                MARK_PUSH(ctx->lastmark);
930
931
77.0M
            if (pattern[pattern[0]] == SRE_OP_LITERAL) {
932
                /* tail starts with a literal. skip positions where
933
                   the rest of the pattern cannot possibly match */
934
25.6M
                ctx->u.chr = pattern[pattern[0]+1];
935
25.6M
                for (;;) {
936
61.5M
                    while (ctx->count >= (Py_ssize_t) pattern[1] &&
937
42.9M
                           (ptr >= end || *ptr != ctx->u.chr)) {
938
35.8M
                        ptr--;
939
35.8M
                        ctx->count--;
940
35.8M
                    }
941
25.6M
                    if (ctx->count < (Py_ssize_t) pattern[1])
942
18.5M
                        break;
943
7.13M
                    state->ptr = ptr;
944
7.13M
                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
945
7.13M
                            pattern+pattern[0]);
946
7.13M
                    if (ret) {
947
7.13M
                        if (state->repeat)
948
7.12M
                            MARK_POP_DISCARD(ctx->lastmark);
949
7.13M
                        RETURN_ON_ERROR(ret);
950
7.13M
                        RETURN_SUCCESS;
951
7.13M
                    }
952
281
                    if (state->repeat)
953
281
                        MARK_POP_KEEP(ctx->lastmark);
954
281
                    LASTMARK_RESTORE();
955
956
281
                    ptr--;
957
281
                    ctx->count--;
958
281
                }
959
18.5M
                if (state->repeat)
960
18.5M
                    MARK_POP_DISCARD(ctx->lastmark);
961
51.3M
            } else {
962
                /* general case */
963
51.9M
                while (ctx->count >= (Py_ssize_t) pattern[1]) {
964
51.8M
                    state->ptr = ptr;
965
51.8M
                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
966
51.8M
                            pattern+pattern[0]);
967
51.8M
                    if (ret) {
968
51.2M
                        if (state->repeat)
969
30.1M
                            MARK_POP_DISCARD(ctx->lastmark);
970
51.2M
                        RETURN_ON_ERROR(ret);
971
51.2M
                        RETURN_SUCCESS;
972
51.2M
                    }
973
599k
                    if (state->repeat)
974
162k
                        MARK_POP_KEEP(ctx->lastmark);
975
599k
                    LASTMARK_RESTORE();
976
977
599k
                    ptr--;
978
599k
                    ctx->count--;
979
599k
                }
980
81.8k
                if (state->repeat)
981
81.4k
                    MARK_POP_DISCARD(ctx->lastmark);
982
81.8k
            }
983
18.6M
            RETURN_FAILURE;
984
985
0
        TARGET(SRE_OP_MIN_REPEAT_ONE):
986
            /* match repeated sequence (minimizing regexp) */
987
988
            /* this operator only works if the repeated item is
989
               exactly one character wide, and we're not already
990
               collecting backtracking points.  for other cases,
991
               use the MIN_REPEAT operator */
992
993
            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
994
995
0
            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr,
996
0
                   pattern[1], pattern[2]));
997
998
0
            if ((Py_ssize_t) pattern[1] > end - ptr)
999
0
                RETURN_FAILURE; /* cannot match */
1000
1001
0
            state->ptr = ptr;
1002
1003
0
            if (pattern[1] == 0)
1004
0
                ctx->count = 0;
1005
0
            else {
1006
                /* count using pattern min as the maximum */
1007
0
                ret = SRE(count)(state, pattern+3, pattern[1]);
1008
0
                RETURN_ON_ERROR(ret);
1009
0
                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1010
0
                if (ret < (Py_ssize_t) pattern[1])
1011
                    /* didn't match minimum number of times */
1012
0
                    RETURN_FAILURE;
1013
                /* advance past minimum matches of repeat */
1014
0
                ctx->count = ret;
1015
0
                ptr += ctx->count;
1016
0
            }
1017
1018
0
            if (pattern[pattern[0]] == SRE_OP_SUCCESS &&
1019
0
                !(ctx->toplevel &&
1020
0
                  ((state->match_all && ptr != state->end) ||
1021
0
                   (state->must_advance && ptr == state->start))))
1022
0
            {
1023
                /* tail is empty.  we're finished */
1024
0
                state->ptr = ptr;
1025
0
                RETURN_SUCCESS;
1026
1027
0
            } else {
1028
                /* general case */
1029
0
                LASTMARK_SAVE();
1030
0
                if (state->repeat)
1031
0
                    MARK_PUSH(ctx->lastmark);
1032
1033
0
                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT
1034
0
                       || ctx->count <= (Py_ssize_t)pattern[2]) {
1035
0
                    state->ptr = ptr;
1036
0
                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1037
0
                            pattern+pattern[0]);
1038
0
                    if (ret) {
1039
0
                        if (state->repeat)
1040
0
                            MARK_POP_DISCARD(ctx->lastmark);
1041
0
                        RETURN_ON_ERROR(ret);
1042
0
                        RETURN_SUCCESS;
1043
0
                    }
1044
0
                    if (state->repeat)
1045
0
                        MARK_POP_KEEP(ctx->lastmark);
1046
0
                    LASTMARK_RESTORE();
1047
1048
0
                    state->ptr = ptr;
1049
0
                    ret = SRE(count)(state, pattern+3, 1);
1050
0
                    RETURN_ON_ERROR(ret);
1051
0
                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1052
0
                    if (ret == 0)
1053
0
                        break;
1054
0
                    assert(ret == 1);
1055
0
                    ptr++;
1056
0
                    ctx->count++;
1057
0
                }
1058
0
                if (state->repeat)
1059
0
                    MARK_POP_DISCARD(ctx->lastmark);
1060
0
            }
1061
0
            RETURN_FAILURE;
1062
1063
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE):
1064
            /* match repeated sequence (maximizing regexp) without
1065
               backtracking */
1066
1067
            /* this operator only works if the repeated item is
1068
               exactly one character wide, and we're not already
1069
               collecting backtracking points.  for other cases,
1070
               use the MAX_REPEAT operator */
1071
1072
            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1073
               tail */
1074
1075
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern,
1076
0
                   ptr, pattern[1], pattern[2]));
1077
1078
0
            if (ptr + pattern[1] > end) {
1079
0
                RETURN_FAILURE; /* cannot match */
1080
0
            }
1081
1082
0
            state->ptr = ptr;
1083
1084
0
            ret = SRE(count)(state, pattern + 3, pattern[2]);
1085
0
            RETURN_ON_ERROR(ret);
1086
0
            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1087
0
            ctx->count = ret;
1088
0
            ptr += ctx->count;
1089
1090
            /* when we arrive here, count contains the number of
1091
               matches, and ptr points to the tail of the target
1092
               string.  check if the rest of the pattern matches,
1093
               and fail if not. */
1094
1095
            /* Test for not enough repetitions in match */
1096
0
            if (ctx->count < (Py_ssize_t) pattern[1]) {
1097
0
                RETURN_FAILURE;
1098
0
            }
1099
1100
            /* Update the pattern to point to the next op code */
1101
0
            pattern += pattern[0];
1102
1103
            /* Let the tail be evaluated separately and consider this
1104
               match successful. */
1105
0
            if (*pattern == SRE_OP_SUCCESS &&
1106
0
                ptr == state->end &&
1107
0
                !(ctx->toplevel && state->must_advance && ptr == state->start))
1108
0
            {
1109
                /* tail is empty.  we're finished */
1110
0
                state->ptr = ptr;
1111
0
                RETURN_SUCCESS;
1112
0
            }
1113
1114
            /* Attempt to match the rest of the string */
1115
0
            DISPATCH;
1116
1117
34.9M
        TARGET(SRE_OP_REPEAT):
1118
            /* create repeat context.  all the hard work is done
1119
               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1120
            /* <REPEAT> <skip> <1=min> <2=max>
1121
               <3=repeat_index> item <UNTIL> tail */
1122
34.9M
            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
1123
34.9M
                   pattern[1], pattern[2]));
1124
1125
            /* install new repeat context */
1126
34.9M
            ctx->u.rep = repeat_pool_malloc(state);
1127
34.9M
            if (!ctx->u.rep) {
1128
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1129
0
            }
1130
34.9M
            ctx->u.rep->count = -1;
1131
34.9M
            ctx->u.rep->pattern = pattern;
1132
34.9M
            ctx->u.rep->prev = state->repeat;
1133
34.9M
            ctx->u.rep->last_ptr = NULL;
1134
34.9M
            state->repeat = ctx->u.rep;
1135
1136
34.9M
            state->ptr = ptr;
1137
34.9M
            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
1138
34.9M
            state->repeat = ctx->u.rep->prev;
1139
34.9M
            repeat_pool_free(state, ctx->u.rep);
1140
1141
34.9M
            if (ret) {
1142
34.9M
                RETURN_ON_ERROR(ret);
1143
34.9M
                RETURN_SUCCESS;
1144
34.9M
            }
1145
451
            RETURN_FAILURE;
1146
1147
76.4M
        TARGET(SRE_OP_MAX_UNTIL):
1148
            /* maximizing repeat */
1149
            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1150
1151
            /* FIXME: we probably need to deal with zero-width
1152
               matches in here... */
1153
1154
76.4M
            ctx->u.rep = state->repeat;
1155
76.4M
            if (!ctx->u.rep)
1156
0
                RETURN_ERROR(SRE_ERROR_STATE);
1157
1158
76.4M
            state->ptr = ptr;
1159
1160
76.4M
            ctx->count = ctx->u.rep->count+1;
1161
1162
76.4M
            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern,
1163
76.4M
                   ptr, ctx->count));
1164
1165
76.4M
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1166
                /* not enough matches */
1167
0
                ctx->u.rep->count = ctx->count;
1168
0
                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1169
0
                        ctx->u.rep->pattern+3);
1170
0
                if (ret) {
1171
0
                    RETURN_ON_ERROR(ret);
1172
0
                    RETURN_SUCCESS;
1173
0
                }
1174
0
                ctx->u.rep->count = ctx->count-1;
1175
0
                state->ptr = ptr;
1176
0
                RETURN_FAILURE;
1177
0
            }
1178
1179
76.4M
            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1180
4.42M
                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1181
72.0M
                state->ptr != ctx->u.rep->last_ptr) {
1182
                /* we may have enough matches, but if we can
1183
                   match another item, do so */
1184
72.0M
                ctx->u.rep->count = ctx->count;
1185
72.0M
                LASTMARK_SAVE();
1186
72.0M
                MARK_PUSH(ctx->lastmark);
1187
                /* zero-width match protection */
1188
72.0M
                LAST_PTR_PUSH();
1189
72.0M
                ctx->u.rep->last_ptr = state->ptr;
1190
72.0M
                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1191
72.0M
                        ctx->u.rep->pattern+3);
1192
72.0M
                LAST_PTR_POP();
1193
72.0M
                if (ret) {
1194
41.4M
                    MARK_POP_DISCARD(ctx->lastmark);
1195
41.4M
                    RETURN_ON_ERROR(ret);
1196
41.4M
                    RETURN_SUCCESS;
1197
41.4M
                }
1198
30.5M
                MARK_POP(ctx->lastmark);
1199
30.5M
                LASTMARK_RESTORE();
1200
30.5M
                ctx->u.rep->count = ctx->count-1;
1201
30.5M
                state->ptr = ptr;
1202
30.5M
            }
1203
1204
            /* cannot match more repeated items here.  make sure the
1205
               tail matches */
1206
35.0M
            state->repeat = ctx->u.rep->prev;
1207
35.0M
            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern);
1208
35.0M
            state->repeat = ctx->u.rep; // restore repeat before return
1209
1210
35.0M
            RETURN_ON_SUCCESS(ret);
1211
81.6k
            state->ptr = ptr;
1212
81.6k
            RETURN_FAILURE;
1213
1214
0
        TARGET(SRE_OP_MIN_UNTIL):
1215
            /* minimizing repeat */
1216
            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1217
1218
0
            ctx->u.rep = state->repeat;
1219
0
            if (!ctx->u.rep)
1220
0
                RETURN_ERROR(SRE_ERROR_STATE);
1221
1222
0
            state->ptr = ptr;
1223
1224
0
            ctx->count = ctx->u.rep->count+1;
1225
1226
0
            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern,
1227
0
                   ptr, ctx->count, ctx->u.rep->pattern));
1228
1229
0
            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1230
                /* not enough matches */
1231
0
                ctx->u.rep->count = ctx->count;
1232
0
                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1233
0
                        ctx->u.rep->pattern+3);
1234
0
                if (ret) {
1235
0
                    RETURN_ON_ERROR(ret);
1236
0
                    RETURN_SUCCESS;
1237
0
                }
1238
0
                ctx->u.rep->count = ctx->count-1;
1239
0
                state->ptr = ptr;
1240
0
                RETURN_FAILURE;
1241
0
            }
1242
1243
            /* see if the tail matches */
1244
0
            state->repeat = ctx->u.rep->prev;
1245
1246
0
            LASTMARK_SAVE();
1247
0
            if (state->repeat)
1248
0
                MARK_PUSH(ctx->lastmark);
1249
1250
0
            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern);
1251
0
            SRE_REPEAT *repeat_of_tail = state->repeat;
1252
0
            state->repeat = ctx->u.rep; // restore repeat before return
1253
1254
0
            if (ret) {
1255
0
                if (repeat_of_tail)
1256
0
                    MARK_POP_DISCARD(ctx->lastmark);
1257
0
                RETURN_ON_ERROR(ret);
1258
0
                RETURN_SUCCESS;
1259
0
            }
1260
0
            if (repeat_of_tail)
1261
0
                MARK_POP(ctx->lastmark);
1262
0
            LASTMARK_RESTORE();
1263
1264
0
            state->ptr = ptr;
1265
1266
0
            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1267
0
                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1268
0
                state->ptr == ctx->u.rep->last_ptr)
1269
0
                RETURN_FAILURE;
1270
1271
0
            ctx->u.rep->count = ctx->count;
1272
            /* zero-width match protection */
1273
0
            LAST_PTR_PUSH();
1274
0
            ctx->u.rep->last_ptr = state->ptr;
1275
0
            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1276
0
                    ctx->u.rep->pattern+3);
1277
0
            LAST_PTR_POP();
1278
0
            if (ret) {
1279
0
                RETURN_ON_ERROR(ret);
1280
0
                RETURN_SUCCESS;
1281
0
            }
1282
0
            ctx->u.rep->count = ctx->count-1;
1283
0
            state->ptr = ptr;
1284
0
            RETURN_FAILURE;
1285
1286
0
        TARGET(SRE_OP_POSSESSIVE_REPEAT):
1287
            /* create possessive repeat contexts. */
1288
            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1289
               <SUCCESS> tail */
1290
0
            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern,
1291
0
                   ptr, pattern[1], pattern[2]));
1292
1293
            /* Set the global Input pointer to this context's Input
1294
               pointer */
1295
0
            state->ptr = ptr;
1296
1297
            /* Set state->repeat to non-NULL */
1298
0
            ctx->u.rep = repeat_pool_malloc(state);
1299
0
            if (!ctx->u.rep) {
1300
0
                RETURN_ERROR(SRE_ERROR_MEMORY);
1301
0
            }
1302
0
            ctx->u.rep->count = -1;
1303
0
            ctx->u.rep->pattern = NULL;
1304
0
            ctx->u.rep->prev = state->repeat;
1305
0
            ctx->u.rep->last_ptr = NULL;
1306
0
            state->repeat = ctx->u.rep;
1307
1308
            /* Initialize Count to 0 */
1309
0
            ctx->count = 0;
1310
1311
            /* Check for minimum required matches. */
1312
0
            while (ctx->count < (Py_ssize_t)pattern[1]) {
1313
                /* not enough matches */
1314
0
                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1315
0
                         &pattern[3]);
1316
0
                if (ret) {
1317
0
                    RETURN_ON_ERROR(ret);
1318
0
                    ctx->count++;
1319
0
                }
1320
0
                else {
1321
0
                    state->ptr = ptr;
1322
                    /* Restore state->repeat */
1323
0
                    state->repeat = ctx->u.rep->prev;
1324
0
                    repeat_pool_free(state, ctx->u.rep);
1325
0
                    RETURN_FAILURE;
1326
0
                }
1327
0
            }
1328
1329
            /* Clear the context's Input stream pointer so that it
1330
               doesn't match the global state so that the while loop can
1331
               be entered. */
1332
0
            ptr = NULL;
1333
1334
            /* Keep trying to parse the <pattern> sub-pattern until the
1335
               end is reached, creating a new context each time. */
1336
0
            while ((ctx->count < (Py_ssize_t)pattern[2] ||
1337
0
                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) &&
1338
0
                   state->ptr != ptr) {
1339
                /* Save the Capture Group Marker state into the current
1340
                   Context and back up the current highest number
1341
                   Capture Group marker. */
1342
0
                LASTMARK_SAVE();
1343
0
                MARK_PUSH(ctx->lastmark);
1344
1345
                /* zero-width match protection */
1346
                /* Set the context's Input Stream pointer to be the
1347
                   current Input Stream pointer from the global
1348
                   state.  When the loop reaches the next iteration,
1349
                   the context will then store the last known good
1350
                   position with the global state holding the Input
1351
                   Stream position that has been updated with
1352
                   the most recent match.  Thus, if state's Input
1353
                   stream remains the same as the one stored in the
1354
                   current Context, we know we have successfully
1355
                   matched an empty string and that all subsequent
1356
                   matches will also be the empty string until the
1357
                   maximum number of matches are counted, and because
1358
                   of this, we could immediately stop at that point and
1359
                   consider this match successful. */
1360
0
                ptr = state->ptr;
1361
1362
                /* We have not reached the maximum number of matches, so try to
1363
                   match once more. */
1364
0
                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1365
0
                         &pattern[3]);
1366
1367
                /* Check to see if the last attempted match
1368
                   succeeded. */
1369
0
                if (ret) {
1370
                    /* Drop the saved highest number Capture Group
1371
                       marker saved above and use the newly updated
1372
                       value. */
1373
0
                    MARK_POP_DISCARD(ctx->lastmark);
1374
0
                    RETURN_ON_ERROR(ret);
1375
1376
                    /* Success, increment the count. */
1377
0
                    ctx->count++;
1378
0
                }
1379
                /* Last attempted match failed. */
1380
0
                else {
1381
                    /* Restore the previously saved highest number
1382
                       Capture Group marker since the last iteration
1383
                       did not match, then restore that to the global
1384
                       state. */
1385
0
                    MARK_POP(ctx->lastmark);
1386
0
                    LASTMARK_RESTORE();
1387
1388
                    /* Restore the global Input Stream pointer
1389
                       since it can change after jumps. */
1390
0
                    state->ptr = ptr;
1391
1392
                    /* We have sufficient matches, so exit loop. */
1393
0
                    break;
1394
0
                }
1395
0
            }
1396
1397
            /* Restore state->repeat */
1398
0
            state->repeat = ctx->u.rep->prev;
1399
0
            repeat_pool_free(state, ctx->u.rep);
1400
1401
            /* Evaluate Tail */
1402
            /* Jump to end of pattern indicated by skip, and then skip
1403
               the SUCCESS op code that follows it. */
1404
0
            pattern += pattern[0] + 1;
1405
0
            ptr = state->ptr;
1406
0
            DISPATCH;
1407
1408
0
        TARGET(SRE_OP_ATOMIC_GROUP):
1409
            /* Atomic Group Sub Pattern */
1410
            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1411
0
            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr));
1412
1413
            /* Set the global Input pointer to this context's Input
1414
               pointer */
1415
0
            state->ptr = ptr;
1416
1417
            /* Evaluate the Atomic Group in a new context, terminating
1418
               when the end of the group, represented by a SUCCESS op
1419
               code, is reached. */
1420
            /* Group Pattern begins at an offset of 1 code. */
1421
0
            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group,
1422
0
                     &pattern[1]);
1423
1424
            /* Test Exit Condition */
1425
0
            RETURN_ON_ERROR(ret);
1426
1427
0
            if (ret == 0) {
1428
                /* Atomic Group failed to Match. */
1429
0
                state->ptr = ptr;
1430
0
                RETURN_FAILURE;
1431
0
            }
1432
1433
            /* Evaluate Tail */
1434
            /* Jump to end of pattern indicated by skip, and then skip
1435
               the SUCCESS op code that follows it. */
1436
0
            pattern += pattern[0];
1437
0
            ptr = state->ptr;
1438
0
            DISPATCH;
1439
1440
0
        TARGET(SRE_OP_GROUPREF):
1441
            /* match backreference */
1442
0
            TRACE(("|%p|%p|GROUPREF %d\n", pattern,
1443
0
                   ptr, pattern[0]));
1444
0
            {
1445
0
                int groupref = pattern[0] * 2;
1446
0
                if (groupref >= state->lastmark) {
1447
0
                    RETURN_FAILURE;
1448
0
                } else {
1449
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1450
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1451
0
                    if (!p || !e || e < p)
1452
0
                        RETURN_FAILURE;
1453
0
                    while (p < e) {
1454
0
                        if (ptr >= end || *ptr != *p)
1455
0
                            RETURN_FAILURE;
1456
0
                        p++;
1457
0
                        ptr++;
1458
0
                    }
1459
0
                }
1460
0
            }
1461
0
            pattern++;
1462
0
            DISPATCH;
1463
1464
0
        TARGET(SRE_OP_GROUPREF_IGNORE):
1465
            /* match backreference */
1466
0
            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
1467
0
                   ptr, pattern[0]));
1468
0
            {
1469
0
                int groupref = pattern[0] * 2;
1470
0
                if (groupref >= state->lastmark) {
1471
0
                    RETURN_FAILURE;
1472
0
                } else {
1473
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1474
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1475
0
                    if (!p || !e || e < p)
1476
0
                        RETURN_FAILURE;
1477
0
                    while (p < e) {
1478
0
                        if (ptr >= end ||
1479
0
                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p))
1480
0
                            RETURN_FAILURE;
1481
0
                        p++;
1482
0
                        ptr++;
1483
0
                    }
1484
0
                }
1485
0
            }
1486
0
            pattern++;
1487
0
            DISPATCH;
1488
1489
0
        TARGET(SRE_OP_GROUPREF_UNI_IGNORE):
1490
            /* match backreference */
1491
0
            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
1492
0
                   ptr, pattern[0]));
1493
0
            {
1494
0
                int groupref = pattern[0] * 2;
1495
0
                if (groupref >= state->lastmark) {
1496
0
                    RETURN_FAILURE;
1497
0
                } else {
1498
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1499
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1500
0
                    if (!p || !e || e < p)
1501
0
                        RETURN_FAILURE;
1502
0
                    while (p < e) {
1503
0
                        if (ptr >= end ||
1504
0
                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p))
1505
0
                            RETURN_FAILURE;
1506
0
                        p++;
1507
0
                        ptr++;
1508
0
                    }
1509
0
                }
1510
0
            }
1511
0
            pattern++;
1512
0
            DISPATCH;
1513
1514
0
        TARGET(SRE_OP_GROUPREF_LOC_IGNORE):
1515
            /* match backreference */
1516
0
            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
1517
0
                   ptr, pattern[0]));
1518
0
            {
1519
0
                int groupref = pattern[0] * 2;
1520
0
                if (groupref >= state->lastmark) {
1521
0
                    RETURN_FAILURE;
1522
0
                } else {
1523
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1524
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1525
0
                    if (!p || !e || e < p)
1526
0
                        RETURN_FAILURE;
1527
0
                    while (p < e) {
1528
0
                        if (ptr >= end ||
1529
0
                            sre_lower_locale(*ptr) != sre_lower_locale(*p))
1530
0
                            RETURN_FAILURE;
1531
0
                        p++;
1532
0
                        ptr++;
1533
0
                    }
1534
0
                }
1535
0
            }
1536
0
            pattern++;
1537
0
            DISPATCH;
1538
1539
0
        TARGET(SRE_OP_GROUPREF_EXISTS):
1540
0
            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
1541
0
                   ptr, pattern[0]));
1542
            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1543
0
            {
1544
0
                int groupref = pattern[0] * 2;
1545
0
                if (groupref >= state->lastmark) {
1546
0
                    pattern += pattern[1];
1547
0
                    DISPATCH;
1548
0
                } else {
1549
0
                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1550
0
                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1551
0
                    if (!p || !e || e < p) {
1552
0
                        pattern += pattern[1];
1553
0
                        DISPATCH;
1554
0
                    }
1555
0
                }
1556
0
            }
1557
0
            pattern += 2;
1558
0
            DISPATCH;
1559
1560
24.4M
        TARGET(SRE_OP_ASSERT):
1561
            /* assert subpattern */
1562
            /* <ASSERT> <skip> <back> <pattern> */
1563
24.4M
            TRACE(("|%p|%p|ASSERT %d\n", pattern,
1564
24.4M
                   ptr, pattern[1]));
1565
24.4M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1])
1566
0
                RETURN_FAILURE;
1567
24.4M
            state->ptr = ptr - pattern[1];
1568
24.4M
            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2);
1569
24.4M
            RETURN_ON_FAILURE(ret);
1570
23.1M
            pattern += pattern[0];
1571
23.1M
            DISPATCH;
1572
1573
23.1M
        TARGET(SRE_OP_ASSERT_NOT):
1574
            /* assert not subpattern */
1575
            /* <ASSERT_NOT> <skip> <back> <pattern> */
1576
13.2M
            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern,
1577
13.2M
                   ptr, pattern[1]));
1578
13.2M
            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) {
1579
13.2M
                state->ptr = ptr - pattern[1];
1580
13.2M
                LASTMARK_SAVE();
1581
13.2M
                if (state->repeat)
1582
13.2M
                    MARK_PUSH(ctx->lastmark);
1583
1584
26.4M
                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2);
1585
26.4M
                if (ret) {
1586
15.9k
                    if (state->repeat)
1587
15.9k
                        MARK_POP_DISCARD(ctx->lastmark);
1588
15.9k
                    RETURN_ON_ERROR(ret);
1589
15.9k
                    RETURN_FAILURE;
1590
15.9k
                }
1591
13.2M
                if (state->repeat)
1592
13.2M
                    MARK_POP(ctx->lastmark);
1593
13.2M
                LASTMARK_RESTORE();
1594
13.2M
            }
1595
13.2M
            pattern += pattern[0];
1596
13.2M
            DISPATCH;
1597
1598
13.2M
        TARGET(SRE_OP_FAILURE):
1599
            /* immediate failure */
1600
0
            TRACE(("|%p|%p|FAILURE\n", pattern, ptr));
1601
0
            RETURN_FAILURE;
1602
1603
#if !USE_COMPUTED_GOTOS
1604
        default:
1605
#endif
1606
        // Also any unused opcodes:
1607
0
        TARGET(SRE_OP_RANGE_UNI_IGNORE):
1608
0
        TARGET(SRE_OP_SUBPATTERN):
1609
0
        TARGET(SRE_OP_RANGE):
1610
0
        TARGET(SRE_OP_NEGATE):
1611
0
        TARGET(SRE_OP_BIGCHARSET):
1612
0
        TARGET(SRE_OP_CHARSET):
1613
0
            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr,
1614
0
                   pattern[-1]));
1615
0
            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1616
1617
0
    }
1618
1619
367M
exit:
1620
367M
    ctx_pos = ctx->last_ctx_pos;
1621
367M
    jump = ctx->jump;
1622
367M
    DATA_POP_DISCARD(ctx);
1623
367M
    if (ctx_pos == -1) {
1624
94.9M
        state->sigcount = sigcount;
1625
94.9M
        return ret;
1626
94.9M
    }
1627
272M
    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1628
1629
272M
    switch (jump) {
1630
72.0M
        case JUMP_MAX_UNTIL_2:
1631
72.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr));
1632
72.0M
            goto jump_max_until_2;
1633
35.0M
        case JUMP_MAX_UNTIL_3:
1634
35.0M
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr));
1635
35.0M
            goto jump_max_until_3;
1636
0
        case JUMP_MIN_UNTIL_2:
1637
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr));
1638
0
            goto jump_min_until_2;
1639
0
        case JUMP_MIN_UNTIL_3:
1640
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr));
1641
0
            goto jump_min_until_3;
1642
33.8M
        case JUMP_BRANCH:
1643
33.8M
            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr));
1644
33.8M
            goto jump_branch;
1645
0
        case JUMP_MAX_UNTIL_1:
1646
0
            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr));
1647
0
            goto jump_max_until_1;
1648
0
        case JUMP_MIN_UNTIL_1:
1649
0
            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr));
1650
0
            goto jump_min_until_1;
1651
0
        case JUMP_POSS_REPEAT_1:
1652
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr));
1653
0
            goto jump_poss_repeat_1;
1654
0
        case JUMP_POSS_REPEAT_2:
1655
0
            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr));
1656
0
            goto jump_poss_repeat_2;
1657
34.9M
        case JUMP_REPEAT:
1658
34.9M
            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr));
1659
34.9M
            goto jump_repeat;
1660
7.13M
        case JUMP_REPEAT_ONE_1:
1661
7.13M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr));
1662
7.13M
            goto jump_repeat_one_1;
1663
51.8M
        case JUMP_REPEAT_ONE_2:
1664
51.8M
            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr));
1665
51.8M
            goto jump_repeat_one_2;
1666
0
        case JUMP_MIN_REPEAT_ONE:
1667
0
            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr));
1668
0
            goto jump_min_repeat_one;
1669
0
        case JUMP_ATOMIC_GROUP:
1670
0
            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr));
1671
0
            goto jump_atomic_group;
1672
24.4M
        case JUMP_ASSERT:
1673
24.4M
            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr));
1674
24.4M
            goto jump_assert;
1675
13.2M
        case JUMP_ASSERT_NOT:
1676
13.2M
            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr));
1677
13.2M
            goto jump_assert_not;
1678
0
        case JUMP_NONE:
1679
0
            TRACE(("|%p|%p|RETURN %zd\n", pattern,
1680
0
                   ptr, ret));
1681
0
            break;
1682
272M
    }
1683
1684
0
    return ret; /* should never get here */
1685
272M
}
1686
1687
/* need to reset capturing groups between two SRE(match) calls in loops */
1688
#define RESET_CAPTURE_GROUP() \
1689
324M
    do { state->lastmark = state->lastindex = -1; } while (0)
1690
1691
LOCAL(Py_ssize_t)
1692
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1693
91.6M
{
1694
91.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
91.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
91.6M
    Py_ssize_t status = 0;
1697
91.6M
    Py_ssize_t prefix_len = 0;
1698
91.6M
    Py_ssize_t prefix_skip = 0;
1699
91.6M
    SRE_CODE* prefix = NULL;
1700
91.6M
    SRE_CODE* charset = NULL;
1701
91.6M
    SRE_CODE* overlap = NULL;
1702
91.6M
    int flags = 0;
1703
91.6M
    INIT_TRACE(state);
1704
1705
91.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
91.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
91.6M
        flags = pattern[2];
1713
1714
91.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
2.30M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
2.30M
                   end - ptr, (size_t) pattern[3]));
1717
2.30M
            return 0;
1718
2.30M
        }
1719
89.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
9.17M
            end -= pattern[3] - 1;
1723
9.17M
            if (end <= ptr)
1724
0
                end = ptr;
1725
9.17M
        }
1726
1727
89.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
9.17M
            prefix_len = pattern[5];
1731
9.17M
            prefix_skip = pattern[6];
1732
9.17M
            prefix = pattern + 7;
1733
9.17M
            overlap = prefix + prefix_len - 1;
1734
80.1M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
74.8M
            charset = pattern + 5;
1738
1739
89.3M
        pattern += 1 + pattern[1];
1740
89.3M
    }
1741
1742
89.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
89.3M
           prefix, prefix_len, prefix_skip));
1744
89.3M
    TRACE(("charset = %p\n", charset));
1745
1746
89.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
8.21M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
4.62M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
4.62M
#endif
1753
4.62M
        end = (SRE_CHAR *)state->end;
1754
4.62M
        state->must_advance = 0;
1755
8.78M
        while (ptr < end) {
1756
115M
            while (*ptr != c) {
1757
107M
                if (++ptr >= end)
1758
599k
                    return 0;
1759
107M
            }
1760
8.14M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
8.14M
            state->start = ptr;
1762
8.14M
            state->ptr = ptr + prefix_skip;
1763
8.14M
            if (flags & SRE_INFO_LITERAL)
1764
3.72k
                return 1; /* we got all of it */
1765
8.14M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
8.14M
            if (status != 0)
1767
7.57M
                return status;
1768
569k
            ++ptr;
1769
569k
            RESET_CAPTURE_GROUP();
1770
569k
        }
1771
38.0k
        return 0;
1772
4.62M
    }
1773
1774
81.0M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
963k
        Py_ssize_t i = 0;
1778
1779
963k
        end = (SRE_CHAR *)state->end;
1780
963k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
2.17M
        for (i = 0; i < prefix_len; i++)
1784
1.44M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
724k
#endif
1787
1.84M
        while (ptr < end) {
1788
1.84M
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
10.0M
            while (*ptr++ != c) {
1790
8.19M
                if (ptr >= end)
1791
347
                    return 0;
1792
8.19M
            }
1793
1.84M
            if (ptr >= end)
1794
63
                return 0;
1795
1796
1.84M
            i = 1;
1797
1.84M
            state->must_advance = 0;
1798
1.84M
            do {
1799
1.84M
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
1.69M
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
1.69M
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
1.69M
                    state->start = ptr - (prefix_len - 1);
1808
1.69M
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
1.69M
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
1.69M
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
1.69M
                    if (status != 0)
1813
962k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
737k
                    if (++ptr >= end)
1816
36
                        return 0;
1817
737k
                    RESET_CAPTURE_GROUP();
1818
737k
                }
1819
882k
                i = overlap[i];
1820
882k
            } while (i != 0);
1821
1.84M
        }
1822
0
        return 0;
1823
963k
    }
1824
1825
80.1M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
74.8M
        end = (SRE_CHAR *)state->end;
1828
74.8M
        state->must_advance = 0;
1829
77.4M
        for (;;) {
1830
338M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
260M
                ptr++;
1832
77.4M
            if (ptr >= end)
1833
4.26M
                return 0;
1834
73.2M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
73.2M
            state->start = ptr;
1836
73.2M
            state->ptr = ptr;
1837
73.2M
            status = SRE(match)(state, pattern, 0);
1838
73.2M
            if (status != 0)
1839
70.5M
                break;
1840
2.64M
            ptr++;
1841
2.64M
            RESET_CAPTURE_GROUP();
1842
2.64M
        }
1843
74.8M
    } else {
1844
        /* general case */
1845
5.28M
        assert(ptr <= end);
1846
5.28M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
5.28M
        state->start = state->ptr = ptr;
1848
5.28M
        status = SRE(match)(state, pattern, 1);
1849
5.28M
        state->must_advance = 0;
1850
5.28M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
52
            (pattern[1] == SRE_AT_BEGINNING ||
1852
52
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
325M
        while (status == 0 && ptr < end) {
1858
320M
            ptr++;
1859
320M
            RESET_CAPTURE_GROUP();
1860
320M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
320M
            state->start = state->ptr = ptr;
1862
320M
            status = SRE(match)(state, pattern, 0);
1863
320M
        }
1864
5.28M
    }
1865
1866
75.8M
    return status;
1867
80.1M
}
sre.c:sre_ucs1_search
Line
Count
Source
1693
35.5M
{
1694
35.5M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
35.5M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
35.5M
    Py_ssize_t status = 0;
1697
35.5M
    Py_ssize_t prefix_len = 0;
1698
35.5M
    Py_ssize_t prefix_skip = 0;
1699
35.5M
    SRE_CODE* prefix = NULL;
1700
35.5M
    SRE_CODE* charset = NULL;
1701
35.5M
    SRE_CODE* overlap = NULL;
1702
35.5M
    int flags = 0;
1703
35.5M
    INIT_TRACE(state);
1704
1705
35.5M
    if (ptr > end)
1706
0
        return 0;
1707
1708
35.5M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
35.5M
        flags = pattern[2];
1713
1714
35.5M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
2.16M
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
2.16M
                   end - ptr, (size_t) pattern[3]));
1717
2.16M
            return 0;
1718
2.16M
        }
1719
33.3M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.59M
            end -= pattern[3] - 1;
1723
2.59M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.59M
        }
1726
1727
33.3M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.59M
            prefix_len = pattern[5];
1731
2.59M
            prefix_skip = pattern[6];
1732
2.59M
            prefix = pattern + 7;
1733
2.59M
            overlap = prefix + prefix_len - 1;
1734
30.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
26.9M
            charset = pattern + 5;
1738
1739
33.3M
        pattern += 1 + pattern[1];
1740
33.3M
    }
1741
1742
33.3M
    TRACE(("prefix = %p %zd %zd\n",
1743
33.3M
           prefix, prefix_len, prefix_skip));
1744
33.3M
    TRACE(("charset = %p\n", charset));
1745
1746
33.3M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.53M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.53M
#if SIZEOF_SRE_CHAR < 4
1750
2.53M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.53M
#endif
1753
2.53M
        end = (SRE_CHAR *)state->end;
1754
2.53M
        state->must_advance = 0;
1755
2.72M
        while (ptr < end) {
1756
29.7M
            while (*ptr != c) {
1757
27.6M
                if (++ptr >= end)
1758
527k
                    return 0;
1759
27.6M
            }
1760
2.16M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.16M
            state->start = ptr;
1762
2.16M
            state->ptr = ptr + prefix_skip;
1763
2.16M
            if (flags & SRE_INFO_LITERAL)
1764
252
                return 1; /* we got all of it */
1765
2.16M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.16M
            if (status != 0)
1767
1.96M
                return status;
1768
195k
            ++ptr;
1769
195k
            RESET_CAPTURE_GROUP();
1770
195k
        }
1771
34.5k
        return 0;
1772
2.53M
    }
1773
1774
30.8M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
61.4k
        Py_ssize_t i = 0;
1778
1779
61.4k
        end = (SRE_CHAR *)state->end;
1780
61.4k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
61.4k
#if SIZEOF_SRE_CHAR < 4
1783
184k
        for (i = 0; i < prefix_len; i++)
1784
122k
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
61.4k
#endif
1787
311k
        while (ptr < end) {
1788
311k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
2.12M
            while (*ptr++ != c) {
1790
1.80M
                if (ptr >= end)
1791
71
                    return 0;
1792
1.80M
            }
1793
311k
            if (ptr >= end)
1794
26
                return 0;
1795
1796
311k
            i = 1;
1797
311k
            state->must_advance = 0;
1798
311k
            do {
1799
311k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
243k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
243k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
243k
                    state->start = ptr - (prefix_len - 1);
1808
243k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
243k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
243k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
243k
                    if (status != 0)
1813
61.2k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
181k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
181k
                    RESET_CAPTURE_GROUP();
1818
181k
                }
1819
250k
                i = overlap[i];
1820
250k
            } while (i != 0);
1821
311k
        }
1822
0
        return 0;
1823
61.4k
    }
1824
1825
30.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
26.9M
        end = (SRE_CHAR *)state->end;
1828
26.9M
        state->must_advance = 0;
1829
28.6M
        for (;;) {
1830
80.5M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
51.9M
                ptr++;
1832
28.6M
            if (ptr >= end)
1833
3.02M
                return 0;
1834
25.6M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
25.6M
            state->start = ptr;
1836
25.6M
            state->ptr = ptr;
1837
25.6M
            status = SRE(match)(state, pattern, 0);
1838
25.6M
            if (status != 0)
1839
23.8M
                break;
1840
1.74M
            ptr++;
1841
1.74M
            RESET_CAPTURE_GROUP();
1842
1.74M
        }
1843
26.9M
    } else {
1844
        /* general case */
1845
3.83M
        assert(ptr <= end);
1846
3.83M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
3.83M
        state->start = state->ptr = ptr;
1848
3.83M
        status = SRE(match)(state, pattern, 1);
1849
3.83M
        state->must_advance = 0;
1850
3.83M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
10
            (pattern[1] == SRE_AT_BEGINNING ||
1852
10
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
89.3M
        while (status == 0 && ptr < end) {
1858
85.5M
            ptr++;
1859
85.5M
            RESET_CAPTURE_GROUP();
1860
85.5M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
85.5M
            state->start = state->ptr = ptr;
1862
85.5M
            status = SRE(match)(state, pattern, 0);
1863
85.5M
        }
1864
3.83M
    }
1865
1866
27.7M
    return status;
1867
30.7M
}
sre.c:sre_ucs2_search
Line
Count
Source
1693
48.6M
{
1694
48.6M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
48.6M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
48.6M
    Py_ssize_t status = 0;
1697
48.6M
    Py_ssize_t prefix_len = 0;
1698
48.6M
    Py_ssize_t prefix_skip = 0;
1699
48.6M
    SRE_CODE* prefix = NULL;
1700
48.6M
    SRE_CODE* charset = NULL;
1701
48.6M
    SRE_CODE* overlap = NULL;
1702
48.6M
    int flags = 0;
1703
48.6M
    INIT_TRACE(state);
1704
1705
48.6M
    if (ptr > end)
1706
0
        return 0;
1707
1708
48.6M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
48.6M
        flags = pattern[2];
1713
1714
48.6M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
132k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
132k
                   end - ptr, (size_t) pattern[3]));
1717
132k
            return 0;
1718
132k
        }
1719
48.5M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
2.75M
            end -= pattern[3] - 1;
1723
2.75M
            if (end <= ptr)
1724
0
                end = ptr;
1725
2.75M
        }
1726
1727
48.5M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
2.75M
            prefix_len = pattern[5];
1731
2.75M
            prefix_skip = pattern[6];
1732
2.75M
            prefix = pattern + 7;
1733
2.75M
            overlap = prefix + prefix_len - 1;
1734
45.7M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
44.5M
            charset = pattern + 5;
1738
1739
48.5M
        pattern += 1 + pattern[1];
1740
48.5M
    }
1741
1742
48.5M
    TRACE(("prefix = %p %zd %zd\n",
1743
48.5M
           prefix, prefix_len, prefix_skip));
1744
48.5M
    TRACE(("charset = %p\n", charset));
1745
1746
48.5M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
2.09M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
2.09M
#if SIZEOF_SRE_CHAR < 4
1750
2.09M
        if ((SRE_CODE) c != prefix[0])
1751
0
            return 0; /* literal can't match: doesn't fit in char width */
1752
2.09M
#endif
1753
2.09M
        end = (SRE_CHAR *)state->end;
1754
2.09M
        state->must_advance = 0;
1755
2.27M
        while (ptr < end) {
1756
54.2M
            while (*ptr != c) {
1757
52.0M
                if (++ptr >= end)
1758
68.0k
                    return 0;
1759
52.0M
            }
1760
2.20M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
2.20M
            state->start = ptr;
1762
2.20M
            state->ptr = ptr + prefix_skip;
1763
2.20M
            if (flags & SRE_INFO_LITERAL)
1764
1.83k
                return 1; /* we got all of it */
1765
2.20M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
2.20M
            if (status != 0)
1767
2.02M
                return status;
1768
182k
            ++ptr;
1769
182k
            RESET_CAPTURE_GROUP();
1770
182k
        }
1771
2.73k
        return 0;
1772
2.09M
    }
1773
1774
46.4M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
663k
        Py_ssize_t i = 0;
1778
1779
663k
        end = (SRE_CHAR *)state->end;
1780
663k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
663k
#if SIZEOF_SRE_CHAR < 4
1783
1.99M
        for (i = 0; i < prefix_len; i++)
1784
1.32M
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
0
                return 0; /* literal can't match: doesn't fit in char width */
1786
663k
#endif
1787
987k
        while (ptr < end) {
1788
987k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
3.50M
            while (*ptr++ != c) {
1790
2.52M
                if (ptr >= end)
1791
121
                    return 0;
1792
2.52M
            }
1793
987k
            if (ptr >= end)
1794
18
                return 0;
1795
1796
987k
            i = 1;
1797
987k
            state->must_advance = 0;
1798
988k
            do {
1799
988k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
947k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
947k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
947k
                    state->start = ptr - (prefix_len - 1);
1808
947k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
947k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
947k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
947k
                    if (status != 0)
1813
663k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
284k
                    if (++ptr >= end)
1816
15
                        return 0;
1817
284k
                    RESET_CAPTURE_GROUP();
1818
284k
                }
1819
324k
                i = overlap[i];
1820
324k
            } while (i != 0);
1821
987k
        }
1822
0
        return 0;
1823
663k
    }
1824
1825
45.7M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
44.5M
        end = (SRE_CHAR *)state->end;
1828
44.5M
        state->must_advance = 0;
1829
44.9M
        for (;;) {
1830
190M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
145M
                ptr++;
1832
44.9M
            if (ptr >= end)
1833
1.19M
                return 0;
1834
43.7M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
43.7M
            state->start = ptr;
1836
43.7M
            state->ptr = ptr;
1837
43.7M
            status = SRE(match)(state, pattern, 0);
1838
43.7M
            if (status != 0)
1839
43.3M
                break;
1840
415k
            ptr++;
1841
415k
            RESET_CAPTURE_GROUP();
1842
415k
        }
1843
44.5M
    } else {
1844
        /* general case */
1845
1.21M
        assert(ptr <= end);
1846
1.21M
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
1.21M
        state->start = state->ptr = ptr;
1848
1.21M
        status = SRE(match)(state, pattern, 1);
1849
1.21M
        state->must_advance = 0;
1850
1.21M
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
20
            (pattern[1] == SRE_AT_BEGINNING ||
1852
20
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
163M
        while (status == 0 && ptr < end) {
1858
162M
            ptr++;
1859
162M
            RESET_CAPTURE_GROUP();
1860
162M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
162M
            state->start = state->ptr = ptr;
1862
162M
            status = SRE(match)(state, pattern, 0);
1863
162M
        }
1864
1.21M
    }
1865
1866
44.5M
    return status;
1867
45.7M
}
sre.c:sre_ucs4_search
Line
Count
Source
1693
7.43M
{
1694
7.43M
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1695
7.43M
    SRE_CHAR* end = (SRE_CHAR *)state->end;
1696
7.43M
    Py_ssize_t status = 0;
1697
7.43M
    Py_ssize_t prefix_len = 0;
1698
7.43M
    Py_ssize_t prefix_skip = 0;
1699
7.43M
    SRE_CODE* prefix = NULL;
1700
7.43M
    SRE_CODE* charset = NULL;
1701
7.43M
    SRE_CODE* overlap = NULL;
1702
7.43M
    int flags = 0;
1703
7.43M
    INIT_TRACE(state);
1704
1705
7.43M
    if (ptr > end)
1706
0
        return 0;
1707
1708
7.43M
    if (pattern[0] == SRE_OP_INFO) {
1709
        /* optimization info block */
1710
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1711
1712
7.43M
        flags = pattern[2];
1713
1714
7.43M
        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) {
1715
7.57k
            TRACE(("reject (got %tu chars, need %zu)\n",
1716
7.57k
                   end - ptr, (size_t) pattern[3]));
1717
7.57k
            return 0;
1718
7.57k
        }
1719
7.42M
        if (pattern[3] > 1) {
1720
            /* adjust end point (but make sure we leave at least one
1721
               character in there, so literal search will work) */
1722
3.82M
            end -= pattern[3] - 1;
1723
3.82M
            if (end <= ptr)
1724
0
                end = ptr;
1725
3.82M
        }
1726
1727
7.42M
        if (flags & SRE_INFO_PREFIX) {
1728
            /* pattern starts with a known prefix */
1729
            /* <length> <skip> <prefix data> <overlap data> */
1730
3.82M
            prefix_len = pattern[5];
1731
3.82M
            prefix_skip = pattern[6];
1732
3.82M
            prefix = pattern + 7;
1733
3.82M
            overlap = prefix + prefix_len - 1;
1734
3.82M
        } else if (flags & SRE_INFO_CHARSET)
1735
            /* pattern starts with a character from a known set */
1736
            /* <charset> */
1737
3.36M
            charset = pattern + 5;
1738
1739
7.42M
        pattern += 1 + pattern[1];
1740
7.42M
    }
1741
1742
7.42M
    TRACE(("prefix = %p %zd %zd\n",
1743
7.42M
           prefix, prefix_len, prefix_skip));
1744
7.42M
    TRACE(("charset = %p\n", charset));
1745
1746
7.42M
    if (prefix_len == 1) {
1747
        /* pattern starts with a literal character */
1748
3.59M
        SRE_CHAR c = (SRE_CHAR) prefix[0];
1749
#if SIZEOF_SRE_CHAR < 4
1750
        if ((SRE_CODE) c != prefix[0])
1751
            return 0; /* literal can't match: doesn't fit in char width */
1752
#endif
1753
3.59M
        end = (SRE_CHAR *)state->end;
1754
3.59M
        state->must_advance = 0;
1755
3.78M
        while (ptr < end) {
1756
31.3M
            while (*ptr != c) {
1757
27.5M
                if (++ptr >= end)
1758
3.95k
                    return 0;
1759
27.5M
            }
1760
3.77M
            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1761
3.77M
            state->start = ptr;
1762
3.77M
            state->ptr = ptr + prefix_skip;
1763
3.77M
            if (flags & SRE_INFO_LITERAL)
1764
1.64k
                return 1; /* we got all of it */
1765
3.77M
            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1766
3.77M
            if (status != 0)
1767
3.58M
                return status;
1768
191k
            ++ptr;
1769
191k
            RESET_CAPTURE_GROUP();
1770
191k
        }
1771
833
        return 0;
1772
3.59M
    }
1773
1774
3.83M
    if (prefix_len > 1) {
1775
        /* pattern starts with a known prefix.  use the overlap
1776
           table to skip forward as fast as we possibly can */
1777
238k
        Py_ssize_t i = 0;
1778
1779
238k
        end = (SRE_CHAR *)state->end;
1780
238k
        if (prefix_len > end - ptr)
1781
0
            return 0;
1782
#if SIZEOF_SRE_CHAR < 4
1783
        for (i = 0; i < prefix_len; i++)
1784
            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1785
                return 0; /* literal can't match: doesn't fit in char width */
1786
#endif
1787
544k
        while (ptr < end) {
1788
544k
            SRE_CHAR c = (SRE_CHAR) prefix[0];
1789
4.40M
            while (*ptr++ != c) {
1790
3.86M
                if (ptr >= end)
1791
155
                    return 0;
1792
3.86M
            }
1793
543k
            if (ptr >= end)
1794
19
                return 0;
1795
1796
543k
            i = 1;
1797
543k
            state->must_advance = 0;
1798
545k
            do {
1799
545k
                if (*ptr == (SRE_CHAR) prefix[i]) {
1800
508k
                    if (++i != prefix_len) {
1801
0
                        if (++ptr >= end)
1802
0
                            return 0;
1803
0
                        continue;
1804
0
                    }
1805
                    /* found a potential match */
1806
508k
                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1807
508k
                    state->start = ptr - (prefix_len - 1);
1808
508k
                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1809
508k
                    if (flags & SRE_INFO_LITERAL)
1810
0
                        return 1; /* we got all of it */
1811
508k
                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1812
508k
                    if (status != 0)
1813
238k
                        return status;
1814
                    /* close but no cigar -- try again */
1815
270k
                    if (++ptr >= end)
1816
6
                        return 0;
1817
270k
                    RESET_CAPTURE_GROUP();
1818
270k
                }
1819
306k
                i = overlap[i];
1820
306k
            } while (i != 0);
1821
543k
        }
1822
0
        return 0;
1823
238k
    }
1824
1825
3.59M
    if (charset) {
1826
        /* pattern starts with a character from a known set */
1827
3.36M
        end = (SRE_CHAR *)state->end;
1828
3.36M
        state->must_advance = 0;
1829
3.85M
        for (;;) {
1830
67.2M
            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1831
63.3M
                ptr++;
1832
3.85M
            if (ptr >= end)
1833
55.8k
                return 0;
1834
3.79M
            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1835
3.79M
            state->start = ptr;
1836
3.79M
            state->ptr = ptr;
1837
3.79M
            status = SRE(match)(state, pattern, 0);
1838
3.79M
            if (status != 0)
1839
3.30M
                break;
1840
491k
            ptr++;
1841
491k
            RESET_CAPTURE_GROUP();
1842
491k
        }
1843
3.36M
    } else {
1844
        /* general case */
1845
233k
        assert(ptr <= end);
1846
233k
        TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1847
233k
        state->start = state->ptr = ptr;
1848
233k
        status = SRE(match)(state, pattern, 1);
1849
233k
        state->must_advance = 0;
1850
233k
        if (status == 0 && pattern[0] == SRE_OP_AT &&
1851
22
            (pattern[1] == SRE_AT_BEGINNING ||
1852
22
             pattern[1] == SRE_AT_BEGINNING_STRING))
1853
0
        {
1854
0
            state->start = state->ptr = ptr = end;
1855
0
            return 0;
1856
0
        }
1857
72.8M
        while (status == 0 && ptr < end) {
1858
72.6M
            ptr++;
1859
72.6M
            RESET_CAPTURE_GROUP();
1860
72.6M
            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1861
72.6M
            state->start = state->ptr = ptr;
1862
72.6M
            status = SRE(match)(state, pattern, 0);
1863
72.6M
        }
1864
233k
    }
1865
1866
3.53M
    return status;
1867
3.59M
}
1868
1869
#undef SRE_CHAR
1870
#undef SIZEOF_SRE_CHAR
1871
#undef SRE
1872
1873
/* vim:ts=4:sw=4:et
1874
*/