/src/cpython/Modules/_sre/sre_lib.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Secret Labs' Regular Expression Engine |
3 | | * |
4 | | * regular expression matching engine |
5 | | * |
6 | | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
7 | | * |
8 | | * See the sre.c file for information on usage and redistribution. |
9 | | */ |
10 | | |
11 | | /* String matching engine */ |
12 | | |
13 | | /* This file is included three times, with different character settings */ |
14 | | |
15 | | LOCAL(int) |
16 | | SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) |
17 | 14.9M | { |
18 | | /* check if pointer is at given position */ |
19 | | |
20 | 14.9M | Py_ssize_t thisp, thatp; |
21 | | |
22 | 14.9M | switch (at) { |
23 | | |
24 | 7.25M | case SRE_AT_BEGINNING: |
25 | 7.25M | case SRE_AT_BEGINNING_STRING: |
26 | 7.25M | return ((void*) ptr == state->beginning); |
27 | | |
28 | 0 | case SRE_AT_BEGINNING_LINE: |
29 | 0 | return ((void*) ptr == state->beginning || |
30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); |
31 | | |
32 | 4.65M | case SRE_AT_END: |
33 | 4.65M | return (((SRE_CHAR *)state->end - ptr == 1 && |
34 | 4.65M | SRE_IS_LINEBREAK((int) ptr[0])) || |
35 | 4.65M | ((void*) ptr == state->end)); |
36 | | |
37 | 0 | case SRE_AT_END_LINE: |
38 | 0 | return ((void*) ptr == state->end || |
39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); |
40 | | |
41 | 3.02M | case SRE_AT_END_STRING: |
42 | 3.02M | return ((void*) ptr == state->end); |
43 | | |
44 | 0 | case SRE_AT_BOUNDARY: |
45 | 0 | thatp = ((void*) ptr > state->beginning) ? |
46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
47 | 0 | thisp = ((void*) ptr < state->end) ? |
48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
49 | 0 | return thisp != thatp; |
50 | | |
51 | 0 | case SRE_AT_NON_BOUNDARY: |
52 | 0 | thatp = ((void*) ptr > state->beginning) ? |
53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
54 | 0 | thisp = ((void*) ptr < state->end) ? |
55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
56 | 0 | return thisp == thatp; |
57 | | |
58 | 0 | case SRE_AT_LOC_BOUNDARY: |
59 | 0 | thatp = ((void*) ptr > state->beginning) ? |
60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
61 | 0 | thisp = ((void*) ptr < state->end) ? |
62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
63 | 0 | return thisp != thatp; |
64 | | |
65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: |
66 | 0 | thatp = ((void*) ptr > state->beginning) ? |
67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
68 | 0 | thisp = ((void*) ptr < state->end) ? |
69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
70 | 0 | return thisp == thatp; |
71 | | |
72 | 0 | case SRE_AT_UNI_BOUNDARY: |
73 | 0 | thatp = ((void*) ptr > state->beginning) ? |
74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
75 | 0 | thisp = ((void*) ptr < state->end) ? |
76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
77 | 0 | return thisp != thatp; |
78 | | |
79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: |
80 | 0 | thatp = ((void*) ptr > state->beginning) ? |
81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
82 | 0 | thisp = ((void*) ptr < state->end) ? |
83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
84 | 0 | return thisp == thatp; |
85 | | |
86 | 14.9M | } |
87 | | |
88 | 0 | return 0; |
89 | 14.9M | } Line | Count | Source | 17 | 13.5M | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 13.5M | Py_ssize_t thisp, thatp; | 21 | | | 22 | 13.5M | switch (at) { | 23 | | | 24 | 7.20M | case SRE_AT_BEGINNING: | 25 | 7.20M | case SRE_AT_BEGINNING_STRING: | 26 | 7.20M | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 4.25M | case SRE_AT_END: | 33 | 4.25M | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 4.25M | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 4.25M | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 2.11M | case SRE_AT_END_STRING: | 42 | 2.11M | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 13.5M | } | 87 | | | 88 | 0 | return 0; | 89 | 13.5M | } |
Line | Count | Source | 17 | 791k | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 791k | Py_ssize_t thisp, thatp; | 21 | | | 22 | 791k | switch (at) { | 23 | | | 24 | 46.7k | case SRE_AT_BEGINNING: | 25 | 46.7k | case SRE_AT_BEGINNING_STRING: | 26 | 46.7k | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 320k | case SRE_AT_END: | 33 | 320k | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 320k | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 320k | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 424k | case SRE_AT_END_STRING: | 42 | 424k | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 791k | } | 87 | | | 88 | 0 | return 0; | 89 | 791k | } |
Line | Count | Source | 17 | 586k | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 586k | Py_ssize_t thisp, thatp; | 21 | | | 22 | 586k | switch (at) { | 23 | | | 24 | 8.41k | case SRE_AT_BEGINNING: | 25 | 8.41k | case SRE_AT_BEGINNING_STRING: | 26 | 8.41k | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 85.8k | case SRE_AT_END: | 33 | 85.8k | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 85.8k | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 85.8k | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 492k | case SRE_AT_END_STRING: | 42 | 492k | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 586k | } | 87 | | | 88 | 0 | return 0; | 89 | 586k | } |
|
90 | | |
91 | | LOCAL(int) |
92 | | SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) |
93 | 1.76G | { |
94 | | /* check if character is a member of the given set */ |
95 | | |
96 | 1.76G | int ok = 1; |
97 | | |
98 | 3.93G | for (;;) { |
99 | 3.93G | switch (*set++) { |
100 | | |
101 | 1.15G | case SRE_OP_FAILURE: |
102 | 1.15G | return !ok; |
103 | | |
104 | 1.14G | case SRE_OP_LITERAL: |
105 | | /* <LITERAL> <code> */ |
106 | 1.14G | if (ch == set[0]) |
107 | 5.40M | return ok; |
108 | 1.13G | set++; |
109 | 1.13G | break; |
110 | | |
111 | 11.5M | case SRE_OP_CATEGORY: |
112 | | /* <CATEGORY> <code> */ |
113 | 11.5M | if (sre_category(set[0], (int) ch)) |
114 | 7.82M | return ok; |
115 | 3.73M | set++; |
116 | 3.73M | break; |
117 | | |
118 | 913M | case SRE_OP_CHARSET: |
119 | | /* <CHARSET> <bitmap> */ |
120 | 913M | if (ch < 256 && |
121 | 913M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) |
122 | 396M | return ok; |
123 | 517M | set += 256/SRE_CODE_BITS; |
124 | 517M | break; |
125 | | |
126 | 331M | case SRE_OP_RANGE: |
127 | | /* <RANGE> <lower> <upper> */ |
128 | 331M | if (set[0] <= ch && ch <= set[1]) |
129 | 196M | return ok; |
130 | 134M | set += 2; |
131 | 134M | break; |
132 | | |
133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: |
134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ |
135 | 0 | { |
136 | 0 | SRE_CODE uch; |
137 | | /* ch is already lower cased */ |
138 | 0 | if (set[0] <= ch && ch <= set[1]) |
139 | 0 | return ok; |
140 | 0 | uch = sre_upper_unicode(ch); |
141 | 0 | if (set[0] <= uch && uch <= set[1]) |
142 | 0 | return ok; |
143 | 0 | set += 2; |
144 | 0 | break; |
145 | 0 | } |
146 | | |
147 | 378M | case SRE_OP_NEGATE: |
148 | 378M | ok = !ok; |
149 | 378M | break; |
150 | | |
151 | 0 | case SRE_OP_BIGCHARSET: |
152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ |
153 | 0 | { |
154 | 0 | Py_ssize_t count, block; |
155 | 0 | count = *(set++); |
156 | |
|
157 | 0 | if (ch < 0x10000u) |
158 | 0 | block = ((unsigned char*)set)[ch >> 8]; |
159 | 0 | else |
160 | 0 | block = -1; |
161 | 0 | set += 256/sizeof(SRE_CODE); |
162 | 0 | if (block >=0 && |
163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & |
164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) |
165 | 0 | return ok; |
166 | 0 | set += count * (256/SRE_CODE_BITS); |
167 | 0 | break; |
168 | 0 | } |
169 | | |
170 | 0 | default: |
171 | | /* internal error -- there's not much we can do about it |
172 | | here, so let's just pretend it didn't match... */ |
173 | 0 | return 0; |
174 | 3.93G | } |
175 | 3.93G | } |
176 | 1.76G | } Line | Count | Source | 93 | 338M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 338M | int ok = 1; | 97 | | | 98 | 706M | for (;;) { | 99 | 706M | switch (*set++) { | 100 | | | 101 | 188M | case SRE_OP_FAILURE: | 102 | 188M | return !ok; | 103 | | | 104 | 230M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 230M | if (ch == set[0]) | 107 | 2.96M | return ok; | 108 | 227M | set++; | 109 | 227M | break; | 110 | | | 111 | 10.7M | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 10.7M | if (sre_category(set[0], (int) ch)) | 114 | 7.00M | return ok; | 115 | 3.72M | set++; | 116 | 3.72M | break; | 117 | | | 118 | 84.4M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 84.4M | if (ch < 256 && | 121 | 84.4M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 44.9M | return ok; | 123 | 39.5M | set += 256/SRE_CODE_BITS; | 124 | 39.5M | break; | 125 | | | 126 | 157M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 157M | if (set[0] <= ch && ch <= set[1]) | 129 | 95.5M | return ok; | 130 | 62.2M | set += 2; | 131 | 62.2M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 35.1M | case SRE_OP_NEGATE: | 148 | 35.1M | ok = !ok; | 149 | 35.1M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 706M | } | 175 | 706M | } | 176 | 338M | } |
Line | Count | Source | 93 | 793M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 793M | int ok = 1; | 97 | | | 98 | 1.86G | for (;;) { | 99 | 1.86G | switch (*set++) { | 100 | | | 101 | 567M | case SRE_OP_FAILURE: | 102 | 567M | return !ok; | 103 | | | 104 | 628M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 628M | if (ch == set[0]) | 107 | 1.42M | return ok; | 108 | 627M | set++; | 109 | 627M | break; | 110 | | | 111 | 176k | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 176k | if (sre_category(set[0], (int) ch)) | 114 | 169k | return ok; | 115 | 6.73k | set++; | 116 | 6.73k | break; | 117 | | | 118 | 357M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 357M | if (ch < 256 && | 121 | 357M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 133M | return ok; | 123 | 224M | set += 256/SRE_CODE_BITS; | 124 | 224M | break; | 125 | | | 126 | 150M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 150M | if (set[0] <= ch && ch <= set[1]) | 129 | 91.6M | return ok; | 130 | 59.2M | set += 2; | 131 | 59.2M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 164M | case SRE_OP_NEGATE: | 148 | 164M | ok = !ok; | 149 | 164M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 1.86G | } | 175 | 1.86G | } | 176 | 793M | } |
Line | Count | Source | 93 | 629M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 629M | int ok = 1; | 97 | | | 98 | 1.35G | for (;;) { | 99 | 1.35G | switch (*set++) { | 100 | | | 101 | 400M | case SRE_OP_FAILURE: | 102 | 400M | return !ok; | 103 | | | 104 | 281M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 281M | if (ch == set[0]) | 107 | 1.00M | return ok; | 108 | 280M | set++; | 109 | 280M | break; | 110 | | | 111 | 651k | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 651k | if (sre_category(set[0], (int) ch)) | 114 | 650k | return ok; | 115 | 855 | set++; | 116 | 855 | break; | 117 | | | 118 | 471M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 471M | if (ch < 256 && | 121 | 471M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 218M | return ok; | 123 | 253M | set += 256/SRE_CODE_BITS; | 124 | 253M | break; | 125 | | | 126 | 23.2M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 23.2M | if (set[0] <= ch && ch <= set[1]) | 129 | 9.82M | return ok; | 130 | 13.3M | set += 2; | 131 | 13.3M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 178M | case SRE_OP_NEGATE: | 148 | 178M | ok = !ok; | 149 | 178M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 1.35G | } | 175 | 1.35G | } | 176 | 629M | } |
|
177 | | |
178 | | LOCAL(int) |
179 | | SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) |
180 | 0 | { |
181 | 0 | SRE_CODE lo, up; |
182 | 0 | lo = sre_lower_locale(ch); |
183 | 0 | if (SRE(charset)(state, set, lo)) |
184 | 0 | return 1; |
185 | | |
186 | 0 | up = sre_upper_locale(ch); |
187 | 0 | return up != lo && SRE(charset)(state, set, up); |
188 | 0 | } Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore |
189 | | |
190 | | LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); |
191 | | |
192 | | LOCAL(Py_ssize_t) |
193 | | SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) |
194 | 706M | { |
195 | 706M | SRE_CODE chr; |
196 | 706M | SRE_CHAR c; |
197 | 706M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; |
198 | 706M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; |
199 | 706M | Py_ssize_t i; |
200 | 706M | INIT_TRACE(state); |
201 | | |
202 | | /* adjust end */ |
203 | 706M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) |
204 | 14.5M | end = ptr + maxcount; |
205 | | |
206 | 706M | switch (pattern[0]) { |
207 | | |
208 | 624M | case SRE_OP_IN: |
209 | | /* repeated set */ |
210 | 624M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); |
211 | 998M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) |
212 | 373M | ptr++; |
213 | 624M | break; |
214 | | |
215 | 0 | case SRE_OP_ANY: |
216 | | /* repeated dot wildcard. */ |
217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); |
218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) |
219 | 0 | ptr++; |
220 | 0 | break; |
221 | | |
222 | 0 | case SRE_OP_ANY_ALL: |
223 | | /* repeated dot wildcard. skip to the end of the target |
224 | | string, and backtrack from there */ |
225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); |
226 | 0 | ptr = end; |
227 | 0 | break; |
228 | | |
229 | 74.2M | case SRE_OP_LITERAL: |
230 | | /* repeated literal */ |
231 | 74.2M | chr = pattern[1]; |
232 | 74.2M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); |
233 | 74.2M | c = (SRE_CHAR) chr; |
234 | | #if SIZEOF_SRE_CHAR < 4 |
235 | 71.3M | if ((SRE_CODE) c != chr) |
236 | 0 | ; /* literal can't match: doesn't fit in char width */ |
237 | 71.3M | else |
238 | 71.3M | #endif |
239 | 78.9M | while (ptr < end && *ptr == c) |
240 | 4.68M | ptr++; |
241 | 74.2M | break; |
242 | | |
243 | 0 | case SRE_OP_LITERAL_IGNORE: |
244 | | /* repeated literal */ |
245 | 0 | chr = pattern[1]; |
246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) |
248 | 0 | ptr++; |
249 | 0 | break; |
250 | | |
251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: |
252 | | /* repeated literal */ |
253 | 0 | chr = pattern[1]; |
254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) |
256 | 0 | ptr++; |
257 | 0 | break; |
258 | | |
259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: |
260 | | /* repeated literal */ |
261 | 0 | chr = pattern[1]; |
262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) |
264 | 0 | ptr++; |
265 | 0 | break; |
266 | | |
267 | 7.48M | case SRE_OP_NOT_LITERAL: |
268 | | /* repeated non-literal */ |
269 | 7.48M | chr = pattern[1]; |
270 | 7.48M | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); |
271 | 7.48M | c = (SRE_CHAR) chr; |
272 | | #if SIZEOF_SRE_CHAR < 4 |
273 | 3.84M | if ((SRE_CODE) c != chr) |
274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ |
275 | 3.84M | else |
276 | 3.84M | #endif |
277 | 42.1M | while (ptr < end && *ptr != c) |
278 | 34.6M | ptr++; |
279 | 7.48M | break; |
280 | | |
281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: |
282 | | /* repeated non-literal */ |
283 | 0 | chr = pattern[1]; |
284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) |
286 | 0 | ptr++; |
287 | 0 | break; |
288 | | |
289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: |
290 | | /* repeated non-literal */ |
291 | 0 | chr = pattern[1]; |
292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) |
294 | 0 | ptr++; |
295 | 0 | break; |
296 | | |
297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
298 | | /* repeated non-literal */ |
299 | 0 | chr = pattern[1]; |
300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) |
302 | 0 | ptr++; |
303 | 0 | break; |
304 | | |
305 | 0 | default: |
306 | | /* repeated single character pattern */ |
307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
308 | 0 | while ((SRE_CHAR*) state->ptr < end) { |
309 | 0 | i = SRE(match)(state, pattern, 0); |
310 | 0 | if (i < 0) |
311 | 0 | return i; |
312 | 0 | if (!i) |
313 | 0 | break; |
314 | 0 | } |
315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, |
316 | 0 | (SRE_CHAR*) state->ptr - ptr)); |
317 | 0 | return (SRE_CHAR*) state->ptr - ptr; |
318 | 706M | } |
319 | | |
320 | 706M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, |
321 | 706M | ptr - (SRE_CHAR*) state->ptr)); |
322 | 706M | return ptr - (SRE_CHAR*) state->ptr; |
323 | 706M | } Line | Count | Source | 194 | 172M | { | 195 | 172M | SRE_CODE chr; | 196 | 172M | SRE_CHAR c; | 197 | 172M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 172M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 172M | Py_ssize_t i; | 200 | 172M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 172M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 3.49M | end = ptr + maxcount; | 205 | | | 206 | 172M | switch (pattern[0]) { | 207 | | | 208 | 107M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 107M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 218M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 110M | ptr++; | 213 | 107M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 65.2M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 65.2M | chr = pattern[1]; | 232 | 65.2M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 65.2M | c = (SRE_CHAR) chr; | 234 | 65.2M | #if SIZEOF_SRE_CHAR < 4 | 235 | 65.2M | if ((SRE_CODE) c != chr) | 236 | 0 | ; /* literal can't match: doesn't fit in char width */ | 237 | 65.2M | else | 238 | 65.2M | #endif | 239 | 67.5M | while (ptr < end && *ptr == c) | 240 | 2.34M | ptr++; | 241 | 65.2M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 169k | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 169k | chr = pattern[1]; | 270 | 169k | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 169k | c = (SRE_CHAR) chr; | 272 | 169k | #if SIZEOF_SRE_CHAR < 4 | 273 | 169k | if ((SRE_CODE) c != chr) | 274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | 169k | else | 
276 | 169k | #endif | 277 | 5.89M | while (ptr < end && *ptr != c) | 278 | 5.72M | ptr++; | 279 | 169k | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 172M | } | 319 | | | 320 | 172M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 172M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 172M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 172M | } |
Line | Count | Source | 194 | 308M | { | 195 | 308M | SRE_CODE chr; | 196 | 308M | SRE_CHAR c; | 197 | 308M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 308M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 308M | Py_ssize_t i; | 200 | 308M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 308M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 5.08M | end = ptr + maxcount; | 205 | | | 206 | 308M | switch (pattern[0]) { | 207 | | | 208 | 298M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 298M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 424M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 126M | ptr++; | 213 | 298M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 6.17M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 6.17M | chr = pattern[1]; | 232 | 6.17M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 6.17M | c = (SRE_CHAR) chr; | 234 | 6.17M | #if SIZEOF_SRE_CHAR < 4 | 235 | 6.17M | if ((SRE_CODE) c != chr) | 236 | 0 | ; /* literal can't match: doesn't fit in char width */ | 237 | 6.17M | else | 238 | 6.17M | #endif | 239 | 8.27M | while (ptr < end && *ptr == c) | 240 | 2.10M | ptr++; | 241 | 6.17M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 3.67M | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 3.67M | chr = pattern[1]; | 270 | 3.67M | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 3.67M | c = (SRE_CHAR) chr; | 272 | 3.67M | #if SIZEOF_SRE_CHAR < 4 | 273 | 3.67M | if ((SRE_CODE) c != chr) | 274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | 3.67M | 
else | 276 | 3.67M | #endif | 277 | 11.5M | while (ptr < end && *ptr != c) | 278 | 7.91M | ptr++; | 279 | 3.67M | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 308M | } | 319 | | | 320 | 308M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 308M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 308M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 308M | } |
Line | Count | Source | 194 | 225M | { | 195 | 225M | SRE_CODE chr; | 196 | 225M | SRE_CHAR c; | 197 | 225M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 225M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 225M | Py_ssize_t i; | 200 | 225M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 225M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 5.92M | end = ptr + maxcount; | 205 | | | 206 | 225M | switch (pattern[0]) { | 207 | | | 208 | 219M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 219M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 355M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 136M | ptr++; | 213 | 219M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 2.87M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 2.87M | chr = pattern[1]; | 232 | 2.87M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 2.87M | c = (SRE_CHAR) chr; | 234 | | #if SIZEOF_SRE_CHAR < 4 | 235 | | if ((SRE_CODE) c != chr) | 236 | | ; /* literal can't match: doesn't fit in char width */ | 237 | | else | 238 | | #endif | 239 | 3.10M | while (ptr < end && *ptr == c) | 240 | 235k | ptr++; | 241 | 2.87M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 3.63M | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 3.63M | chr = pattern[1]; | 270 | 3.63M | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 3.63M | c = (SRE_CHAR) chr; | 272 | | #if SIZEOF_SRE_CHAR < 4 | 273 | | if ((SRE_CODE) c != chr) | 274 | | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | | else | 276 | | #endif | 277 | 24.6M | while 
(ptr < end && *ptr != c) | 278 | 21.0M | ptr++; | 279 | 3.63M | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 225M | } | 319 | | | 320 | 225M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 225M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 225M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 225M | } |
|
324 | | |
325 | | /* The macros below should be used to protect recursive SRE(match)() |
326 | | * calls that *failed* and do *not* return immediately (IOW, those |
327 | | * that will backtrack). Explaining: |
328 | | * |
329 | | * - Recursive SRE(match)() returned true: that's usually a success |
330 | | * (besides atypical cases like ASSERT_NOT), therefore there's no |
331 | | * reason to restore lastmark; |
332 | | * |
333 | | * - Recursive SRE(match)() returned false but the current SRE(match)() |
334 | | * is returning to the caller: If the current SRE(match)() is the |
335 | | * top function of the recursion, returning false will be a matching |
336 | | * failure, and it doesn't matter where lastmark is pointing to. |
337 | | * If it's *not* the top function, it will be a recursive SRE(match)() |
338 | | * failure by itself, and the calling SRE(match)() will have to deal |
339 | | * with the failure by the same rules explained here (it will restore |
340 | | * lastmark by itself if necessary); |
341 | | * |
342 | | * - Recursive SRE(match)() returned false, and will continue the |
343 | | * outside 'for' loop: must be protected when breaking, since the next |
344 | | * OP could potentially depend on lastmark; |
345 | | * |
346 | | * - Recursive SRE(match)() returned false, and will be called again |
347 | | * inside a local for/while loop: must be protected between each |
348 | | * loop iteration, since the recursive SRE(match)() could do anything, |
349 | | * and could potentially depend on lastmark. |
350 | | * |
351 | | * For more information, check the discussion at SF patch #712900. |
352 | | */ |
/* Copy state->lastmark and state->lastindex into the current match
 * context.  Expands in a scope where `ctx` and `state` are visible. */
#define LASTMARK_SAVE() \
    ((void)(ctx->lastindex = state->lastindex, \
            ctx->lastmark = state->lastmark))

/* Inverse of LASTMARK_SAVE(): write the values held in the current match
 * context back into state->lastmark and state->lastindex. */
#define LASTMARK_RESTORE() \
    ((void)(state->lastindex = ctx->lastindex, \
            state->lastmark = ctx->lastmark))
363 | | |
/* Trace the index of ctx->u.rep->last_ptr, then copy that pointer onto
 * the data stack.  Expands in a scope where `ctx` and `state` are
 * visible (plus whatever DATA_STACK_PUSH itself requires). */
#define LAST_PTR_PUSH() \
    do { \
        TRACE(("push last_ptr: %zd", \
               PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
        DATA_STACK_PUSH(state, (&ctx->u.rep->last_ptr), \
                        sizeof(ctx->u.rep->last_ptr)); \
    } while (0)

/* Counterpart of LAST_PTR_PUSH(): pop the saved pointer back into
 * ctx->u.rep->last_ptr (discarding the stack entry), then trace the
 * restored index. */
#define LAST_PTR_POP() \
    do { \
        DATA_STACK_POP(state, (&ctx->u.rep->last_ptr), \
                       sizeof(ctx->u.rep->last_ptr), 1); \
        TRACE(("pop last_ptr: %zd", \
               PTR_TO_INDEX(ctx->u.rep->last_ptr))); \
    } while (0)
376 | | |
/* Exit helpers for the matcher body.
 *
 *   RETURN_ERROR(i)  - return `i` from the enclosing function at once.
 *   RETURN_FAILURE   - set `ret` to 0 and jump to the `exit` label.
 *   RETURN_SUCCESS   - set `ret` to 1 and jump to the `exit` label.
 *
 * The RETURN_ON_* forms test a result code `i`:
 *
 *   RETURN_ON_ERROR(i)   - RETURN_ERROR(i) when i is negative.
 *   RETURN_ON_SUCCESS(i) - as above, else RETURN_SUCCESS when i > 0.
 *   RETURN_ON_FAILURE(i) - as above, else RETURN_FAILURE when i == 0.
 *
 * `i` is parenthesized everywhere so that compound arguments (for
 * example `x & 2`) compare as a whole.  It is still evaluated more than
 * once, so pass a side-effect-free expression.
 */
#define RETURN_ERROR(i) do { return (i); } while (0)
#define RETURN_FAILURE do { ret = 0; goto exit; } while (0)
#define RETURN_SUCCESS do { ret = 1; goto exit; } while (0)

#define RETURN_ON_ERROR(i) \
    do { if ((i) < 0) RETURN_ERROR(i); } while (0)
#define RETURN_ON_SUCCESS(i) \
    do { RETURN_ON_ERROR(i); if ((i) > 0) RETURN_SUCCESS; } while (0)
#define RETURN_ON_FAILURE(i) \
    do { RETURN_ON_ERROR(i); if ((i) == 0) RETURN_FAILURE; } while (0)
387 | | |
/* Data-stack primitives.
 *
 * All of these operate on state->data_stack (the storage),
 * state->data_stack_size (its capacity) and state->data_stack_base (the
 * offset of the first free byte).  Macro parameters are parenthesized at
 * every use so compound arguments such as `a + b` keep their meaning;
 * `size` and `data` may still be evaluated more than once.
 *
 * DATA_STACK_ALLOC and DATA_STACK_PUSH additionally need `ctx` and
 * `ctx_pos` in the expansion scope (ALLOC also needs `alloc_pos`): when
 * the stack has to grow, a negative data_stack_grow() result is returned
 * from the enclosing function, and otherwise `ctx` is looked up again at
 * `ctx_pos` (unless ctx_pos is -1) -- presumably because growing can
 * move the storage.
 */

/* Reserve sizeof(type) bytes on top of the stack and point `ptr` at
 * them; the offset of the reservation is left in `alloc_pos`. */
#define DATA_STACK_ALLOC(state, type, ptr) \
do { \
    alloc_pos = (state)->data_stack_base; \
    TRACE(("allocating %s in %zd (%zd)\n", \
           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
    if (sizeof(type) > (state)->data_stack_size - alloc_pos) { \
        int j = data_stack_grow(state, sizeof(type)); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
    } \
    ptr = (type*)((state)->data_stack + alloc_pos); \
    (state)->data_stack_base += sizeof(type); \
} while (0)

/* Point `ptr` at the object of type `type` stored at byte offset `pos`. */
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
do { \
    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \
    ptr = (type*)((state)->data_stack + (pos)); \
} while (0)

/* Copy `size` bytes from `data` onto the top of the stack, growing the
 * stack first when they do not fit. */
#define DATA_STACK_PUSH(state, data, size) \
do { \
    TRACE(("copy data in %p to %zd (%zd)\n", \
           data, (state)->data_stack_base, size)); \
    if ((size) > (state)->data_stack_size - (state)->data_stack_base) { \
        int j = data_stack_grow(state, size); \
        if (j < 0) return j; \
        if (ctx_pos != -1) \
            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
    } \
    memcpy((state)->data_stack + (state)->data_stack_base, (data), (size)); \
    (state)->data_stack_base += (size); \
} while (0)

/* Copy the topmost `size` bytes of the stack into `data`; drop them from
 * the stack as well when `discard` is non-zero.
 *
 * We add an explicit cast to memcpy here because MSVC has a bug when
 * compiling C code where it believes that `const void**` cannot be
 * safely cast to `void*`, see bpo-39943 for details. */
#define DATA_STACK_POP(state, data, size, discard) \
do { \
    TRACE(("copy data to %p from %zd (%zd)\n", \
           data, (state)->data_stack_base - (size), size)); \
    memcpy((void*)(data), \
           (state)->data_stack + (state)->data_stack_base - (size), \
           (size)); \
    if (discard) \
        (state)->data_stack_base -= (size); \
} while (0)

/* Drop the topmost `size` bytes of the stack without copying them. */
#define DATA_STACK_POP_DISCARD(state, size) \
do { \
    TRACE(("discard data from %zd (%zd)\n", \
           (state)->data_stack_base - (size), size)); \
    (state)->data_stack_base -= (size); \
} while (0)
441 | | |
/* Convenience forms of the DATA_STACK_* macros that always use the
 * `state` variable of the expansion scope; for the push/pop forms the
 * byte count is the size of the object `x` points to. */
#define DATA_PUSH(x)            DATA_STACK_PUSH(state, (x), sizeof((x)[0]))
#define DATA_POP(x)             DATA_STACK_POP(state, (x), sizeof((x)[0]), 1)
#define DATA_POP_DISCARD(x)     DATA_STACK_POP_DISCARD(state, sizeof((x)[0]))
#define DATA_ALLOC(t,p)         DATA_STACK_ALLOC(state, t, p)
#define DATA_LOOKUP_AT(t,p,pos) DATA_STACK_LOOKUP_AT(state,t,p,pos)

/* Turn a pointer into the subject into a character index: the byte
 * distance from state->beginning divided by state->charsize.  A null
 * pointer yields -1. */
#define PTR_TO_INDEX(ptr) \
    (!(ptr) ? -1 \
            : ((char*)(ptr) - (char*)state->beginning) / state->charsize)
455 | | |
/* MARK_TRACE(label, lastmark): debugging aid.  Without VERBOSE it
 * expands to nothing (its arguments are not evaluated).  With VERBOSE,
 * and only while DO_TRACE is true, it prints `label`, the number of
 * marks (lastmark + 1) and the index of every entry of state->mark[0 ..
 * lastmark], with an extra space before each even-numbered entry after
 * the first. */
#if !VERBOSE
#  define MARK_TRACE(label, lastmark)
#else
#  define MARK_TRACE(label, lastmark) \
    do { \
        if (DO_TRACE) { \
            int mark_no_; \
            TRACE(("%s %d marks:", (label), (lastmark)+1)); \
            for (mark_no_ = 0; mark_no_ <= (lastmark); mark_no_++) { \
                if (mark_no_ > 0 && mark_no_ % 2 == 0) { \
                    TRACE((" ")); \
                } \
                TRACE((" %zd", PTR_TO_INDEX(state->mark[mark_no_]))); \
            } \
            TRACE(("\n")); \
        } \
    } while (0)
#endif
/* Save / restore the first (lastmark + 1) entries of state->mark on the
 * data stack.  Each macro is a no-op when `lastmark` is negative.
 *
 *   MARK_PUSH         - copy the marks onto the stack.
 *   MARK_POP          - copy them back and remove them from the stack.
 *   MARK_POP_KEEP     - copy them back but leave them on the stack.
 *   MARK_POP_DISCARD  - remove them from the stack without copying.
 *
 * `lastmark` is parenthesized at every use (as MARK_TRACE already does)
 * so a compound argument is tested and sized as a whole.  It is
 * evaluated more than once; pass a side-effect-free expression.
 * Expands in a scope where `state` is visible.
 */
#define MARK_PUSH(lastmark) \
    do { \
        if ((lastmark) >= 0) { \
            MARK_TRACE("push", (lastmark)); \
            size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
            DATA_STACK_PUSH(state, state->mark, _marks_size); \
        } \
    } while (0)
#define MARK_POP(lastmark) \
    do { \
        if ((lastmark) >= 0) { \
            size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
            DATA_STACK_POP(state, state->mark, _marks_size, 1); \
            MARK_TRACE("pop", (lastmark)); \
        } \
    } while (0)
#define MARK_POP_KEEP(lastmark) \
    do { \
        if ((lastmark) >= 0) { \
            size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
            DATA_STACK_POP(state, state->mark, _marks_size, 0); \
            MARK_TRACE("pop keep", (lastmark)); \
        } \
    } while (0)
#define MARK_POP_DISCARD(lastmark) \
    do { \
        if ((lastmark) >= 0) { \
            size_t _marks_size = ((lastmark) + 1) * sizeof(void*); \
            DATA_STACK_POP_DISCARD(state, _marks_size); \
            MARK_TRACE("pop discard", (lastmark)); \
        } \
    } while (0)
495 | | |
/* Values stored in match_context.jump.  JUMP_NONE marks the context
 * created on entry to SRE(match); every other value is passed to
 * DO_JUMPX together with the label at which the suspended context
 * continues. */
#define JUMP_NONE               0

/* MAX_UNTIL / MIN_UNTIL continuations */
#define JUMP_MAX_UNTIL_1        1
#define JUMP_MAX_UNTIL_2        2
#define JUMP_MAX_UNTIL_3        3
#define JUMP_MIN_UNTIL_1        4
#define JUMP_MIN_UNTIL_2        5
#define JUMP_MIN_UNTIL_3        6

/* REPEAT, REPEAT_ONE and MIN_REPEAT_ONE continuations */
#define JUMP_REPEAT             7
#define JUMP_REPEAT_ONE_1       8
#define JUMP_REPEAT_ONE_2       9
#define JUMP_MIN_REPEAT_ONE     10

/* BRANCH and assertion continuations */
#define JUMP_BRANCH             11
#define JUMP_ASSERT             12
#define JUMP_ASSERT_NOT         13

/* possessive repeat and atomic group continuations */
#define JUMP_POSS_REPEAT_1      14
#define JUMP_POSS_REPEAT_2      15
#define JUMP_ATOMIC_GROUP       16
513 | | |
/* DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_)
 *
 * Suspend the current match context and start matching `nextpattern` in
 * a freshly allocated one, without a C-level recursive call:
 *
 *   1. park `ptr` and `pattern` in the current context;
 *   2. allocate the child context and fill it in (pattern, toplevel
 *      flag, jump value, position of the parent context);
 *   3. make the child current and jump to `entrance`.
 *
 * `jumplabel:` is defined right here; execution that arrives at it
 * reloads `pattern` and `ptr` from whatever context is current at that
 * point.
 *
 * This is a plain statement sequence, not a do/while(0) block, because
 * it has to define a label.  `nextpattern` and `toplevel_` are read
 * after DATA_ALLOC but before `pattern` and `ctx` are changed, so they
 * may refer to the parent's `pattern` / `ctx`.
 */
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
    ctx->ptr = ptr; \
    ctx->pattern = pattern; \
    DATA_ALLOC(SRE(match_context), nextctx); \
    nextctx->last_ctx_pos = ctx_pos; \
    nextctx->jump = jumpvalue; \
    nextctx->toplevel = toplevel_; \
    nextctx->pattern = nextpattern; \
    pattern = nextpattern; \
    ctx_pos = alloc_pos; \
    ctx = nextctx; \
    goto entrance; \
    jumplabel: \
    ptr = ctx->ptr; \
    pattern = ctx->pattern;

/* DO_JUMPX with the child inheriting the current context's toplevel flag. */
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)

/* DO_JUMPX with the child's toplevel flag forced to 0. */
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
535 | | |
536 | | typedef struct { |
537 | | Py_ssize_t count; |
538 | | union { |
539 | | SRE_CODE chr; |
540 | | SRE_REPEAT* rep; |
541 | | } u; |
542 | | int lastmark; |
543 | | int lastindex; |
544 | | const SRE_CODE* pattern; |
545 | | const SRE_CHAR* ptr; |
546 | | int toplevel; |
547 | | int jump; |
548 | | Py_ssize_t last_ctx_pos; |
549 | | } SRE(match_context); |
550 | | |
/* Bump `sigcount` and, each time its low 12 bits wrap to zero (every
 * 0x1000 increments), call PyErr_CheckSignals(); a non-zero result makes
 * the enclosing function return SRE_ERROR_INTERRUPTED. */
#define _MAYBE_CHECK_SIGNALS \
    do { \
        ++sigcount; \
        if ((sigcount & 0xfff) == 0) { \
            if (PyErr_CheckSignals()) { \
                RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
            } \
        } \
    } while (0)

/* MAYBE_CHECK_SIGNALS is the form used by the matcher.  In Py_DEBUG
 * builds it also honours state->fail_after_count: while that counter is
 * non-negative it is decremented on every check, and the check that
 * finds it at zero sets state->fail_after_exc and returns
 * SRE_ERROR_INTERRUPTED. */
#ifndef Py_DEBUG
# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS
#else
# define MAYBE_CHECK_SIGNALS \
    do { \
        _MAYBE_CHECK_SIGNALS; \
        if (state->fail_after_count >= 0 && \
            state->fail_after_count-- == 0) { \
            PyErr_SetNone(state->fail_after_exc); \
            RETURN_ERROR(SRE_ERROR_INTERRUPTED); \
        } \
    } while (0)
#endif /* Py_DEBUG */
572 | | |
/* Settle USE_COMPUTED_GOTOS to a definite value:
 *   - compiler support present: default to 1 unless already defined;
 *   - no support but explicitly requested: compile-time error;
 *   - otherwise: force 0. */
#if defined(HAVE_COMPUTED_GOTOS)
#  if !defined(USE_COMPUTED_GOTOS)
#    define USE_COMPUTED_GOTOS 1
#  endif
#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
#  error "Computed gotos are not supported on this compiler."
#else
#  undef USE_COMPUTED_GOTOS
#  define USE_COMPUTED_GOTOS 0
#endif

/* TARGET(OP) introduces the handler for opcode OP and DISPATCH moves on
 * to the next opcode.  In switch mode these are a `case` label and a
 * jump back to the `dispatch` label.  In computed-goto mode they are a
 * TARGET_<OP> label and, after MAYBE_CHECK_SIGNALS, an indirect jump
 * through sre_targets[] indexed by the opcode at *pattern (which is
 * consumed). */
#if !USE_COMPUTED_GOTOS
    #define TARGET(OP) case OP
    #define DISPATCH goto dispatch
#else
    #define TARGET(OP) TARGET_ ## OP
    #define DISPATCH \
        do { \
            MAYBE_CHECK_SIGNALS; \
            goto *sre_targets[*pattern++]; \
        } while (0)
#endif
595 | | |
596 | | /* check if string matches the given pattern. returns <0 for |
597 | | error, 0 for failure, and 1 for success */ |
598 | | LOCAL(Py_ssize_t) |
599 | | SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) |
600 | 546M | { |
601 | 546M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; |
602 | 546M | Py_ssize_t alloc_pos, ctx_pos = -1; |
603 | 546M | Py_ssize_t ret = 0; |
604 | 546M | int jump; |
605 | 546M | unsigned int sigcount = state->sigcount; |
606 | | |
607 | 546M | SRE(match_context)* ctx; |
608 | 546M | SRE(match_context)* nextctx; |
609 | 546M | INIT_TRACE(state); |
610 | | |
611 | 546M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); |
612 | | |
613 | 546M | DATA_ALLOC(SRE(match_context), ctx); |
614 | 546M | ctx->last_ctx_pos = -1; |
615 | 546M | ctx->jump = JUMP_NONE; |
616 | 546M | ctx->toplevel = toplevel; |
617 | 546M | ctx_pos = alloc_pos; |
618 | | |
619 | 546M | #if USE_COMPUTED_GOTOS |
620 | 546M | #include "sre_targets.h" |
621 | 546M | #endif |
622 | | |
623 | 1.63G | entrance: |
624 | | |
625 | 1.63G | ; // Fashion statement. |
626 | 1.63G | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; |
627 | | |
628 | 1.63G | if (pattern[0] == SRE_OP_INFO) { |
629 | | /* optimization info block */ |
630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... */ |
631 | 89.9M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { |
632 | 7.74M | TRACE(("reject (got %tu chars, need %zu)\n", |
633 | 7.74M | end - ptr, (size_t) pattern[3])); |
634 | 7.74M | RETURN_FAILURE; |
635 | 7.74M | } |
636 | 82.2M | pattern += pattern[1] + 1; |
637 | 82.2M | } |
638 | | |
639 | 1.62G | #if USE_COMPUTED_GOTOS |
640 | 1.62G | DISPATCH; |
641 | | #else |
642 | | dispatch: |
643 | | MAYBE_CHECK_SIGNALS; |
644 | | switch (*pattern++) |
645 | | #endif |
646 | 1.62G | { |
647 | | |
648 | 1.62G | TARGET(SRE_OP_MARK): |
649 | | /* set mark */ |
650 | | /* <MARK> <gid> */ |
651 | 577M | TRACE(("|%p|%p|MARK %d\n", pattern, |
652 | 577M | ptr, pattern[0])); |
653 | 577M | { |
654 | 577M | int i = pattern[0]; |
655 | 577M | if (i & 1) |
656 | 82.4M | state->lastindex = i/2 + 1; |
657 | 577M | if (i > state->lastmark) { |
658 | | /* state->lastmark is the highest valid index in the |
659 | | state->mark array. If it is increased by more than 1, |
660 | | the intervening marks must be set to NULL to signal |
661 | | that these marks have not been encountered. */ |
662 | 569M | int j = state->lastmark + 1; |
663 | 577M | while (j < i) |
664 | 7.67M | state->mark[j++] = NULL; |
665 | 569M | state->lastmark = i; |
666 | 569M | } |
667 | 577M | state->mark[i] = ptr; |
668 | 577M | } |
669 | 577M | pattern++; |
670 | 577M | DISPATCH; |
671 | | |
672 | 577M | TARGET(SRE_OP_LITERAL): |
673 | | /* match literal string */ |
674 | | /* <LITERAL> <code> */ |
675 | 163M | TRACE(("|%p|%p|LITERAL %d\n", pattern, |
676 | 163M | ptr, *pattern)); |
677 | 163M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) |
678 | 57.5M | RETURN_FAILURE; |
679 | 105M | pattern++; |
680 | 105M | ptr++; |
681 | 105M | DISPATCH; |
682 | | |
683 | 105M | TARGET(SRE_OP_NOT_LITERAL): |
684 | | /* match anything that is not literal character */ |
685 | | /* <NOT_LITERAL> <code> */ |
686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, |
687 | 0 | ptr, *pattern)); |
688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) |
689 | 0 | RETURN_FAILURE; |
690 | 0 | pattern++; |
691 | 0 | ptr++; |
692 | 0 | DISPATCH; |
693 | | |
694 | 244M | TARGET(SRE_OP_SUCCESS): |
695 | | /* end of pattern */ |
696 | 244M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); |
697 | 244M | if (ctx->toplevel && |
698 | 244M | ((state->match_all && ptr != state->end) || |
699 | 70.6M | (state->must_advance && ptr == state->start))) |
700 | 0 | { |
701 | 0 | RETURN_FAILURE; |
702 | 0 | } |
703 | 244M | state->ptr = ptr; |
704 | 244M | RETURN_SUCCESS; |
705 | | |
706 | 14.9M | TARGET(SRE_OP_AT): |
707 | | /* match at given position */ |
708 | | /* <AT> <code> */ |
709 | 14.9M | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); |
710 | 14.9M | if (!SRE(at)(state, ptr, *pattern)) |
711 | 4.12M | RETURN_FAILURE; |
712 | 10.8M | pattern++; |
713 | 10.8M | DISPATCH; |
714 | | |
715 | 10.8M | TARGET(SRE_OP_CATEGORY): |
716 | | /* match at given category */ |
717 | | /* <CATEGORY> <code> */ |
718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, |
719 | 0 | ptr, *pattern)); |
720 | 0 | if (ptr >= end || !sre_category(pattern[0], ptr[0])) |
721 | 0 | RETURN_FAILURE; |
722 | 0 | pattern++; |
723 | 0 | ptr++; |
724 | 0 | DISPATCH; |
725 | | |
726 | 0 | TARGET(SRE_OP_ANY): |
727 | | /* match anything (except a newline) */ |
728 | | /* <ANY> */ |
729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); |
730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) |
731 | 0 | RETURN_FAILURE; |
732 | 0 | ptr++; |
733 | 0 | DISPATCH; |
734 | | |
735 | 0 | TARGET(SRE_OP_ANY_ALL): |
736 | | /* match anything */ |
737 | | /* <ANY_ALL> */ |
738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); |
739 | 0 | if (ptr >= end) |
740 | 0 | RETURN_FAILURE; |
741 | 0 | ptr++; |
742 | 0 | DISPATCH; |
743 | | |
744 | 301M | TARGET(SRE_OP_IN): |
745 | | /* match set member (or non_member) */ |
746 | | /* <IN> <skip> <set> */ |
747 | 301M | TRACE(("|%p|%p|IN\n", pattern, ptr)); |
748 | 301M | if (ptr >= end || |
749 | 301M | !SRE(charset)(state, pattern + 1, *ptr)) |
750 | 7.57M | RETURN_FAILURE; |
751 | 294M | pattern += pattern[0]; |
752 | 294M | ptr++; |
753 | 294M | DISPATCH; |
754 | | |
755 | 294M | TARGET(SRE_OP_LITERAL_IGNORE): |
756 | 6.13M | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
757 | 6.13M | pattern, ptr, pattern[0])); |
758 | 6.13M | if (ptr >= end || |
759 | 6.13M | sre_lower_ascii(*ptr) != *pattern) |
760 | 349k | RETURN_FAILURE; |
761 | 5.78M | pattern++; |
762 | 5.78M | ptr++; |
763 | 5.78M | DISPATCH; |
764 | | |
765 | 5.78M | TARGET(SRE_OP_LITERAL_UNI_IGNORE): |
766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", |
767 | 0 | pattern, ptr, pattern[0])); |
768 | 0 | if (ptr >= end || |
769 | 0 | sre_lower_unicode(*ptr) != *pattern) |
770 | 0 | RETURN_FAILURE; |
771 | 0 | pattern++; |
772 | 0 | ptr++; |
773 | 0 | DISPATCH; |
774 | | |
775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): |
776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", |
777 | 0 | pattern, ptr, pattern[0])); |
778 | 0 | if (ptr >= end |
779 | 0 | || !char_loc_ignore(*pattern, *ptr)) |
780 | 0 | RETURN_FAILURE; |
781 | 0 | pattern++; |
782 | 0 | ptr++; |
783 | 0 | DISPATCH; |
784 | | |
785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): |
786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
787 | 0 | pattern, ptr, *pattern)); |
788 | 0 | if (ptr >= end || |
789 | 0 | sre_lower_ascii(*ptr) == *pattern) |
790 | 0 | RETURN_FAILURE; |
791 | 0 | pattern++; |
792 | 0 | ptr++; |
793 | 0 | DISPATCH; |
794 | | |
795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): |
796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", |
797 | 0 | pattern, ptr, *pattern)); |
798 | 0 | if (ptr >= end || |
799 | 0 | sre_lower_unicode(*ptr) == *pattern) |
800 | 0 | RETURN_FAILURE; |
801 | 0 | pattern++; |
802 | 0 | ptr++; |
803 | 0 | DISPATCH; |
804 | | |
805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): |
806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", |
807 | 0 | pattern, ptr, *pattern)); |
808 | 0 | if (ptr >= end |
809 | 0 | || char_loc_ignore(*pattern, *ptr)) |
810 | 0 | RETURN_FAILURE; |
811 | 0 | pattern++; |
812 | 0 | ptr++; |
813 | 0 | DISPATCH; |
814 | | |
815 | 0 | TARGET(SRE_OP_IN_IGNORE): |
816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); |
817 | 0 | if (ptr >= end |
818 | 0 | || !SRE(charset)(state, pattern+1, |
819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) |
820 | 0 | RETURN_FAILURE; |
821 | 0 | pattern += pattern[0]; |
822 | 0 | ptr++; |
823 | 0 | DISPATCH; |
824 | | |
825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): |
826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); |
827 | 0 | if (ptr >= end |
828 | 0 | || !SRE(charset)(state, pattern+1, |
829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) |
830 | 0 | RETURN_FAILURE; |
831 | 0 | pattern += pattern[0]; |
832 | 0 | ptr++; |
833 | 0 | DISPATCH; |
834 | | |
835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): |
836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); |
837 | 0 | if (ptr >= end |
838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) |
839 | 0 | RETURN_FAILURE; |
840 | 0 | pattern += pattern[0]; |
841 | 0 | ptr++; |
842 | 0 | DISPATCH; |
843 | | |
844 | 104M | TARGET(SRE_OP_JUMP): |
845 | 104M | TARGET(SRE_OP_INFO): |
846 | | /* jump forward */ |
847 | | /* <JUMP> <offset> */ |
848 | 104M | TRACE(("|%p|%p|JUMP %d\n", pattern, |
849 | 104M | ptr, pattern[0])); |
850 | 104M | pattern += pattern[0]; |
851 | 104M | DISPATCH; |
852 | | |
853 | 179M | TARGET(SRE_OP_BRANCH): |
854 | | /* alternation */ |
855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ |
856 | 179M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); |
857 | 179M | LASTMARK_SAVE(); |
858 | 179M | if (state->repeat) |
859 | 127M | MARK_PUSH(ctx->lastmark); |
860 | 442M | for (; pattern[0]; pattern += pattern[0]) { |
861 | 363M | if (pattern[1] == SRE_OP_LITERAL && |
862 | 363M | (ptr >= end || |
863 | 169M | (SRE_CODE) *ptr != pattern[2])) |
864 | 90.2M | continue; |
865 | 273M | if (pattern[1] == SRE_OP_IN && |
866 | 273M | (ptr >= end || |
867 | 120M | !SRE(charset)(state, pattern + 3, |
868 | 120M | (SRE_CODE) *ptr))) |
869 | 90.3M | continue; |
870 | 182M | state->ptr = ptr; |
871 | 182M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); |
872 | 182M | if (ret) { |
873 | 101M | if (state->repeat) |
874 | 81.3M | MARK_POP_DISCARD(ctx->lastmark); |
875 | 101M | RETURN_ON_ERROR(ret); |
876 | 101M | RETURN_SUCCESS; |
877 | 101M | } |
878 | 81.8M | if (state->repeat) |
879 | 31.7k | MARK_POP_KEEP(ctx->lastmark); |
880 | 81.8M | LASTMARK_RESTORE(); |
881 | 81.8M | } |
882 | 78.5M | if (state->repeat) |
883 | 46.3M | MARK_POP_DISCARD(ctx->lastmark); |
884 | 78.5M | RETURN_FAILURE; |
885 | | |
886 | 707M | TARGET(SRE_OP_REPEAT_ONE): |
887 | | /* match repeated sequence (maximizing regexp) */ |
888 | | |
889 | | /* this operator only works if the repeated item is |
890 | | exactly one character wide, and we're not already |
891 | | collecting backtracking points. for other cases, |
892 | | use the MAX_REPEAT operator */ |
893 | | |
894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
895 | | |
896 | 707M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, |
897 | 707M | pattern[1], pattern[2])); |
898 | | |
899 | 707M | if ((Py_ssize_t) pattern[1] > end - ptr) |
900 | 1.13M | RETURN_FAILURE; /* cannot match */ |
901 | | |
902 | 706M | state->ptr = ptr; |
903 | | |
904 | 706M | ret = SRE(count)(state, pattern+3, pattern[2]); |
905 | 706M | RETURN_ON_ERROR(ret); |
906 | 706M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
907 | 706M | ctx->count = ret; |
908 | 706M | ptr += ctx->count; |
909 | | |
910 | | /* when we arrive here, count contains the number of |
911 | | matches, and ptr points to the tail of the target |
912 | | string. check if the rest of the pattern matches, |
913 | | and backtrack if not. */ |
914 | | |
915 | 706M | if (ctx->count < (Py_ssize_t) pattern[1]) |
916 | 413M | RETURN_FAILURE; |
917 | | |
918 | 293M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && |
919 | 293M | ptr == state->end && |
920 | 293M | !(ctx->toplevel && state->must_advance && ptr == state->start)) |
921 | 76.1k | { |
922 | | /* tail is empty. we're finished */ |
923 | 76.1k | state->ptr = ptr; |
924 | 76.1k | RETURN_SUCCESS; |
925 | 76.1k | } |
926 | | |
927 | 293M | LASTMARK_SAVE(); |
928 | 293M | if (state->repeat) |
929 | 214M | MARK_PUSH(ctx->lastmark); |
930 | | |
931 | 293M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { |
932 | | /* tail starts with a literal. skip positions where |
933 | | the rest of the pattern cannot possibly match */ |
934 | 103M | ctx->u.chr = pattern[pattern[0]+1]; |
935 | 103M | for (;;) { |
936 | 235M | while (ctx->count >= (Py_ssize_t) pattern[1] && |
937 | 235M | (ptr >= end || *ptr != ctx->u.chr)) { |
938 | 131M | ptr--; |
939 | 131M | ctx->count--; |
940 | 131M | } |
941 | 103M | if (ctx->count < (Py_ssize_t) pattern[1]) |
942 | 85.7M | break; |
943 | 18.0M | state->ptr = ptr; |
944 | 18.0M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, |
945 | 18.0M | pattern+pattern[0]); |
946 | 18.0M | if (ret) { |
947 | 18.0M | if (state->repeat) |
948 | 16.1M | MARK_POP_DISCARD(ctx->lastmark); |
949 | 18.0M | RETURN_ON_ERROR(ret); |
950 | 18.0M | RETURN_SUCCESS; |
951 | 18.0M | } |
952 | 551 | if (state->repeat) |
953 | 551 | MARK_POP_KEEP(ctx->lastmark); |
954 | 551 | LASTMARK_RESTORE(); |
955 | | |
956 | 551 | ptr--; |
957 | 551 | ctx->count--; |
958 | 551 | } |
959 | 85.7M | if (state->repeat) |
960 | 84.0M | MARK_POP_DISCARD(ctx->lastmark); |
961 | 189M | } else { |
962 | | /* general case */ |
963 | 193M | while (ctx->count >= (Py_ssize_t) pattern[1]) { |
964 | 191M | state->ptr = ptr; |
965 | 191M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, |
966 | 191M | pattern+pattern[0]); |
967 | 191M | if (ret) { |
968 | 188M | if (state->repeat) |
969 | 113M | MARK_POP_DISCARD(ctx->lastmark); |
970 | 188M | RETURN_ON_ERROR(ret); |
971 | 188M | RETURN_SUCCESS; |
972 | 188M | } |
973 | 3.58M | if (state->repeat) |
974 | 2.00M | MARK_POP_KEEP(ctx->lastmark); |
975 | 3.58M | LASTMARK_RESTORE(); |
976 | | |
977 | 3.58M | ptr--; |
978 | 3.58M | ctx->count--; |
979 | 3.58M | } |
980 | 1.24M | if (state->repeat) |
981 | 1.04M | MARK_POP_DISCARD(ctx->lastmark); |
982 | 1.24M | } |
983 | 86.9M | RETURN_FAILURE; |
984 | | |
985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): |
986 | | /* match repeated sequence (minimizing regexp) */ |
987 | | |
988 | | /* this operator only works if the repeated item is |
989 | | exactly one character wide, and we're not already |
990 | | collecting backtracking points. for other cases, |
991 | | use the MIN_REPEAT operator */ |
992 | | |
993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
994 | |
|
995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, |
996 | 0 | pattern[1], pattern[2])); |
997 | |
|
998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) |
999 | 0 | RETURN_FAILURE; /* cannot match */ |
1000 | | |
1001 | 0 | state->ptr = ptr; |
1002 | |
|
1003 | 0 | if (pattern[1] == 0) |
1004 | 0 | ctx->count = 0; |
1005 | 0 | else { |
1006 | | /* count using pattern min as the maximum */ |
1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); |
1008 | 0 | RETURN_ON_ERROR(ret); |
1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) |
1011 | | /* didn't match minimum number of times */ |
1012 | 0 | RETURN_FAILURE; |
1013 | | /* advance past minimum matches of repeat */ |
1014 | 0 | ctx->count = ret; |
1015 | 0 | ptr += ctx->count; |
1016 | 0 | } |
1017 | | |
1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && |
1019 | 0 | !(ctx->toplevel && |
1020 | 0 | ((state->match_all && ptr != state->end) || |
1021 | 0 | (state->must_advance && ptr == state->start)))) |
1022 | 0 | { |
1023 | | /* tail is empty. we're finished */ |
1024 | 0 | state->ptr = ptr; |
1025 | 0 | RETURN_SUCCESS; |
1026 | |
|
1027 | 0 | } else { |
1028 | | /* general case */ |
1029 | 0 | LASTMARK_SAVE(); |
1030 | 0 | if (state->repeat) |
1031 | 0 | MARK_PUSH(ctx->lastmark); |
1032 | | |
1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT |
1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { |
1035 | 0 | state->ptr = ptr; |
1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, |
1037 | 0 | pattern+pattern[0]); |
1038 | 0 | if (ret) { |
1039 | 0 | if (state->repeat) |
1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1041 | 0 | RETURN_ON_ERROR(ret); |
1042 | 0 | RETURN_SUCCESS; |
1043 | 0 | } |
1044 | 0 | if (state->repeat) |
1045 | 0 | MARK_POP_KEEP(ctx->lastmark); |
1046 | 0 | LASTMARK_RESTORE(); |
1047 | |
|
1048 | 0 | state->ptr = ptr; |
1049 | 0 | ret = SRE(count)(state, pattern+3, 1); |
1050 | 0 | RETURN_ON_ERROR(ret); |
1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1052 | 0 | if (ret == 0) |
1053 | 0 | break; |
1054 | 0 | assert(ret == 1); |
1055 | 0 | ptr++; |
1056 | 0 | ctx->count++; |
1057 | 0 | } |
1058 | 0 | if (state->repeat) |
1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1060 | 0 | } |
1061 | 0 | RETURN_FAILURE; |
1062 | | |
1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): |
1064 | | /* match repeated sequence (maximizing regexp) without |
1065 | | backtracking */ |
1066 | | |
1067 | | /* this operator only works if the repeated item is |
1068 | | exactly one character wide, and we're not already |
1069 | | collecting backtracking points. for other cases, |
1070 | | use the MAX_REPEAT operator */ |
1071 | | |
1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> |
1073 | | tail */ |
1074 | |
|
1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, |
1076 | 0 | ptr, pattern[1], pattern[2])); |
1077 | |
|
1078 | 0 | if (ptr + pattern[1] > end) { |
1079 | 0 | RETURN_FAILURE; /* cannot match */ |
1080 | 0 | } |
1081 | | |
1082 | 0 | state->ptr = ptr; |
1083 | |
|
1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); |
1085 | 0 | RETURN_ON_ERROR(ret); |
1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1087 | 0 | ctx->count = ret; |
1088 | 0 | ptr += ctx->count; |
1089 | | |
1090 | | /* when we arrive here, count contains the number of |
1091 | | matches, and ptr points to the tail of the target |
1092 | | string. check if the rest of the pattern matches, |
1093 | | and fail if not. */ |
1094 | | |
1095 | | /* Test for not enough repetitions in match */ |
1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { |
1097 | 0 | RETURN_FAILURE; |
1098 | 0 | } |
1099 | | |
1100 | | /* Update the pattern to point to the next op code */ |
1101 | 0 | pattern += pattern[0]; |
1102 | | |
1103 | | /* Let the tail be evaluated separately and consider this |
1104 | | match successful. */ |
1105 | 0 | if (*pattern == SRE_OP_SUCCESS && |
1106 | 0 | ptr == state->end && |
1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) |
1108 | 0 | { |
1109 | | /* tail is empty. we're finished */ |
1110 | 0 | state->ptr = ptr; |
1111 | 0 | RETURN_SUCCESS; |
1112 | 0 | } |
1113 | | |
1114 | | /* Attempt to match the rest of the string */ |
1115 | 0 | DISPATCH; |
1116 | | |
1117 | 144M | TARGET(SRE_OP_REPEAT): |
1118 | | /* create repeat context. all the hard work is done |
1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ |
1120 | | /* <REPEAT> <skip> <1=min> <2=max> |
1121 | | <3=repeat_index> item <UNTIL> tail */ |
1122 | 144M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, |
1123 | 144M | pattern[1], pattern[2])); |
1124 | | |
1125 | | /* install new repeat context */ |
1126 | 144M | ctx->u.rep = repeat_pool_malloc(state); |
1127 | 144M | if (!ctx->u.rep) { |
1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); |
1129 | 0 | } |
1130 | 144M | ctx->u.rep->count = -1; |
1131 | 144M | ctx->u.rep->pattern = pattern; |
1132 | 144M | ctx->u.rep->prev = state->repeat; |
1133 | 144M | ctx->u.rep->last_ptr = NULL; |
1134 | 144M | state->repeat = ctx->u.rep; |
1135 | | |
1136 | 144M | state->ptr = ptr; |
1137 | 144M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); |
1138 | 144M | state->repeat = ctx->u.rep->prev; |
1139 | 144M | repeat_pool_free(state, ctx->u.rep); |
1140 | | |
1141 | 144M | if (ret) { |
1142 | 144M | RETURN_ON_ERROR(ret); |
1143 | 144M | RETURN_SUCCESS; |
1144 | 144M | } |
1145 | 101k | RETURN_FAILURE; |
1146 | | |
1147 | 278M | TARGET(SRE_OP_MAX_UNTIL): |
1148 | | /* maximizing repeat */ |
1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ |
1150 | | |
1151 | | /* FIXME: we probably need to deal with zero-width |
1152 | | matches in here... */ |
1153 | | |
1154 | 278M | ctx->u.rep = state->repeat; |
1155 | 278M | if (!ctx->u.rep) |
1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1157 | | |
1158 | 278M | state->ptr = ptr; |
1159 | | |
1160 | 278M | ctx->count = ctx->u.rep->count+1; |
1161 | | |
1162 | 278M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, |
1163 | 278M | ptr, ctx->count)); |
1164 | | |
1165 | 278M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1166 | | /* not enough matches */ |
1167 | 0 | ctx->u.rep->count = ctx->count; |
1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, |
1169 | 0 | ctx->u.rep->pattern+3); |
1170 | 0 | if (ret) { |
1171 | 0 | RETURN_ON_ERROR(ret); |
1172 | 0 | RETURN_SUCCESS; |
1173 | 0 | } |
1174 | 0 | ctx->u.rep->count = ctx->count-1; |
1175 | 0 | state->ptr = ptr; |
1176 | 0 | RETURN_FAILURE; |
1177 | 0 | } |
1178 | | |
1179 | 278M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || |
1180 | 278M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && |
1181 | 278M | state->ptr != ctx->u.rep->last_ptr) { |
1182 | | /* we may have enough matches, but if we can |
1183 | | match another item, do so */ |
1184 | 266M | ctx->u.rep->count = ctx->count; |
1185 | 266M | LASTMARK_SAVE(); |
1186 | 266M | MARK_PUSH(ctx->lastmark); |
1187 | | /* zero-width match protection */ |
1188 | 266M | LAST_PTR_PUSH(); |
1189 | 266M | ctx->u.rep->last_ptr = state->ptr; |
1190 | 266M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, |
1191 | 266M | ctx->u.rep->pattern+3); |
1192 | 266M | LAST_PTR_POP(); |
1193 | 266M | if (ret) { |
1194 | 132M | MARK_POP_DISCARD(ctx->lastmark); |
1195 | 132M | RETURN_ON_ERROR(ret); |
1196 | 132M | RETURN_SUCCESS; |
1197 | 132M | } |
1198 | 133M | MARK_POP(ctx->lastmark); |
1199 | 133M | LASTMARK_RESTORE(); |
1200 | 133M | ctx->u.rep->count = ctx->count-1; |
1201 | 133M | state->ptr = ptr; |
1202 | 133M | } |
1203 | | |
1204 | | /* cannot match more repeated items here. make sure the |
1205 | | tail matches */ |
1206 | 145M | state->repeat = ctx->u.rep->prev; |
1207 | 145M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); |
1208 | 145M | state->repeat = ctx->u.rep; // restore repeat before return |
1209 | | |
1210 | 145M | RETURN_ON_SUCCESS(ret); |
1211 | 1.10M | state->ptr = ptr; |
1212 | 1.10M | RETURN_FAILURE; |
1213 | | |
1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): |
1215 | | /* minimizing repeat */ |
1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ |
1217 | |
|
1218 | 0 | ctx->u.rep = state->repeat; |
1219 | 0 | if (!ctx->u.rep) |
1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1221 | | |
1222 | 0 | state->ptr = ptr; |
1223 | |
|
1224 | 0 | ctx->count = ctx->u.rep->count+1; |
1225 | |
|
1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, |
1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); |
1228 | |
|
1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1230 | | /* not enough matches */ |
1231 | 0 | ctx->u.rep->count = ctx->count; |
1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, |
1233 | 0 | ctx->u.rep->pattern+3); |
1234 | 0 | if (ret) { |
1235 | 0 | RETURN_ON_ERROR(ret); |
1236 | 0 | RETURN_SUCCESS; |
1237 | 0 | } |
1238 | 0 | ctx->u.rep->count = ctx->count-1; |
1239 | 0 | state->ptr = ptr; |
1240 | 0 | RETURN_FAILURE; |
1241 | 0 | } |
1242 | | |
1243 | | /* see if the tail matches */ |
1244 | 0 | state->repeat = ctx->u.rep->prev; |
1245 | |
|
1246 | 0 | LASTMARK_SAVE(); |
1247 | 0 | if (state->repeat) |
1248 | 0 | MARK_PUSH(ctx->lastmark); |
1249 | | |
1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); |
1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; |
1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return |
1253 | |
|
1254 | 0 | if (ret) { |
1255 | 0 | if (repeat_of_tail) |
1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1257 | 0 | RETURN_ON_ERROR(ret); |
1258 | 0 | RETURN_SUCCESS; |
1259 | 0 | } |
1260 | 0 | if (repeat_of_tail) |
1261 | 0 | MARK_POP(ctx->lastmark); |
1262 | 0 | LASTMARK_RESTORE(); |
1263 | |
|
1264 | 0 | state->ptr = ptr; |
1265 | |
|
1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] |
1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || |
1268 | 0 | state->ptr == ctx->u.rep->last_ptr) |
1269 | 0 | RETURN_FAILURE; |
1270 | | |
1271 | 0 | ctx->u.rep->count = ctx->count; |
1272 | | /* zero-width match protection */ |
1273 | 0 | LAST_PTR_PUSH(); |
1274 | 0 | ctx->u.rep->last_ptr = state->ptr; |
1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, |
1276 | 0 | ctx->u.rep->pattern+3); |
1277 | 0 | LAST_PTR_POP(); |
1278 | 0 | if (ret) { |
1279 | 0 | RETURN_ON_ERROR(ret); |
1280 | 0 | RETURN_SUCCESS; |
1281 | 0 | } |
1282 | 0 | ctx->u.rep->count = ctx->count-1; |
1283 | 0 | state->ptr = ptr; |
1284 | 0 | RETURN_FAILURE; |
1285 | | |
1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): |
1287 | | /* create possessive repeat contexts. */ |
1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern |
1289 | | <SUCCESS> tail */ |
1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, |
1291 | 0 | ptr, pattern[1], pattern[2])); |
1292 | | |
1293 | | /* Set the global Input pointer to this context's Input |
1294 | | pointer */ |
1295 | 0 | state->ptr = ptr; |
1296 | | |
1297 | | /* Set state->repeat to non-NULL */ |
1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); |
1299 | 0 | if (!ctx->u.rep) { |
1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); |
1301 | 0 | } |
1302 | 0 | ctx->u.rep->count = -1; |
1303 | 0 | ctx->u.rep->pattern = NULL; |
1304 | 0 | ctx->u.rep->prev = state->repeat; |
1305 | 0 | ctx->u.rep->last_ptr = NULL; |
1306 | 0 | state->repeat = ctx->u.rep; |
1307 | | |
1308 | | /* Initialize Count to 0 */ |
1309 | 0 | ctx->count = 0; |
1310 | | |
1311 | | /* Check for minimum required matches. */ |
1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { |
1313 | | /* not enough matches */ |
1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, |
1315 | 0 | &pattern[3]); |
1316 | 0 | if (ret) { |
1317 | 0 | RETURN_ON_ERROR(ret); |
1318 | 0 | ctx->count++; |
1319 | 0 | } |
1320 | 0 | else { |
1321 | 0 | state->ptr = ptr; |
1322 | | /* Restore state->repeat */ |
1323 | 0 | state->repeat = ctx->u.rep->prev; |
1324 | 0 | repeat_pool_free(state, ctx->u.rep); |
1325 | 0 | RETURN_FAILURE; |
1326 | 0 | } |
1327 | 0 | } |
1328 | | |
1329 | | /* Clear the context's Input stream pointer so that it |
1330 | | doesn't match the global state so that the while loop can |
1331 | | be entered. */ |
1332 | 0 | ptr = NULL; |
1333 | | |
1334 | | /* Keep trying to parse the <pattern> sub-pattern until the |
1335 | | end is reached, creating a new context each time. */ |
1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || |
1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && |
1338 | 0 | state->ptr != ptr) { |
1339 | | /* Save the Capture Group Marker state into the current |
1340 | | Context and back up the current highest number |
1341 | | Capture Group marker. */ |
1342 | 0 | LASTMARK_SAVE(); |
1343 | 0 | MARK_PUSH(ctx->lastmark); |
1344 | | |
1345 | | /* zero-width match protection */ |
1346 | | /* Set the context's Input Stream pointer to be the |
1347 | | current Input Stream pointer from the global |
1348 | | state. When the loop reaches the next iteration, |
1349 | | the context will then store the last known good |
1350 | | position with the global state holding the Input |
1351 | | Stream position that has been updated with |
1352 | | the most recent match. Thus, if state's Input |
1353 | | stream remains the same as the one stored in the |
1354 | | current Context, we know we have successfully |
1355 | | matched an empty string and that all subsequent |
1356 | | matches will also be the empty string until the |
1357 | | maximum number of matches are counted, and because |
1358 | | of this, we could immediately stop at that point and |
1359 | | consider this match successful. */ |
1360 | 0 | ptr = state->ptr; |
1361 | | |
1362 | | /* We have not reached the maximum matches, so try to |
1363 | | match once more. */ |
1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, |
1365 | 0 | &pattern[3]); |
1366 | | |
1367 | | /* Check to see if the last attempted match |
1368 | | succeeded. */ |
1369 | 0 | if (ret) { |
1370 | | /* Drop the saved highest number Capture Group |
1371 | | marker saved above and use the newly updated |
1372 | | value. */ |
1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1374 | 0 | RETURN_ON_ERROR(ret); |
1375 | | |
1376 | | /* Success, increment the count. */ |
1377 | 0 | ctx->count++; |
1378 | 0 | } |
1379 | | /* Last attempted match failed. */ |
1380 | 0 | else { |
1381 | | /* Restore the previously saved highest number |
1382 | | Capture Group marker since the last iteration |
1383 | | did not match, then restore that to the global |
1384 | | state. */ |
1385 | 0 | MARK_POP(ctx->lastmark); |
1386 | 0 | LASTMARK_RESTORE(); |
1387 | | |
1388 | | /* Restore the global Input Stream pointer |
1389 | | since it can change after jumps. */ |
1390 | 0 | state->ptr = ptr; |
1391 | | |
1392 | | /* We have sufficient matches, so exit loop. */ |
1393 | 0 | break; |
1394 | 0 | } |
1395 | 0 | } |
1396 | | |
1397 | | /* Restore state->repeat */ |
1398 | 0 | state->repeat = ctx->u.rep->prev; |
1399 | 0 | repeat_pool_free(state, ctx->u.rep); |
1400 | | |
1401 | | /* Evaluate Tail */ |
1402 | | /* Jump to end of pattern indicated by skip, and then skip |
1403 | | the SUCCESS op code that follows it. */ |
1404 | 0 | pattern += pattern[0] + 1; |
1405 | 0 | ptr = state->ptr; |
1406 | 0 | DISPATCH; |
1407 | | |
1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): |
1409 | | /* Atomic Group Sub Pattern */ |
1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ |
1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); |
1412 | | |
1413 | | /* Set the global Input pointer to this context's Input |
1414 | | pointer */ |
1415 | 0 | state->ptr = ptr; |
1416 | | |
1417 | | /* Evaluate the Atomic Group in a new context, terminating |
1418 | | when the end of the group, represented by a SUCCESS op |
1419 | | code, is reached. */ |
1420 | | /* Group Pattern begins at an offset of 1 code. */ |
1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, |
1422 | 0 | &pattern[1]); |
1423 | | |
1424 | | /* Test Exit Condition */ |
1425 | 0 | RETURN_ON_ERROR(ret); |
1426 | | |
1427 | 0 | if (ret == 0) { |
1428 | | /* Atomic Group failed to Match. */ |
1429 | 0 | state->ptr = ptr; |
1430 | 0 | RETURN_FAILURE; |
1431 | 0 | } |
1432 | | |
1433 | | /* Evaluate Tail */ |
1434 | | /* Jump to end of pattern indicated by skip, and then skip |
1435 | | the SUCCESS op code that follows it. */ |
1436 | 0 | pattern += pattern[0]; |
1437 | 0 | ptr = state->ptr; |
1438 | 0 | DISPATCH; |
1439 | | |
1440 | 0 | TARGET(SRE_OP_GROUPREF): |
1441 | | /* match backreference */ |
1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, |
1443 | 0 | ptr, pattern[0])); |
1444 | 0 | { |
1445 | 0 | int groupref = pattern[0] * 2; |
1446 | 0 | if (groupref >= state->lastmark) { |
1447 | 0 | RETURN_FAILURE; |
1448 | 0 | } else { |
1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1451 | 0 | if (!p || !e || e < p) |
1452 | 0 | RETURN_FAILURE; |
1453 | 0 | while (p < e) { |
1454 | 0 | if (ptr >= end || *ptr != *p) |
1455 | 0 | RETURN_FAILURE; |
1456 | 0 | p++; |
1457 | 0 | ptr++; |
1458 | 0 | } |
1459 | 0 | } |
1460 | 0 | } |
1461 | 0 | pattern++; |
1462 | 0 | DISPATCH; |
1463 | | |
1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): |
1465 | | /* match backreference */ |
1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, |
1467 | 0 | ptr, pattern[0])); |
1468 | 0 | { |
1469 | 0 | int groupref = pattern[0] * 2; |
1470 | 0 | if (groupref >= state->lastmark) { |
1471 | 0 | RETURN_FAILURE; |
1472 | 0 | } else { |
1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1475 | 0 | if (!p || !e || e < p) |
1476 | 0 | RETURN_FAILURE; |
1477 | 0 | while (p < e) { |
1478 | 0 | if (ptr >= end || |
1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) |
1480 | 0 | RETURN_FAILURE; |
1481 | 0 | p++; |
1482 | 0 | ptr++; |
1483 | 0 | } |
1484 | 0 | } |
1485 | 0 | } |
1486 | 0 | pattern++; |
1487 | 0 | DISPATCH; |
1488 | | |
1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): |
1490 | | /* match backreference */ |
1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, |
1492 | 0 | ptr, pattern[0])); |
1493 | 0 | { |
1494 | 0 | int groupref = pattern[0] * 2; |
1495 | 0 | if (groupref >= state->lastmark) { |
1496 | 0 | RETURN_FAILURE; |
1497 | 0 | } else { |
1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1500 | 0 | if (!p || !e || e < p) |
1501 | 0 | RETURN_FAILURE; |
1502 | 0 | while (p < e) { |
1503 | 0 | if (ptr >= end || |
1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) |
1505 | 0 | RETURN_FAILURE; |
1506 | 0 | p++; |
1507 | 0 | ptr++; |
1508 | 0 | } |
1509 | 0 | } |
1510 | 0 | } |
1511 | 0 | pattern++; |
1512 | 0 | DISPATCH; |
1513 | | |
1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): |
1515 | | /* match backreference */ |
1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, |
1517 | 0 | ptr, pattern[0])); |
1518 | 0 | { |
1519 | 0 | int groupref = pattern[0] * 2; |
1520 | 0 | if (groupref >= state->lastmark) { |
1521 | 0 | RETURN_FAILURE; |
1522 | 0 | } else { |
1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1525 | 0 | if (!p || !e || e < p) |
1526 | 0 | RETURN_FAILURE; |
1527 | 0 | while (p < e) { |
1528 | 0 | if (ptr >= end || |
1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) |
1530 | 0 | RETURN_FAILURE; |
1531 | 0 | p++; |
1532 | 0 | ptr++; |
1533 | 0 | } |
1534 | 0 | } |
1535 | 0 | } |
1536 | 0 | pattern++; |
1537 | 0 | DISPATCH; |
1538 | | |
1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): |
1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, |
1541 | 0 | ptr, pattern[0])); |
1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ |
1543 | 0 | { |
1544 | 0 | int groupref = pattern[0] * 2; |
1545 | 0 | if (groupref >= state->lastmark) { |
1546 | 0 | pattern += pattern[1]; |
1547 | 0 | DISPATCH; |
1548 | 0 | } else { |
1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1551 | 0 | if (!p || !e || e < p) { |
1552 | 0 | pattern += pattern[1]; |
1553 | 0 | DISPATCH; |
1554 | 0 | } |
1555 | 0 | } |
1556 | 0 | } |
1557 | 0 | pattern += 2; |
1558 | 0 | DISPATCH; |
1559 | | |
1560 | 97.4M | TARGET(SRE_OP_ASSERT): |
1561 | | /* assert subpattern */ |
1562 | | /* <ASSERT> <skip> <back> <pattern> */ |
1563 | 97.4M | TRACE(("|%p|%p|ASSERT %d\n", pattern, |
1564 | 97.4M | ptr, pattern[1])); |
1565 | 97.4M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) |
1566 | 0 | RETURN_FAILURE; |
1567 | 97.4M | state->ptr = ptr - pattern[1]; |
1568 | 97.4M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); |
1569 | 97.4M | RETURN_ON_FAILURE(ret); |
1570 | 93.2M | pattern += pattern[0]; |
1571 | 93.2M | DISPATCH; |
1572 | | |
1573 | 93.2M | TARGET(SRE_OP_ASSERT_NOT): |
1574 | | /* assert not subpattern */ |
1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ |
1576 | 43.6M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, |
1577 | 43.6M | ptr, pattern[1])); |
1578 | 43.6M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { |
1579 | 43.6M | state->ptr = ptr - pattern[1]; |
1580 | 43.6M | LASTMARK_SAVE(); |
1581 | 43.6M | if (state->repeat) |
1582 | 43.6M | MARK_PUSH(ctx->lastmark); |
1583 | | |
1584 | 87.2M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); |
1585 | 87.2M | if (ret) { |
1586 | 25.5k | if (state->repeat) |
1587 | 25.5k | MARK_POP_DISCARD(ctx->lastmark); |
1588 | 25.5k | RETURN_ON_ERROR(ret); |
1589 | 25.5k | RETURN_FAILURE; |
1590 | 25.5k | } |
1591 | 43.5M | if (state->repeat) |
1592 | 43.5M | MARK_POP(ctx->lastmark); |
1593 | 43.5M | LASTMARK_RESTORE(); |
1594 | 43.5M | } |
1595 | 43.5M | pattern += pattern[0]; |
1596 | 43.5M | DISPATCH; |
1597 | | |
1598 | 43.5M | TARGET(SRE_OP_FAILURE): |
1599 | | /* immediate failure */ |
1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); |
1601 | 0 | RETURN_FAILURE; |
1602 | | |
1603 | | #if !USE_COMPUTED_GOTOS |
1604 | | default: |
1605 | | #endif |
1606 | | // Also any unused opcodes: |
1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): |
1608 | 0 | TARGET(SRE_OP_SUBPATTERN): |
1609 | 0 | TARGET(SRE_OP_RANGE): |
1610 | 0 | TARGET(SRE_OP_NEGATE): |
1611 | 0 | TARGET(SRE_OP_BIGCHARSET): |
1612 | 0 | TARGET(SRE_OP_CHARSET): |
1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, |
1614 | 0 | pattern[-1])); |
1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); |
1616 | |
|
1617 | 0 | } |
1618 | | |
1619 | 1.63G | exit: |
1620 | 1.63G | ctx_pos = ctx->last_ctx_pos; |
1621 | 1.63G | jump = ctx->jump; |
1622 | 1.63G | DATA_POP_DISCARD(ctx); |
1623 | 1.63G | if (ctx_pos == -1) { |
1624 | 546M | state->sigcount = sigcount; |
1625 | 546M | return ret; |
1626 | 546M | } |
1627 | 1.09G | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1628 | | |
1629 | 1.09G | switch (jump) { |
1630 | 266M | case JUMP_MAX_UNTIL_2: |
1631 | 266M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); |
1632 | 266M | goto jump_max_until_2; |
1633 | 145M | case JUMP_MAX_UNTIL_3: |
1634 | 145M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); |
1635 | 145M | goto jump_max_until_3; |
1636 | 0 | case JUMP_MIN_UNTIL_2: |
1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); |
1638 | 0 | goto jump_min_until_2; |
1639 | 0 | case JUMP_MIN_UNTIL_3: |
1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); |
1641 | 0 | goto jump_min_until_3; |
1642 | 182M | case JUMP_BRANCH: |
1643 | 182M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); |
1644 | 182M | goto jump_branch; |
1645 | 0 | case JUMP_MAX_UNTIL_1: |
1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); |
1647 | 0 | goto jump_max_until_1; |
1648 | 0 | case JUMP_MIN_UNTIL_1: |
1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); |
1650 | 0 | goto jump_min_until_1; |
1651 | 0 | case JUMP_POSS_REPEAT_1: |
1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); |
1653 | 0 | goto jump_poss_repeat_1; |
1654 | 0 | case JUMP_POSS_REPEAT_2: |
1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); |
1656 | 0 | goto jump_poss_repeat_2; |
1657 | 144M | case JUMP_REPEAT: |
1658 | 144M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); |
1659 | 144M | goto jump_repeat; |
1660 | 18.0M | case JUMP_REPEAT_ONE_1: |
1661 | 18.0M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); |
1662 | 18.0M | goto jump_repeat_one_1; |
1663 | 191M | case JUMP_REPEAT_ONE_2: |
1664 | 191M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); |
1665 | 191M | goto jump_repeat_one_2; |
1666 | 0 | case JUMP_MIN_REPEAT_ONE: |
1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); |
1668 | 0 | goto jump_min_repeat_one; |
1669 | 0 | case JUMP_ATOMIC_GROUP: |
1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); |
1671 | 0 | goto jump_atomic_group; |
1672 | 97.4M | case JUMP_ASSERT: |
1673 | 97.4M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); |
1674 | 97.4M | goto jump_assert; |
1675 | 43.6M | case JUMP_ASSERT_NOT: |
1676 | 43.6M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); |
1677 | 43.6M | goto jump_assert_not; |
1678 | 0 | case JUMP_NONE: |
1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, |
1680 | 0 | ptr, ret)); |
1681 | 0 | break; |
1682 | 1.09G | } |
1683 | | |
1684 | 0 | return ret; /* should never get here */ |
1685 | 1.09G | } Line | Count | Source | 600 | 172M | { | 601 | 172M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 172M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 172M | Py_ssize_t ret = 0; | 604 | 172M | int jump; | 605 | 172M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 172M | SRE(match_context)* ctx; | 608 | 172M | SRE(match_context)* nextctx; | 609 | 172M | INIT_TRACE(state); | 610 | | | 611 | 172M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 172M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 172M | ctx->last_ctx_pos = -1; | 615 | 172M | ctx->jump = JUMP_NONE; | 616 | 172M | ctx->toplevel = toplevel; | 617 | 172M | ctx_pos = alloc_pos; | 618 | | | 619 | 172M | #if USE_COMPUTED_GOTOS | 620 | 172M | #include "sre_targets.h" | 621 | 172M | #endif | 622 | | | 623 | 333M | entrance: | 624 | | | 625 | 333M | ; // Fashion statement. | 626 | 333M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 333M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 33.7M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 7.74M | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 7.74M | end - ptr, (size_t) pattern[3])); | 634 | 7.74M | RETURN_FAILURE; | 635 | 7.74M | } | 636 | 25.9M | pattern += pattern[1] + 1; | 637 | 25.9M | } | 638 | | | 639 | 325M | #if USE_COMPUTED_GOTOS | 640 | 325M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 325M | { | 647 | | | 648 | 325M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 148M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 148M | ptr, pattern[0])); | 653 | 148M | { | 654 | 148M | int i = pattern[0]; | 655 | 148M | if (i & 1) | 656 | 20.5M | state->lastindex = i/2 + 1; | 657 | 148M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 145M | int j = state->lastmark + 1; | 663 | 148M | while (j < i) | 664 | 3.71M | state->mark[j++] = NULL; | 665 | 145M | state->lastmark = i; | 666 | 145M | } | 667 | 148M | state->mark[i] = ptr; | 668 | 148M | } | 669 | 148M | pattern++; | 670 | 148M | DISPATCH; | 671 | | | 672 | 148M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 56.2M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 56.2M | ptr, *pattern)); | 677 | 56.2M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 18.2M | RETURN_FAILURE; | 679 | 37.9M | pattern++; | 680 | 37.9M | ptr++; | 681 | 37.9M | DISPATCH; | 682 | | | 683 | 37.9M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 49.5M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 49.5M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 49.5M | if (ctx->toplevel && | 698 | 49.5M | ((state->match_all && ptr != state->end) || | 699 | 18.0M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 49.5M | state->ptr = ptr; | 704 | 49.5M | RETURN_SUCCESS; | 705 | | | 706 | 13.5M | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 13.5M | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 13.5M | if (!SRE(at)(state, ptr, *pattern)) | 711 | 2.80M | RETURN_FAILURE; | 712 | 10.7M | pattern++; | 713 | 10.7M | DISPATCH; | 714 | | | 715 | 10.7M | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 
| 0 | if (ptr >= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 34.7M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 34.7M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 34.7M | if (ptr >= end || | 749 | 34.7M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 366k | RETURN_FAILURE; | 751 | 34.3M | pattern += pattern[0]; | 752 | 34.3M | ptr++; | 753 | 34.3M | DISPATCH; | 754 | | | 755 | 34.3M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 989k | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 989k | pattern, ptr, pattern[0])); | 758 | 989k | if (ptr >= end || | 759 | 989k | sre_lower_ascii(*ptr) != *pattern) | 760 | 169k | RETURN_FAILURE; | 761 | 819k | pattern++; | 762 | 819k | ptr++; | 763 | 819k | DISPATCH; | 764 | | | 765 | 819k | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | 
RETURN_FAILURE; | 781 | 0 | pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | 
ptr++; | 842 | 0 | DISPATCH; | 843 | | | 844 | 26.8M | TARGET(SRE_OP_JUMP): | 845 | 26.8M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 26.8M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 26.8M | ptr, pattern[0])); | 850 | 26.8M | pattern += pattern[0]; | 851 | 26.8M | DISPATCH; | 852 | | | 853 | 54.7M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 54.7M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 54.7M | LASTMARK_SAVE(); | 858 | 54.7M | if (state->repeat) | 859 | 9.61M | MARK_PUSH(ctx->lastmark); | 860 | 166M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 136M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 136M | (ptr >= end || | 863 | 60.4M | (SRE_CODE) *ptr != pattern[2])) | 864 | 28.6M | continue; | 865 | 108M | if (pattern[1] == SRE_OP_IN && | 866 | 108M | (ptr >= end || | 867 | 11.0M | !SRE(charset)(state, pattern + 3, | 868 | 11.0M | (SRE_CODE) *ptr))) | 869 | 6.05M | continue; | 870 | 101M | state->ptr = ptr; | 871 | 101M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 101M | if (ret) { | 873 | 24.6M | if (state->repeat) | 874 | 9.40M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 24.6M | RETURN_ON_ERROR(ret); | 876 | 24.6M | RETURN_SUCCESS; | 877 | 24.6M | } | 878 | 77.2M | if (state->repeat) | 879 | 6.72k | MARK_POP_KEEP(ctx->lastmark); | 880 | 77.2M | LASTMARK_RESTORE(); | 881 | 77.2M | } | 882 | 30.0M | if (state->repeat) | 883 | 210k | MARK_POP_DISCARD(ctx->lastmark); | 884 | 30.0M | RETURN_FAILURE; | 885 | | | 886 | 173M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 173M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 173M | pattern[1], pattern[2])); | 898 | | | 899 | 173M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 938k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 172M | state->ptr = ptr; | 903 | | | 904 | 172M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 172M | RETURN_ON_ERROR(ret); | 906 | 172M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 172M | ctx->count = ret; | 908 | 172M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 172M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 146M | RETURN_FAILURE; | 917 | | | 918 | 26.4M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 26.4M | ptr == state->end && | 920 | 26.4M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 56.2k | { | 922 | | /* tail is empty. we're finished */ | 923 | 56.2k | state->ptr = ptr; | 924 | 56.2k | RETURN_SUCCESS; | 925 | 56.2k | } | 926 | | | 927 | 26.4M | LASTMARK_SAVE(); | 928 | 26.4M | if (state->repeat) | 929 | 12.0M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 26.4M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 5.36M | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 5.36M | for (;;) { | 936 | 17.3M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 17.3M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 11.9M | ptr--; | 939 | 11.9M | ctx->count--; | 940 | 11.9M | } | 941 | 5.36M | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 3.14M | break; | 943 | 2.22M | state->ptr = ptr; | 944 | 2.22M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 2.22M | pattern+pattern[0]); | 946 | 2.22M | if (ret) { | 947 | 2.22M | if (state->repeat) | 948 | 403k | MARK_POP_DISCARD(ctx->lastmark); | 949 | 2.22M | RETURN_ON_ERROR(ret); | 950 | 2.22M | RETURN_SUCCESS; | 951 | 2.22M | } | 952 | 111 | if (state->repeat) | 953 | 111 | MARK_POP_KEEP(ctx->lastmark); | 954 | 111 | LASTMARK_RESTORE(); | 955 | | | 956 | 111 | ptr--; | 957 | 111 | ctx->count--; | 958 | 111 | } | 959 | 3.14M | if (state->repeat) | 960 | 1.51M | MARK_POP_DISCARD(ctx->lastmark); | 961 | 21.0M | } else { | 962 | | /* general case */ | 963 | 23.0M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 22.1M | state->ptr = ptr; | 965 | 22.1M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 22.1M | pattern+pattern[0]); | 967 | 22.1M | if (ret) { | 968 | 20.2M | if (state->repeat) | 969 | 9.46M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 20.2M | RETURN_ON_ERROR(ret); | 971 | 20.2M | RETURN_SUCCESS; | 972 | 20.2M | } | 973 | 1.95M | if (state->repeat) | 974 | 1.19M | MARK_POP_KEEP(ctx->lastmark); | 975 | 1.95M | LASTMARK_RESTORE(); | 976 | | | 977 | 1.95M | ptr--; | 978 | 1.95M | ctx->count--; | 979 | 1.95M | } | 980 | 839k | if (state->repeat) | 981 | 644k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 839k | } | 983 | 3.98M | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 
| | exactly one character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 5.65M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 5.65M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 5.65M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 5.65M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 5.65M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 5.65M | ctx->u.rep->count = -1; | 1131 | 5.65M | ctx->u.rep->pattern = pattern; | 1132 | 5.65M | ctx->u.rep->prev = state->repeat; | 1133 | 5.65M | ctx->u.rep->last_ptr = NULL; | 1134 | 5.65M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 5.65M | state->ptr = ptr; | 1137 | 5.65M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 5.65M | state->repeat = ctx->u.rep->prev; | 1139 | 5.65M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 5.65M | if (ret) { | 1142 | 5.55M | RETURN_ON_ERROR(ret); | 1143 | 5.55M | RETURN_SUCCESS; | 1144 | 5.55M | } | 1145 | 100k | RETURN_FAILURE; | 1146 | | | 1147 | 20.2M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 20.2M | ctx->u.rep = state->repeat; | 1155 | 20.2M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 20.2M | state->ptr = ptr; | 1159 | | | 1160 | 20.2M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 20.2M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 20.2M | ptr, ctx->count)); | 1164 | | | 1165 | 20.2M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 20.2M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 20.2M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 20.2M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 16.3M | ctx->u.rep->count = ctx->count; | 1185 | 16.3M | LASTMARK_SAVE(); | 1186 | 16.3M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 16.3M | LAST_PTR_PUSH(); | 1189 | 16.3M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 16.3M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 16.3M | ctx->u.rep->pattern+3); | 1192 | 16.3M | LAST_PTR_POP(); | 1193 | 16.3M | if (ret) { | 1194 | 13.9M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 13.9M | RETURN_ON_ERROR(ret); | 1196 | 13.9M | RETURN_SUCCESS; | 1197 | 13.9M | } | 1198 | 2.37M | MARK_POP(ctx->lastmark); | 1199 | 2.37M | LASTMARK_RESTORE(); | 1200 | 2.37M | ctx->u.rep->count = ctx->count-1; | 1201 | 2.37M | state->ptr = ptr; | 1202 | 2.37M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 6.26M | state->repeat = ctx->u.rep->prev; | 1207 | 6.26M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 6.26M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 6.26M | RETURN_ON_SUCCESS(ret); | 1211 | 702k | state->ptr = ptr; | 1212 | 702k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the Input | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximin matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 1.81M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 1.81M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 1.81M | ptr, pattern[1])); | 1565 | 1.81M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 1.81M | state->ptr = ptr - pattern[1]; | 1568 | 1.81M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 1.81M | RETURN_ON_FAILURE(ret); | 1570 | 1.76M | pattern += pattern[0]; | 1571 | 1.76M | DISPATCH; | 1572 | | | 1573 | 4.44M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 4.44M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 4.44M | ptr, pattern[1])); | 1578 | 4.44M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 4.44M | state->ptr = ptr - pattern[1]; | 1580 | 4.44M | LASTMARK_SAVE(); | 1581 | 4.44M | if (state->repeat) | 1582 | 4.44M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 8.88M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 8.88M | if (ret) { | 1586 | 1.07k | if (state->repeat) | 1587 | 1.07k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 1.07k | RETURN_ON_ERROR(ret); | 1589 | 1.07k | RETURN_FAILURE; | 1590 | 1.07k | } | 1591 | 4.44M | if (state->repeat) | 1592 | 4.44M | MARK_POP(ctx->lastmark); | 1593 | 4.44M | LASTMARK_RESTORE(); | 1594 | 4.44M | } | 1595 | 4.44M | 
pattern += pattern[0]; | 1596 | 4.44M | DISPATCH; | 1597 | | | 1598 | 4.44M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 333M | exit: | 1620 | 333M | ctx_pos = ctx->last_ctx_pos; | 1621 | 333M | jump = ctx->jump; | 1622 | 333M | DATA_POP_DISCARD(ctx); | 1623 | 333M | if (ctx_pos == -1) { | 1624 | 172M | state->sigcount = sigcount; | 1625 | 172M | return ret; | 1626 | 172M | } | 1627 | 160M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 160M | switch (jump) { | 1630 | 16.3M | case JUMP_MAX_UNTIL_2: | 1631 | 16.3M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 16.3M | goto jump_max_until_2; | 1633 | 6.26M | case JUMP_MAX_UNTIL_3: | 1634 | 6.26M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 6.26M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 101M | case JUMP_BRANCH: | 1643 | 101M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 101M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 5.65M | case JUMP_REPEAT: | 1658 | 5.65M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 5.65M | goto jump_repeat; | 1660 | 2.22M | case JUMP_REPEAT_ONE_1: | 1661 | 2.22M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 2.22M | goto jump_repeat_one_1; | 1663 | 22.1M | case JUMP_REPEAT_ONE_2: | 1664 
| 22.1M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 22.1M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 1.81M | case JUMP_ASSERT: | 1673 | 1.81M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 1.81M | goto jump_assert; | 1675 | 4.44M | case JUMP_ASSERT_NOT: | 1676 | 4.44M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 4.44M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 160M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 160M | } |
Line | Count | Source | 600 | 265M | { | 601 | 265M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 265M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 265M | Py_ssize_t ret = 0; | 604 | 265M | int jump; | 605 | 265M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 265M | SRE(match_context)* ctx; | 608 | 265M | SRE(match_context)* nextctx; | 609 | 265M | INIT_TRACE(state); | 610 | | | 611 | 265M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 265M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 265M | ctx->last_ctx_pos = -1; | 615 | 265M | ctx->jump = JUMP_NONE; | 616 | 265M | ctx->toplevel = toplevel; | 617 | 265M | ctx_pos = alloc_pos; | 618 | | | 619 | 265M | #if USE_COMPUTED_GOTOS | 620 | 265M | #include "sre_targets.h" | 621 | 265M | #endif | 622 | | | 623 | 664M | entrance: | 624 | | | 625 | 664M | ; // Fashion statement. | 626 | 664M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 664M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 28.1M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 709 | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 709 | end - ptr, (size_t) pattern[3])); | 634 | 709 | RETURN_FAILURE; | 635 | 709 | } | 636 | 28.1M | pattern += pattern[1] + 1; | 637 | 28.1M | } | 638 | | | 639 | 664M | #if USE_COMPUTED_GOTOS | 640 | 664M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 664M | { | 647 | | | 648 | 664M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 262M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 262M | ptr, pattern[0])); | 653 | 262M | { | 654 | 262M | int i = pattern[0]; | 655 | 262M | if (i & 1) | 656 | 26.6M | state->lastindex = i/2 + 1; | 657 | 262M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 259M | int j = state->lastmark + 1; | 663 | 261M | while (j < i) | 664 | 1.95M | state->mark[j++] = NULL; | 665 | 259M | state->lastmark = i; | 666 | 259M | } | 667 | 262M | state->mark[i] = ptr; | 668 | 262M | } | 669 | 262M | pattern++; | 670 | 262M | DISPATCH; | 671 | | | 672 | 262M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 56.4M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 56.4M | ptr, *pattern)); | 677 | 56.4M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 20.0M | RETURN_FAILURE; | 679 | 36.3M | pattern++; | 680 | 36.3M | ptr++; | 681 | 36.3M | DISPATCH; | 682 | | | 683 | 36.3M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 105M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 105M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 105M | if (ctx->toplevel && | 698 | 105M | ((state->match_all && ptr != state->end) || | 699 | 24.9M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 105M | state->ptr = ptr; | 704 | 105M | RETURN_SUCCESS; | 705 | | | 706 | 791k | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 791k | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 791k | if (!SRE(at)(state, ptr, *pattern)) | 711 | 743k | RETURN_FAILURE; | 712 | 47.8k | pattern++; | 713 | 47.8k | DISPATCH; | 714 | | | 715 | 47.8k | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 | 0 | if 
(ptr >= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 133M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 133M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 133M | if (ptr >= end || | 749 | 133M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 5.83M | RETURN_FAILURE; | 751 | 127M | pattern += pattern[0]; | 752 | 127M | ptr++; | 753 | 127M | DISPATCH; | 754 | | | 755 | 127M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 4.22M | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 4.22M | pattern, ptr, pattern[0])); | 758 | 4.22M | if (ptr >= end || | 759 | 4.22M | sre_lower_ascii(*ptr) != *pattern) | 760 | 161k | RETURN_FAILURE; | 761 | 4.06M | pattern++; | 762 | 4.06M | ptr++; | 763 | 4.06M | DISPATCH; | 764 | | | 765 | 4.06M | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | RETURN_FAILURE; | 
781 | 0 | pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | ptr++; | 842 | 0 | 
DISPATCH; | 843 | | | 844 | 33.8M | TARGET(SRE_OP_JUMP): | 845 | 33.8M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 33.8M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 33.8M | ptr, pattern[0])); | 850 | 33.8M | pattern += pattern[0]; | 851 | 33.8M | DISPATCH; | 852 | | | 853 | 54.1M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 54.1M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 54.1M | LASTMARK_SAVE(); | 858 | 54.1M | if (state->repeat) | 859 | 51.0M | MARK_PUSH(ctx->lastmark); | 860 | 121M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 100M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 100M | (ptr >= end || | 863 | 49.1M | (SRE_CODE) *ptr != pattern[2])) | 864 | 24.7M | continue; | 865 | 75.7M | if (pattern[1] == SRE_OP_IN && | 866 | 75.7M | (ptr >= end || | 867 | 46.9M | !SRE(charset)(state, pattern + 3, | 868 | 46.9M | (SRE_CODE) *ptr))) | 869 | 38.8M | continue; | 870 | 36.8M | state->ptr = ptr; | 871 | 36.8M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 36.8M | if (ret) { | 873 | 33.4M | if (state->repeat) | 874 | 31.8M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 33.4M | RETURN_ON_ERROR(ret); | 876 | 33.4M | RETURN_SUCCESS; | 877 | 33.4M | } | 878 | 3.44M | if (state->repeat) | 879 | 8.55k | MARK_POP_KEEP(ctx->lastmark); | 880 | 3.44M | LASTMARK_RESTORE(); | 881 | 3.44M | } | 882 | 20.7M | if (state->repeat) | 883 | 19.2M | MARK_POP_DISCARD(ctx->lastmark); | 884 | 20.7M | RETURN_FAILURE; | 885 | | | 886 | 308M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 308M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 308M | pattern[1], pattern[2])); | 898 | | | 899 | 308M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 167k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 308M | state->ptr = ptr; | 903 | | | 904 | 308M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 308M | RETURN_ON_ERROR(ret); | 906 | 308M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 308M | ctx->count = ret; | 908 | 308M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 308M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 194M | RETURN_FAILURE; | 917 | | | 918 | 113M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 113M | ptr == state->end && | 920 | 113M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 16.2k | { | 922 | | /* tail is empty. we're finished */ | 923 | 16.2k | state->ptr = ptr; | 924 | 16.2k | RETURN_SUCCESS; | 925 | 16.2k | } | 926 | | | 927 | 113M | LASTMARK_SAVE(); | 928 | 113M | if (state->repeat) | 929 | 82.9M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 113M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 39.9M | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 39.9M | for (;;) { | 936 | 78.2M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 78.2M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 38.2M | ptr--; | 939 | 38.2M | ctx->count--; | 940 | 38.2M | } | 941 | 39.9M | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 32.0M | break; | 943 | 7.95M | state->ptr = ptr; | 944 | 7.95M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 7.95M | pattern+pattern[0]); | 946 | 7.95M | if (ret) { | 947 | 7.95M | if (state->repeat) | 948 | 7.93M | MARK_POP_DISCARD(ctx->lastmark); | 949 | 7.95M | RETURN_ON_ERROR(ret); | 950 | 7.95M | RETURN_SUCCESS; | 951 | 7.95M | } | 952 | 216 | if (state->repeat) | 953 | 216 | MARK_POP_KEEP(ctx->lastmark); | 954 | 216 | LASTMARK_RESTORE(); | 955 | | | 956 | 216 | ptr--; | 957 | 216 | ctx->count--; | 958 | 216 | } | 959 | 32.0M | if (state->repeat) | 960 | 31.9M | MARK_POP_DISCARD(ctx->lastmark); | 961 | 73.6M | } else { | 962 | | /* general case */ | 963 | 74.4M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 74.1M | state->ptr = ptr; | 965 | 74.1M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 74.1M | pattern+pattern[0]); | 967 | 74.1M | if (ret) { | 968 | 73.3M | if (state->repeat) | 969 | 42.7M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 73.3M | RETURN_ON_ERROR(ret); | 971 | 73.3M | RETURN_SUCCESS; | 972 | 73.3M | } | 973 | 809k | if (state->repeat) | 974 | 639k | MARK_POP_KEEP(ctx->lastmark); | 975 | 809k | LASTMARK_RESTORE(); | 976 | | | 977 | 809k | ptr--; | 978 | 809k | ctx->count--; | 979 | 809k | } | 980 | 321k | if (state->repeat) | 981 | 319k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 321k | } | 983 | 32.3M | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 | | 
exactly one character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 57.3M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 57.3M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 57.3M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 57.3M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 57.3M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 57.3M | ctx->u.rep->count = -1; | 1131 | 57.3M | ctx->u.rep->pattern = pattern; | 1132 | 57.3M | ctx->u.rep->prev = state->repeat; | 1133 | 57.3M | ctx->u.rep->last_ptr = NULL; | 1134 | 57.3M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 57.3M | state->ptr = ptr; | 1137 | 57.3M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 57.3M | state->repeat = ctx->u.rep->prev; | 1139 | 57.3M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 57.3M | if (ret) { | 1142 | 57.3M | RETURN_ON_ERROR(ret); | 1143 | 57.3M | RETURN_SUCCESS; | 1144 | 57.3M | } | 1145 | 896 | RETURN_FAILURE; | 1146 | | | 1147 | 108M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 108M | ctx->u.rep = state->repeat; | 1155 | 108M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 108M | state->ptr = ptr; | 1159 | | | 1160 | 108M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 108M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 108M | ptr, ctx->count)); | 1164 | | | 1165 | 108M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 108M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 108M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 108M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 103M | ctx->u.rep->count = ctx->count; | 1185 | 103M | LASTMARK_SAVE(); | 1186 | 103M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 103M | LAST_PTR_PUSH(); | 1189 | 103M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 103M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 103M | ctx->u.rep->pattern+3); | 1192 | 103M | LAST_PTR_POP(); | 1193 | 103M | if (ret) { | 1194 | 50.8M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 50.8M | RETURN_ON_ERROR(ret); | 1196 | 50.8M | RETURN_SUCCESS; | 1197 | 50.8M | } | 1198 | 53.0M | MARK_POP(ctx->lastmark); | 1199 | 53.0M | LASTMARK_RESTORE(); | 1200 | 53.0M | ctx->u.rep->count = ctx->count-1; | 1201 | 53.0M | state->ptr = ptr; | 1202 | 53.0M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 57.6M | state->repeat = ctx->u.rep->prev; | 1207 | 57.6M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 57.6M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 57.6M | RETURN_ON_SUCCESS(ret); | 1211 | 320k | state->ptr = ptr; | 1212 | 320k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximum number of matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 40.2M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 40.2M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 40.2M | ptr, pattern[1])); | 1565 | 40.2M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 40.2M | state->ptr = ptr - pattern[1]; | 1568 | 40.2M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 40.2M | RETURN_ON_FAILURE(ret); | 1570 | 36.7M | pattern += pattern[0]; | 1571 | 36.7M | DISPATCH; | 1572 | | | 1573 | 36.7M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 20.1M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 20.1M | ptr, pattern[1])); | 1578 | 20.1M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 20.1M | state->ptr = ptr - pattern[1]; | 1580 | 20.1M | LASTMARK_SAVE(); | 1581 | 20.1M | if (state->repeat) | 1582 | 20.1M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 40.3M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 40.3M | if (ret) { | 1586 | 8.31k | if (state->repeat) | 1587 | 8.31k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 8.31k | RETURN_ON_ERROR(ret); | 1589 | 8.31k | RETURN_FAILURE; | 1590 | 8.31k | } | 1591 | 20.1M | if (state->repeat) | 1592 | 20.1M | MARK_POP(ctx->lastmark); | 1593 | 20.1M | LASTMARK_RESTORE(); | 1594 | 20.1M | } | 1595 | 20.1M | 
pattern += pattern[0]; | 1596 | 20.1M | DISPATCH; | 1597 | | | 1598 | 20.1M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 664M | exit: | 1620 | 664M | ctx_pos = ctx->last_ctx_pos; | 1621 | 664M | jump = ctx->jump; | 1622 | 664M | DATA_POP_DISCARD(ctx); | 1623 | 664M | if (ctx_pos == -1) { | 1624 | 265M | state->sigcount = sigcount; | 1625 | 265M | return ret; | 1626 | 265M | } | 1627 | 398M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 398M | switch (jump) { | 1630 | 103M | case JUMP_MAX_UNTIL_2: | 1631 | 103M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 103M | goto jump_max_until_2; | 1633 | 57.6M | case JUMP_MAX_UNTIL_3: | 1634 | 57.6M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 57.6M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 36.8M | case JUMP_BRANCH: | 1643 | 36.8M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 36.8M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 57.3M | case JUMP_REPEAT: | 1658 | 57.3M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 57.3M | goto jump_repeat; | 1660 | 7.95M | case JUMP_REPEAT_ONE_1: | 1661 | 7.95M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 7.95M | goto jump_repeat_one_1; | 1663 | 74.1M | case JUMP_REPEAT_ONE_2: | 1664 
| 74.1M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 74.1M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 40.2M | case JUMP_ASSERT: | 1673 | 40.2M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 40.2M | goto jump_assert; | 1675 | 20.1M | case JUMP_ASSERT_NOT: | 1676 | 20.1M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 20.1M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 398M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 398M | } |
Line | Count | Source | 600 | 108M | { | 601 | 108M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 108M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 108M | Py_ssize_t ret = 0; | 604 | 108M | int jump; | 605 | 108M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 108M | SRE(match_context)* ctx; | 608 | 108M | SRE(match_context)* nextctx; | 609 | 108M | INIT_TRACE(state); | 610 | | | 611 | 108M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 108M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 108M | ctx->last_ctx_pos = -1; | 615 | 108M | ctx->jump = JUMP_NONE; | 616 | 108M | ctx->toplevel = toplevel; | 617 | 108M | ctx_pos = alloc_pos; | 618 | | | 619 | 108M | #if USE_COMPUTED_GOTOS | 620 | 108M | #include "sre_targets.h" | 621 | 108M | #endif | 622 | | | 623 | 639M | entrance: | 624 | | | 625 | 639M | ; // Fashion statement. | 626 | 639M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 639M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 28.0M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 346 | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 346 | end - ptr, (size_t) pattern[3])); | 634 | 346 | RETURN_FAILURE; | 635 | 346 | } | 636 | 28.0M | pattern += pattern[1] + 1; | 637 | 28.0M | } | 638 | | | 639 | 639M | #if USE_COMPUTED_GOTOS | 640 | 639M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 639M | { | 647 | | | 648 | 639M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 166M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 166M | ptr, pattern[0])); | 653 | 166M | { | 654 | 166M | int i = pattern[0]; | 655 | 166M | if (i & 1) | 656 | 35.2M | state->lastindex = i/2 + 1; | 657 | 166M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 164M | int j = state->lastmark + 1; | 663 | 166M | while (j < i) | 664 | 2.00M | state->mark[j++] = NULL; | 665 | 164M | state->lastmark = i; | 666 | 164M | } | 667 | 166M | state->mark[i] = ptr; | 668 | 166M | } | 669 | 166M | pattern++; | 670 | 166M | DISPATCH; | 671 | | | 672 | 166M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 50.7M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 50.7M | ptr, *pattern)); | 677 | 50.7M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 19.1M | RETURN_FAILURE; | 679 | 31.5M | pattern++; | 680 | 31.5M | ptr++; | 681 | 31.5M | DISPATCH; | 682 | | | 683 | 31.5M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 89.4M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 89.4M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 89.4M | if (ctx->toplevel && | 698 | 89.4M | ((state->match_all && ptr != state->end) || | 699 | 27.6M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 89.4M | state->ptr = ptr; | 704 | 89.4M | RETURN_SUCCESS; | 705 | | | 706 | 586k | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 586k | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 586k | if (!SRE(at)(state, ptr, *pattern)) | 711 | 577k | RETURN_FAILURE; | 712 | 8.78k | pattern++; | 713 | 8.78k | DISPATCH; | 714 | | | 715 | 8.78k | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 | 0 
| if (ptr >= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 133M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 133M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 133M | if (ptr >= end || | 749 | 133M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 1.37M | RETURN_FAILURE; | 751 | 131M | pattern += pattern[0]; | 752 | 131M | ptr++; | 753 | 131M | DISPATCH; | 754 | | | 755 | 131M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 923k | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 923k | pattern, ptr, pattern[0])); | 758 | 923k | if (ptr >= end || | 759 | 923k | sre_lower_ascii(*ptr) != *pattern) | 760 | 18.4k | RETURN_FAILURE; | 761 | 904k | pattern++; | 762 | 904k | ptr++; | 763 | 904k | DISPATCH; | 764 | | | 765 | 904k | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | RETURN_FAILURE; | 
781 | 0 | pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | ptr++; | 842 | 0 | 
DISPATCH; | 843 | | | 844 | 43.4M | TARGET(SRE_OP_JUMP): | 845 | 43.4M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 43.4M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 43.4M | ptr, pattern[0])); | 850 | 43.4M | pattern += pattern[0]; | 851 | 43.4M | DISPATCH; | 852 | | | 853 | 70.6M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 70.6M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 70.6M | LASTMARK_SAVE(); | 858 | 70.6M | if (state->repeat) | 859 | 67.0M | MARK_PUSH(ctx->lastmark); | 860 | 154M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 126M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 126M | (ptr >= end || | 863 | 60.2M | (SRE_CODE) *ptr != pattern[2])) | 864 | 36.9M | continue; | 865 | 89.5M | if (pattern[1] == SRE_OP_IN && | 866 | 89.5M | (ptr >= end || | 867 | 62.9M | !SRE(charset)(state, pattern + 3, | 868 | 62.9M | (SRE_CODE) *ptr))) | 869 | 45.4M | continue; | 870 | 44.1M | state->ptr = ptr; | 871 | 44.1M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 44.1M | if (ret) { | 873 | 42.9M | if (state->repeat) | 874 | 40.0M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 42.9M | RETURN_ON_ERROR(ret); | 876 | 42.9M | RETURN_SUCCESS; | 877 | 42.9M | } | 878 | 1.14M | if (state->repeat) | 879 | 16.4k | MARK_POP_KEEP(ctx->lastmark); | 880 | 1.14M | LASTMARK_RESTORE(); | 881 | 1.14M | } | 882 | 27.6M | if (state->repeat) | 883 | 26.9M | MARK_POP_DISCARD(ctx->lastmark); | 884 | 27.6M | RETURN_FAILURE; | 885 | | | 886 | 225M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 225M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 225M | pattern[1], pattern[2])); | 898 | | | 899 | 225M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 25.2k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 225M | state->ptr = ptr; | 903 | | | 904 | 225M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 225M | RETURN_ON_ERROR(ret); | 906 | 225M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 225M | ctx->count = ret; | 908 | 225M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 225M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 72.5M | RETURN_FAILURE; | 917 | | | 918 | 153M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 153M | ptr == state->end && | 920 | 153M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 3.66k | { | 922 | | /* tail is empty. we're finished */ | 923 | 3.66k | state->ptr = ptr; | 924 | 3.66k | RETURN_SUCCESS; | 925 | 3.66k | } | 926 | | | 927 | 153M | LASTMARK_SAVE(); | 928 | 153M | if (state->repeat) | 929 | 119M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 153M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 58.3M | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 58.3M | for (;;) { | 936 | 139M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 139M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 81.0M | ptr--; | 939 | 81.0M | ctx->count--; | 940 | 81.0M | } | 941 | 58.3M | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 50.5M | break; | 943 | 7.82M | state->ptr = ptr; | 944 | 7.82M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 7.82M | pattern+pattern[0]); | 946 | 7.82M | if (ret) { | 947 | 7.82M | if (state->repeat) | 948 | 7.82M | MARK_POP_DISCARD(ctx->lastmark); | 949 | 7.82M | RETURN_ON_ERROR(ret); | 950 | 7.82M | RETURN_SUCCESS; | 951 | 7.82M | } | 952 | 224 | if (state->repeat) | 953 | 224 | MARK_POP_KEEP(ctx->lastmark); | 954 | 224 | LASTMARK_RESTORE(); | 955 | | | 956 | 224 | ptr--; | 957 | 224 | ctx->count--; | 958 | 224 | } | 959 | 50.5M | if (state->repeat) | 960 | 50.5M | MARK_POP_DISCARD(ctx->lastmark); | 961 | 94.7M | } else { | 962 | | /* general case */ | 963 | 95.5M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 95.4M | state->ptr = ptr; | 965 | 95.4M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 95.4M | pattern+pattern[0]); | 967 | 95.4M | if (ret) { | 968 | 94.6M | if (state->repeat) | 969 | 60.8M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 94.6M | RETURN_ON_ERROR(ret); | 971 | 94.6M | RETURN_SUCCESS; | 972 | 94.6M | } | 973 | 820k | if (state->repeat) | 974 | 170k | MARK_POP_KEEP(ctx->lastmark); | 975 | 820k | LASTMARK_RESTORE(); | 976 | | | 977 | 820k | ptr--; | 978 | 820k | ctx->count--; | 979 | 820k | } | 980 | 86.1k | if (state->repeat) | 981 | 85.4k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 86.1k | } | 983 | 50.6M | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 | | 
exactly one character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 81.8M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 81.8M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 81.8M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 81.8M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 81.8M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 81.8M | ctx->u.rep->count = -1; | 1131 | 81.8M | ctx->u.rep->pattern = pattern; | 1132 | 81.8M | ctx->u.rep->prev = state->repeat; | 1133 | 81.8M | ctx->u.rep->last_ptr = NULL; | 1134 | 81.8M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 81.8M | state->ptr = ptr; | 1137 | 81.8M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 81.8M | state->repeat = ctx->u.rep->prev; | 1139 | 81.8M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 81.8M | if (ret) { | 1142 | 81.8M | RETURN_ON_ERROR(ret); | 1143 | 81.8M | RETURN_SUCCESS; | 1144 | 81.8M | } | 1145 | 751 | RETURN_FAILURE; | 1146 | | | 1147 | 150M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 150M | ctx->u.rep = state->repeat; | 1155 | 150M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 150M | state->ptr = ptr; | 1159 | | | 1160 | 150M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 150M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 150M | ptr, ctx->count)); | 1164 | | | 1165 | 150M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 150M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 150M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 150M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 145M | ctx->u.rep->count = ctx->count; | 1185 | 145M | LASTMARK_SAVE(); | 1186 | 145M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 145M | LAST_PTR_PUSH(); | 1189 | 145M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 145M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 145M | ctx->u.rep->pattern+3); | 1192 | 145M | LAST_PTR_POP(); | 1193 | 145M | if (ret) { | 1194 | 68.1M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 68.1M | RETURN_ON_ERROR(ret); | 1196 | 68.1M | RETURN_SUCCESS; | 1197 | 68.1M | } | 1198 | 77.7M | MARK_POP(ctx->lastmark); | 1199 | 77.7M | LASTMARK_RESTORE(); | 1200 | 77.7M | ctx->u.rep->count = ctx->count-1; | 1201 | 77.7M | state->ptr = ptr; | 1202 | 77.7M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 81.9M | state->repeat = ctx->u.rep->prev; | 1207 | 81.9M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 81.9M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 81.9M | RETURN_ON_SUCCESS(ret); | 1211 | 85.9k | state->ptr = ptr; | 1212 | 85.9k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the Input | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximin matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 55.3M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 55.3M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 55.3M | ptr, pattern[1])); | 1565 | 55.3M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 55.3M | state->ptr = ptr - pattern[1]; | 1568 | 55.3M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 55.3M | RETURN_ON_FAILURE(ret); | 1570 | 54.7M | pattern += pattern[0]; | 1571 | 54.7M | DISPATCH; | 1572 | | | 1573 | 54.7M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 18.9M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 18.9M | ptr, pattern[1])); | 1578 | 18.9M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 18.9M | state->ptr = ptr - pattern[1]; | 1580 | 18.9M | LASTMARK_SAVE(); | 1581 | 18.9M | if (state->repeat) | 1582 | 18.9M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 37.9M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 37.9M | if (ret) { | 1586 | 16.1k | if (state->repeat) | 1587 | 16.1k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 16.1k | RETURN_ON_ERROR(ret); | 1589 | 16.1k | RETURN_FAILURE; | 1590 | 16.1k | } | 1591 | 18.9M | if (state->repeat) | 1592 | 18.9M | MARK_POP(ctx->lastmark); | 1593 | 18.9M | LASTMARK_RESTORE(); | 1594 | 18.9M | } | 1595 | 18.9M | 
pattern += pattern[0]; | 1596 | 18.9M | DISPATCH; | 1597 | | | 1598 | 18.9M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 639M | exit: | 1620 | 639M | ctx_pos = ctx->last_ctx_pos; | 1621 | 639M | jump = ctx->jump; | 1622 | 639M | DATA_POP_DISCARD(ctx); | 1623 | 639M | if (ctx_pos == -1) { | 1624 | 108M | state->sigcount = sigcount; | 1625 | 108M | return ret; | 1626 | 108M | } | 1627 | 531M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 531M | switch (jump) { | 1630 | 145M | case JUMP_MAX_UNTIL_2: | 1631 | 145M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 145M | goto jump_max_until_2; | 1633 | 81.9M | case JUMP_MAX_UNTIL_3: | 1634 | 81.9M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 81.9M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 44.1M | case JUMP_BRANCH: | 1643 | 44.1M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 44.1M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 81.8M | case JUMP_REPEAT: | 1658 | 81.8M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 81.8M | goto jump_repeat; | 1660 | 7.82M | case JUMP_REPEAT_ONE_1: | 1661 | 7.82M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 7.82M | goto jump_repeat_one_1; | 1663 | 95.4M | case JUMP_REPEAT_ONE_2: | 1664 
| 95.4M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 95.4M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 55.3M | case JUMP_ASSERT: | 1673 | 55.3M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 55.3M | goto jump_assert; | 1675 | 18.9M | case JUMP_ASSERT_NOT: | 1676 | 18.9M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 18.9M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 531M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 531M | } |
|
1686 | | |
/* Forget the capture-group bookkeeping left behind by the previous
   SRE(match) attempt.  The search loops invoke this between two
   consecutive SRE(match) calls so that a failed attempt at one position
   does not carry its group markers into the next one.  Expands to a
   single statement and expects a variable named `state` in the calling
   scope. */
#define RESET_CAPTURE_GROUP() \
    do { state->lastindex = -1; state->lastmark = -1; } while (0)
1690 | | |
1691 | | LOCAL(Py_ssize_t) |
1692 | | SRE(search)(SRE_STATE* state, SRE_CODE* pattern) |
1693 | 87.1M | { |
1694 | 87.1M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; |
1695 | 87.1M | SRE_CHAR* end = (SRE_CHAR *)state->end; |
1696 | 87.1M | Py_ssize_t status = 0; |
1697 | 87.1M | Py_ssize_t prefix_len = 0; |
1698 | 87.1M | Py_ssize_t prefix_skip = 0; |
1699 | 87.1M | SRE_CODE* prefix = NULL; |
1700 | 87.1M | SRE_CODE* charset = NULL; |
1701 | 87.1M | SRE_CODE* overlap = NULL; |
1702 | 87.1M | int flags = 0; |
1703 | 87.1M | INIT_TRACE(state); |
1704 | | |
1705 | 87.1M | if (ptr > end) |
1706 | 0 | return 0; |
1707 | | |
1708 | 87.1M | if (pattern[0] == SRE_OP_INFO) { |
1709 | | /* optimization info block */ |
1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ |
1711 | | |
1712 | 87.1M | flags = pattern[2]; |
1713 | | |
1714 | 87.1M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { |
1715 | 1.67M | TRACE(("reject (got %tu chars, need %zu)\n", |
1716 | 1.67M | end - ptr, (size_t) pattern[3])); |
1717 | 1.67M | return 0; |
1718 | 1.67M | } |
1719 | 85.4M | if (pattern[3] > 1) { |
1720 | | /* adjust end point (but make sure we leave at least one |
1721 | | character in there, so literal search will work) */ |
1722 | 7.21M | end -= pattern[3] - 1; |
1723 | 7.21M | if (end <= ptr) |
1724 | 0 | end = ptr; |
1725 | 7.21M | } |
1726 | | |
1727 | 85.4M | if (flags & SRE_INFO_PREFIX) { |
1728 | | /* pattern starts with a known prefix */ |
1729 | | /* <length> <skip> <prefix data> <overlap data> */ |
1730 | 7.21M | prefix_len = pattern[5]; |
1731 | 7.21M | prefix_skip = pattern[6]; |
1732 | 7.21M | prefix = pattern + 7; |
1733 | 7.21M | overlap = prefix + prefix_len - 1; |
1734 | 78.2M | } else if (flags & SRE_INFO_CHARSET) |
1735 | | /* pattern starts with a character from a known set */ |
1736 | | /* <charset> */ |
1737 | 74.1M | charset = pattern + 5; |
1738 | | |
1739 | 85.4M | pattern += 1 + pattern[1]; |
1740 | 85.4M | } |
1741 | | |
1742 | 85.4M | TRACE(("prefix = %p %zd %zd\n", |
1743 | 85.4M | prefix, prefix_len, prefix_skip)); |
1744 | 85.4M | TRACE(("charset = %p\n", charset)); |
1745 | | |
1746 | 85.4M | if (prefix_len == 1) { |
1747 | | /* pattern starts with a literal character */ |
1748 | 6.72M | SRE_CHAR c = (SRE_CHAR) prefix[0]; |
1749 | | #if SIZEOF_SRE_CHAR < 4 |
1750 | 3.86M | if ((SRE_CODE) c != prefix[0]) |
1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ |
1752 | 3.86M | #endif |
1753 | 3.86M | end = (SRE_CHAR *)state->end; |
1754 | 3.86M | state->must_advance = 0; |
1755 | 7.39M | while (ptr < end) { |
1756 | 101M | while (*ptr != c) { |
1757 | 94.8M | if (++ptr >= end) |
1758 | 511k | return 0; |
1759 | 94.8M | } |
1760 | 6.87M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); |
1761 | 6.87M | state->start = ptr; |
1762 | 6.87M | state->ptr = ptr + prefix_skip; |
1763 | 6.87M | if (flags & SRE_INFO_LITERAL) |
1764 | 4.39k | return 1; /* we got all of it */ |
1765 | 6.86M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); |
1766 | 6.86M | if (status != 0) |
1767 | 6.20M | return status; |
1768 | 664k | ++ptr; |
1769 | 664k | RESET_CAPTURE_GROUP(); |
1770 | 664k | } |
1771 | 10.0k | return 0; |
1772 | 3.86M | } |
1773 | | |
1774 | 78.7M | if (prefix_len > 1) { |
1775 | | /* pattern starts with a known prefix. use the overlap |
1776 | | table to skip forward as fast as we possibly can */ |
1777 | 487k | Py_ssize_t i = 0; |
1778 | | |
1779 | 487k | end = (SRE_CHAR *)state->end; |
1780 | 487k | if (prefix_len > end - ptr) |
1781 | 0 | return 0; |
1782 | | #if SIZEOF_SRE_CHAR < 4 |
1783 | 1.44M | for (i = 0; i < prefix_len; i++) |
1784 | 966k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) |
1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ |
1786 | 483k | #endif |
1787 | 1.42M | while (ptr < end) { |
1788 | 1.42M | SRE_CHAR c = (SRE_CHAR) prefix[0]; |
1789 | 9.72M | while (*ptr++ != c) { |
1790 | 8.30M | if (ptr >= end) |
1791 | 296 | return 0; |
1792 | 8.30M | } |
1793 | 1.42M | if (ptr >= end) |
1794 | 46 | return 0; |
1795 | | |
1796 | 1.42M | i = 1; |
1797 | 1.42M | state->must_advance = 0; |
1798 | 1.42M | do { |
1799 | 1.42M | if (*ptr == (SRE_CHAR) prefix[i]) { |
1800 | 1.26M | if (++i != prefix_len) { |
1801 | 0 | if (++ptr >= end) |
1802 | 0 | return 0; |
1803 | 0 | continue; |
1804 | 0 | } |
1805 | | /* found a potential match */ |
1806 | 1.26M | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); |
1807 | 1.26M | state->start = ptr - (prefix_len - 1); |
1808 | 1.26M | state->ptr = ptr - (prefix_len - prefix_skip - 1); |
1809 | 1.26M | if (flags & SRE_INFO_LITERAL) |
1810 | 0 | return 1; /* we got all of it */ |
1811 | 1.26M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); |
1812 | 1.26M | if (status != 0) |
1813 | 487k | return status; |
1814 | | /* close but no cigar -- try again */ |
1815 | 776k | if (++ptr >= end) |
1816 | 26 | return 0; |
1817 | 776k | RESET_CAPTURE_GROUP(); |
1818 | 776k | } |
1819 | 936k | i = overlap[i]; |
1820 | 936k | } while (i != 0); |
1821 | 1.42M | } |
1822 | 0 | return 0; |
1823 | 487k | } |
1824 | | |
1825 | 78.2M | if (charset) { |
1826 | | /* pattern starts with a character from a known set */ |
1827 | 74.1M | end = (SRE_CHAR *)state->end; |
1828 | 74.1M | state->must_advance = 0; |
1829 | 77.1M | for (;;) { |
1830 | 349M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) |
1831 | 271M | ptr++; |
1832 | 77.1M | if (ptr >= end) |
1833 | 3.73M | return 0; |
1834 | 73.4M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); |
1835 | 73.4M | state->start = ptr; |
1836 | 73.4M | state->ptr = ptr; |
1837 | 73.4M | status = SRE(match)(state, pattern, 0); |
1838 | 73.4M | if (status != 0) |
1839 | 70.4M | break; |
1840 | 3.01M | ptr++; |
1841 | 3.01M | RESET_CAPTURE_GROUP(); |
1842 | 3.01M | } |
1843 | 74.1M | } else { |
1844 | | /* general case */ |
1845 | 4.06M | assert(ptr <= end); |
1846 | 4.06M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); |
1847 | 4.06M | state->start = state->ptr = ptr; |
1848 | 4.06M | status = SRE(match)(state, pattern, 1); |
1849 | 4.06M | state->must_advance = 0; |
1850 | 4.06M | if (status == 0 && pattern[0] == SRE_OP_AT && |
1851 | 4.06M | (pattern[1] == SRE_AT_BEGINNING || |
1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) |
1853 | 0 | { |
1854 | 0 | state->start = state->ptr = ptr = end; |
1855 | 0 | return 0; |
1856 | 0 | } |
1857 | 374M | while (status == 0 && ptr < end) { |
1858 | 370M | ptr++; |
1859 | 370M | RESET_CAPTURE_GROUP(); |
1860 | 370M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); |
1861 | 370M | state->start = state->ptr = ptr; |
1862 | 370M | status = SRE(match)(state, pattern, 0); |
1863 | 370M | } |
1864 | 4.06M | } |
1865 | | |
1866 | 74.4M | return status; |
1867 | 78.2M | } Line | Count | Source | 1693 | 34.9M | { | 1694 | 34.9M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 34.9M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 34.9M | Py_ssize_t status = 0; | 1697 | 34.9M | Py_ssize_t prefix_len = 0; | 1698 | 34.9M | Py_ssize_t prefix_skip = 0; | 1699 | 34.9M | SRE_CODE* prefix = NULL; | 1700 | 34.9M | SRE_CODE* charset = NULL; | 1701 | 34.9M | SRE_CODE* overlap = NULL; | 1702 | 34.9M | int flags = 0; | 1703 | 34.9M | INIT_TRACE(state); | 1704 | | | 1705 | 34.9M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 34.9M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 34.9M | flags = pattern[2]; | 1713 | | | 1714 | 34.9M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 1.54M | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 1.54M | end - ptr, (size_t) pattern[3])); | 1717 | 1.54M | return 0; | 1718 | 1.54M | } | 1719 | 33.4M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 2.31M | end -= pattern[3] - 1; | 1723 | 2.31M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 2.31M | } | 1726 | | | 1727 | 33.4M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 2.31M | prefix_len = pattern[5]; | 1731 | 2.31M | prefix_skip = pattern[6]; | 1732 | 2.31M | prefix = pattern + 7; | 1733 | 2.31M | overlap = prefix + prefix_len - 1; | 1734 | 31.1M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 28.1M | charset = pattern + 5; | 1738 | | | 1739 | 33.4M | pattern += 1 + pattern[1]; | 1740 | 33.4M | } | 1741 | | | 1742 | 33.4M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 
33.4M | prefix, prefix_len, prefix_skip)); | 1744 | 33.4M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 33.4M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 2.30M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | 2.30M | #if SIZEOF_SRE_CHAR < 4 | 1750 | 2.30M | if ((SRE_CODE) c != prefix[0]) | 1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | 2.30M | #endif | 1753 | 2.30M | end = (SRE_CHAR *)state->end; | 1754 | 2.30M | state->must_advance = 0; | 1755 | 2.48M | while (ptr < end) { | 1756 | 27.3M | while (*ptr != c) { | 1757 | 25.2M | if (++ptr >= end) | 1758 | 441k | return 0; | 1759 | 25.2M | } | 1760 | 2.03M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 2.03M | state->start = ptr; | 1762 | 2.03M | state->ptr = ptr + prefix_skip; | 1763 | 2.03M | if (flags & SRE_INFO_LITERAL) | 1764 | 337 | return 1; /* we got all of it */ | 1765 | 2.03M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 2.03M | if (status != 0) | 1767 | 1.85M | return status; | 1768 | 182k | ++ptr; | 1769 | 182k | RESET_CAPTURE_GROUP(); | 1770 | 182k | } | 1771 | 7.72k | return 0; | 1772 | 2.30M | } | 1773 | | | 1774 | 31.1M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 11.8k | Py_ssize_t i = 0; | 1778 | | | 1779 | 11.8k | end = (SRE_CHAR *)state->end; | 1780 | 11.8k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | 11.8k | #if SIZEOF_SRE_CHAR < 4 | 1783 | 35.4k | for (i = 0; i < prefix_len; i++) | 1784 | 23.6k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | 11.8k | #endif | 1787 | 276k | while (ptr < end) { | 1788 | 276k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 2.04M | while (*ptr++ != c) { | 1790 | 1.77M | if (ptr >= end) | 1791 | 60 | return 0; | 1792 | 1.77M | } | 1793 | 276k | if (ptr >= end) | 1794 | 23 | return 0; | 1795 | | | 1796 | 276k | i = 1; | 1797 | 276k | state->must_advance = 0; | 1798 | 276k | do { | 1799 | 276k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 206k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 206k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 206k | state->start = ptr - (prefix_len - 1); | 1808 | 206k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 206k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 206k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 206k | if (status != 0) | 1813 | 11.7k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 194k | if (++ptr >= end) | 1816 | 9 | return 0; | 1817 | 194k | RESET_CAPTURE_GROUP(); | 1818 | 194k | } | 1819 | 265k | i = overlap[i]; | 1820 | 265k | } while (i != 0); | 1821 | 276k | } | 1822 | 0 | return 0; | 1823 | 11.8k | } | 1824 | | | 1825 | 31.1M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 28.1M | end = (SRE_CHAR *)state->end; | 1828 | 28.1M | state->must_advance = 0; | 1829 | 30.2M | for 
(;;) { | 1830 | 81.2M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 50.9M | ptr++; | 1832 | 30.2M | if (ptr >= end) | 1833 | 2.68M | return 0; | 1834 | 27.6M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 27.6M | state->start = ptr; | 1836 | 27.6M | state->ptr = ptr; | 1837 | 27.6M | status = SRE(match)(state, pattern, 0); | 1838 | 27.6M | if (status != 0) | 1839 | 25.5M | break; | 1840 | 2.10M | ptr++; | 1841 | 2.10M | RESET_CAPTURE_GROUP(); | 1842 | 2.10M | } | 1843 | 28.1M | } else { | 1844 | | /* general case */ | 1845 | 2.93M | assert(ptr <= end); | 1846 | 2.93M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 2.93M | state->start = state->ptr = ptr; | 1848 | 2.93M | status = SRE(match)(state, pattern, 1); | 1849 | 2.93M | state->must_advance = 0; | 1850 | 2.93M | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 2.93M | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 109M | while (status == 0 && ptr < end) { | 1858 | 106M | ptr++; | 1859 | 106M | RESET_CAPTURE_GROUP(); | 1860 | 106M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 106M | state->start = state->ptr = ptr; | 1862 | 106M | status = SRE(match)(state, pattern, 0); | 1863 | 106M | } | 1864 | 2.93M | } | 1865 | | | 1866 | 28.4M | return status; | 1867 | 31.1M | } |
Line | Count | Source | 1693 | 45.0M | { | 1694 | 45.0M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 45.0M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 45.0M | Py_ssize_t status = 0; | 1697 | 45.0M | Py_ssize_t prefix_len = 0; | 1698 | 45.0M | Py_ssize_t prefix_skip = 0; | 1699 | 45.0M | SRE_CODE* prefix = NULL; | 1700 | 45.0M | SRE_CODE* charset = NULL; | 1701 | 45.0M | SRE_CODE* overlap = NULL; | 1702 | 45.0M | int flags = 0; | 1703 | 45.0M | INIT_TRACE(state); | 1704 | | | 1705 | 45.0M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 45.0M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 45.0M | flags = pattern[2]; | 1713 | | | 1714 | 45.0M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 116k | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 116k | end - ptr, (size_t) pattern[3])); | 1717 | 116k | return 0; | 1718 | 116k | } | 1719 | 44.9M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 2.02M | end -= pattern[3] - 1; | 1723 | 2.02M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 2.02M | } | 1726 | | | 1727 | 44.9M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 2.03M | prefix_len = pattern[5]; | 1731 | 2.03M | prefix_skip = pattern[6]; | 1732 | 2.03M | prefix = pattern + 7; | 1733 | 2.03M | overlap = prefix + prefix_len - 1; | 1734 | 42.9M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 41.9M | charset = pattern + 5; | 1738 | | | 1739 | 44.9M | pattern += 1 + pattern[1]; | 1740 | 44.9M | } | 1741 | | | 1742 | 44.9M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 44.9M | prefix, 
prefix_len, prefix_skip)); | 1744 | 44.9M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 44.9M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 1.55M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | 1.55M | #if SIZEOF_SRE_CHAR < 4 | 1750 | 1.55M | if ((SRE_CODE) c != prefix[0]) | 1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | 1.55M | #endif | 1753 | 1.55M | end = (SRE_CHAR *)state->end; | 1754 | 1.55M | state->must_advance = 0; | 1755 | 1.74M | while (ptr < end) { | 1756 | 50.2M | while (*ptr != c) { | 1757 | 48.5M | if (++ptr >= end) | 1758 | 65.3k | return 0; | 1759 | 48.5M | } | 1760 | 1.68M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 1.68M | state->start = ptr; | 1762 | 1.68M | state->ptr = ptr + prefix_skip; | 1763 | 1.68M | if (flags & SRE_INFO_LITERAL) | 1764 | 1.38k | return 1; /* we got all of it */ | 1765 | 1.68M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 1.68M | if (status != 0) | 1767 | 1.49M | return status; | 1768 | 190k | ++ptr; | 1769 | 190k | RESET_CAPTURE_GROUP(); | 1770 | 190k | } | 1771 | 1.39k | return 0; | 1772 | 1.55M | } | 1773 | | | 1774 | 43.3M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 471k | Py_ssize_t i = 0; | 1778 | | | 1779 | 471k | end = (SRE_CHAR *)state->end; | 1780 | 471k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | 471k | #if SIZEOF_SRE_CHAR < 4 | 1783 | 1.41M | for (i = 0; i < prefix_len; i++) | 1784 | 942k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | 471k | #endif | 1787 | 946k | while (ptr < end) { | 1788 | 946k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 4.65M | while (*ptr++ != c) { | 1790 | 3.70M | if (ptr >= end) | 1791 | 115 | return 0; | 1792 | 3.70M | } | 1793 | 946k | if (ptr >= end) | 1794 | 11 | return 0; | 1795 | | | 1796 | 946k | i = 1; | 1797 | 946k | state->must_advance = 0; | 1798 | 946k | do { | 1799 | 946k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 865k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 865k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 865k | state->start = ptr - (prefix_len - 1); | 1808 | 865k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 865k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 865k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 865k | if (status != 0) | 1813 | 471k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 393k | if (++ptr >= end) | 1816 | 12 | return 0; | 1817 | 393k | RESET_CAPTURE_GROUP(); | 1818 | 393k | } | 1819 | 475k | i = overlap[i]; | 1820 | 475k | } while (i != 0); | 1821 | 946k | } | 1822 | 0 | return 0; | 1823 | 471k | } | 1824 | | | 1825 | 42.9M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 41.9M | end = (SRE_CHAR *)state->end; | 1828 | 41.9M | state->must_advance = 0; | 1829 | 42.4M | for (;;) { 
| 1830 | 190M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 147M | ptr++; | 1832 | 42.4M | if (ptr >= end) | 1833 | 1.00M | return 0; | 1834 | 41.4M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 41.4M | state->start = ptr; | 1836 | 41.4M | state->ptr = ptr; | 1837 | 41.4M | status = SRE(match)(state, pattern, 0); | 1838 | 41.4M | if (status != 0) | 1839 | 40.9M | break; | 1840 | 423k | ptr++; | 1841 | 423k | RESET_CAPTURE_GROUP(); | 1842 | 423k | } | 1843 | 41.9M | } else { | 1844 | | /* general case */ | 1845 | 927k | assert(ptr <= end); | 1846 | 927k | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 927k | state->start = state->ptr = ptr; | 1848 | 927k | status = SRE(match)(state, pattern, 1); | 1849 | 927k | state->must_advance = 0; | 1850 | 927k | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 927k | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 193M | while (status == 0 && ptr < end) { | 1858 | 192M | ptr++; | 1859 | 192M | RESET_CAPTURE_GROUP(); | 1860 | 192M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 192M | state->start = state->ptr = ptr; | 1862 | 192M | status = SRE(match)(state, pattern, 0); | 1863 | 192M | } | 1864 | 927k | } | 1865 | | | 1866 | 41.9M | return status; | 1867 | 42.9M | } |
Line | Count | Source | 1693 | 7.04M | { | 1694 | 7.04M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 7.04M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 7.04M | Py_ssize_t status = 0; | 1697 | 7.04M | Py_ssize_t prefix_len = 0; | 1698 | 7.04M | Py_ssize_t prefix_skip = 0; | 1699 | 7.04M | SRE_CODE* prefix = NULL; | 1700 | 7.04M | SRE_CODE* charset = NULL; | 1701 | 7.04M | SRE_CODE* overlap = NULL; | 1702 | 7.04M | int flags = 0; | 1703 | 7.04M | INIT_TRACE(state); | 1704 | | | 1705 | 7.04M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 7.04M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 7.04M | flags = pattern[2]; | 1713 | | | 1714 | 7.04M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 6.39k | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 6.39k | end - ptr, (size_t) pattern[3])); | 1717 | 6.39k | return 0; | 1718 | 6.39k | } | 1719 | 7.03M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 2.86M | end -= pattern[3] - 1; | 1723 | 2.86M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 2.86M | } | 1726 | | | 1727 | 7.03M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 2.86M | prefix_len = pattern[5]; | 1731 | 2.86M | prefix_skip = pattern[6]; | 1732 | 2.86M | prefix = pattern + 7; | 1733 | 2.86M | overlap = prefix + prefix_len - 1; | 1734 | 4.16M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 3.97M | charset = pattern + 5; | 1738 | | | 1739 | 7.03M | pattern += 1 + pattern[1]; | 1740 | 7.03M | } | 1741 | | | 1742 | 7.03M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 7.03M | prefix, 
prefix_len, prefix_skip)); | 1744 | 7.03M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 7.03M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 2.86M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | | #if SIZEOF_SRE_CHAR < 4 | 1750 | | if ((SRE_CODE) c != prefix[0]) | 1751 | | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | | #endif | 1753 | 2.86M | end = (SRE_CHAR *)state->end; | 1754 | 2.86M | state->must_advance = 0; | 1755 | 3.15M | while (ptr < end) { | 1756 | 24.1M | while (*ptr != c) { | 1757 | 21.0M | if (++ptr >= end) | 1758 | 4.47k | return 0; | 1759 | 21.0M | } | 1760 | 3.14M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 3.14M | state->start = ptr; | 1762 | 3.14M | state->ptr = ptr + prefix_skip; | 1763 | 3.14M | if (flags & SRE_INFO_LITERAL) | 1764 | 2.66k | return 1; /* we got all of it */ | 1765 | 3.14M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 3.14M | if (status != 0) | 1767 | 2.85M | return status; | 1768 | 291k | ++ptr; | 1769 | 291k | RESET_CAPTURE_GROUP(); | 1770 | 291k | } | 1771 | 907 | return 0; | 1772 | 2.86M | } | 1773 | | | 1774 | 4.17M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 4.37k | Py_ssize_t i = 0; | 1778 | | | 1779 | 4.37k | end = (SRE_CHAR *)state->end; | 1780 | 4.37k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | | #if SIZEOF_SRE_CHAR < 4 | 1783 | | for (i = 0; i < prefix_len; i++) | 1784 | | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | | #endif | 1787 | 200k | while (ptr < end) { | 1788 | 200k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 3.02M | while (*ptr++ != c) { | 1790 | 2.82M | if (ptr >= end) | 1791 | 121 | return 0; | 1792 | 2.82M | } | 1793 | 200k | if (ptr >= end) | 1794 | 12 | return 0; | 1795 | | | 1796 | 200k | i = 1; | 1797 | 200k | state->must_advance = 0; | 1798 | 200k | do { | 1799 | 200k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 192k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 192k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 192k | state->start = ptr - (prefix_len - 1); | 1808 | 192k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 192k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 192k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 192k | if (status != 0) | 1813 | 4.23k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 187k | if (++ptr >= end) | 1816 | 5 | return 0; | 1817 | 187k | RESET_CAPTURE_GROUP(); | 1818 | 187k | } | 1819 | 196k | i = overlap[i]; | 1820 | 196k | } while (i != 0); | 1821 | 200k | } | 1822 | 0 | return 0; | 1823 | 4.37k | } | 1824 | | | 1825 | 4.16M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 3.97M | end = (SRE_CHAR *)state->end; | 1828 | 3.97M | state->must_advance = 0; | 1829 | 4.46M | for (;;) { | 1830 | 77.5M | 
while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 73.1M | ptr++; | 1832 | 4.46M | if (ptr >= end) | 1833 | 51.9k | return 0; | 1834 | 4.41M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 4.41M | state->start = ptr; | 1836 | 4.41M | state->ptr = ptr; | 1837 | 4.41M | status = SRE(match)(state, pattern, 0); | 1838 | 4.41M | if (status != 0) | 1839 | 3.91M | break; | 1840 | 492k | ptr++; | 1841 | 492k | RESET_CAPTURE_GROUP(); | 1842 | 492k | } | 1843 | 3.97M | } else { | 1844 | | /* general case */ | 1845 | 198k | assert(ptr <= end); | 1846 | 198k | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 198k | state->start = state->ptr = ptr; | 1848 | 198k | status = SRE(match)(state, pattern, 1); | 1849 | 198k | state->must_advance = 0; | 1850 | 198k | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 198k | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 72.2M | while (status == 0 && ptr < end) { | 1858 | 72.0M | ptr++; | 1859 | 72.0M | RESET_CAPTURE_GROUP(); | 1860 | 72.0M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 72.0M | state->start = state->ptr = ptr; | 1862 | 72.0M | status = SRE(match)(state, pattern, 0); | 1863 | 72.0M | } | 1864 | 198k | } | 1865 | | | 1866 | 4.11M | return status; | 1867 | 4.16M | } |
|
1868 | | |
1869 | | #undef SRE_CHAR |
1870 | | #undef SIZEOF_SRE_CHAR |
1871 | | #undef SRE |
1872 | | |
1873 | | /* vim:ts=4:sw=4:et |
1874 | | */ |