/src/cpython/Modules/_sre/sre_lib.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Secret Labs' Regular Expression Engine |
3 | | * |
4 | | * regular expression matching engine |
5 | | * |
6 | | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
7 | | * |
8 | | * See the sre.c file for information on usage and redistribution. |
9 | | */ |
10 | | |
11 | | /* String matching engine */ |
12 | | |
13 | | /* This file is included three times, with different character settings */ |
14 | | |
15 | | LOCAL(int) |
16 | | SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) |
17 | 13.2M | { |
18 | | /* check if pointer is at given position */ |
19 | | |
20 | 13.2M | Py_ssize_t thisp, thatp; |
21 | | |
22 | 13.2M | switch (at) { |
23 | | |
24 | 5.94M | case SRE_AT_BEGINNING: |
25 | 5.94M | case SRE_AT_BEGINNING_STRING: |
26 | 5.94M | return ((void*) ptr == state->beginning); |
27 | | |
28 | 0 | case SRE_AT_BEGINNING_LINE: |
29 | 0 | return ((void*) ptr == state->beginning || |
30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); |
31 | | |
32 | 4.61M | case SRE_AT_END: |
33 | 4.61M | return (((SRE_CHAR *)state->end - ptr == 1 && |
34 | 4.61M | SRE_IS_LINEBREAK((int) ptr[0])) || |
35 | 4.61M | ((void*) ptr == state->end)); |
36 | | |
37 | 0 | case SRE_AT_END_LINE: |
38 | 0 | return ((void*) ptr == state->end || |
39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); |
40 | | |
41 | 2.73M | case SRE_AT_END_STRING: |
42 | 2.73M | return ((void*) ptr == state->end); |
43 | | |
44 | 0 | case SRE_AT_BOUNDARY: |
45 | 0 | thatp = ((void*) ptr > state->beginning) ? |
46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
47 | 0 | thisp = ((void*) ptr < state->end) ? |
48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
49 | 0 | return thisp != thatp; |
50 | | |
51 | 0 | case SRE_AT_NON_BOUNDARY: |
52 | 0 | thatp = ((void*) ptr > state->beginning) ? |
53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; |
54 | 0 | thisp = ((void*) ptr < state->end) ? |
55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; |
56 | 0 | return thisp == thatp; |
57 | | |
58 | 0 | case SRE_AT_LOC_BOUNDARY: |
59 | 0 | thatp = ((void*) ptr > state->beginning) ? |
60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
61 | 0 | thisp = ((void*) ptr < state->end) ? |
62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
63 | 0 | return thisp != thatp; |
64 | | |
65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: |
66 | 0 | thatp = ((void*) ptr > state->beginning) ? |
67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; |
68 | 0 | thisp = ((void*) ptr < state->end) ? |
69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; |
70 | 0 | return thisp == thatp; |
71 | | |
72 | 0 | case SRE_AT_UNI_BOUNDARY: |
73 | 0 | thatp = ((void*) ptr > state->beginning) ? |
74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
75 | 0 | thisp = ((void*) ptr < state->end) ? |
76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
77 | 0 | return thisp != thatp; |
78 | | |
79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: |
80 | 0 | thatp = ((void*) ptr > state->beginning) ? |
81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; |
82 | 0 | thisp = ((void*) ptr < state->end) ? |
83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; |
84 | 0 | return thisp == thatp; |
85 | | |
86 | 13.2M | } |
87 | | |
88 | 0 | return 0; |
89 | 13.2M | } Line | Count | Source | 17 | 11.6M | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 11.6M | Py_ssize_t thisp, thatp; | 21 | | | 22 | 11.6M | switch (at) { | 23 | | | 24 | 5.90M | case SRE_AT_BEGINNING: | 25 | 5.90M | case SRE_AT_BEGINNING_STRING: | 26 | 5.90M | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 3.90M | case SRE_AT_END: | 33 | 3.90M | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 3.90M | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 3.90M | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 1.83M | case SRE_AT_END_STRING: | 42 | 1.83M | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 11.6M | } | 87 | | | 88 | 0 | return 0; | 89 | 11.6M | } |
Line | Count | Source | 17 | 1.12M | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 1.12M | Py_ssize_t thisp, thatp; | 21 | | | 22 | 1.12M | switch (at) { | 23 | | | 24 | 33.8k | case SRE_AT_BEGINNING: | 25 | 33.8k | case SRE_AT_BEGINNING_STRING: | 26 | 33.8k | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 621k | case SRE_AT_END: | 33 | 621k | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 621k | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 621k | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 469k | case SRE_AT_END_STRING: | 42 | 469k | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 1.12M | } | 87 | | | 88 | 0 | return 0; | 89 | 1.12M | } |
Line | Count | Source | 17 | 517k | { | 18 | | /* check if pointer is at given position */ | 19 | | | 20 | 517k | Py_ssize_t thisp, thatp; | 21 | | | 22 | 517k | switch (at) { | 23 | | | 24 | 5.29k | case SRE_AT_BEGINNING: | 25 | 5.29k | case SRE_AT_BEGINNING_STRING: | 26 | 5.29k | return ((void*) ptr == state->beginning); | 27 | | | 28 | 0 | case SRE_AT_BEGINNING_LINE: | 29 | 0 | return ((void*) ptr == state->beginning || | 30 | 0 | SRE_IS_LINEBREAK((int) ptr[-1])); | 31 | | | 32 | 80.6k | case SRE_AT_END: | 33 | 80.6k | return (((SRE_CHAR *)state->end - ptr == 1 && | 34 | 80.6k | SRE_IS_LINEBREAK((int) ptr[0])) || | 35 | 80.6k | ((void*) ptr == state->end)); | 36 | | | 37 | 0 | case SRE_AT_END_LINE: | 38 | 0 | return ((void*) ptr == state->end || | 39 | 0 | SRE_IS_LINEBREAK((int) ptr[0])); | 40 | | | 41 | 431k | case SRE_AT_END_STRING: | 42 | 431k | return ((void*) ptr == state->end); | 43 | | | 44 | 0 | case SRE_AT_BOUNDARY: | 45 | 0 | thatp = ((void*) ptr > state->beginning) ? | 46 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 47 | 0 | thisp = ((void*) ptr < state->end) ? | 48 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 49 | 0 | return thisp != thatp; | 50 | | | 51 | 0 | case SRE_AT_NON_BOUNDARY: | 52 | 0 | thatp = ((void*) ptr > state->beginning) ? | 53 | 0 | SRE_IS_WORD((int) ptr[-1]) : 0; | 54 | 0 | thisp = ((void*) ptr < state->end) ? | 55 | 0 | SRE_IS_WORD((int) ptr[0]) : 0; | 56 | 0 | return thisp == thatp; | 57 | | | 58 | 0 | case SRE_AT_LOC_BOUNDARY: | 59 | 0 | thatp = ((void*) ptr > state->beginning) ? | 60 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 61 | 0 | thisp = ((void*) ptr < state->end) ? | 62 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 63 | 0 | return thisp != thatp; | 64 | | | 65 | 0 | case SRE_AT_LOC_NON_BOUNDARY: | 66 | 0 | thatp = ((void*) ptr > state->beginning) ? | 67 | 0 | SRE_LOC_IS_WORD((int) ptr[-1]) : 0; | 68 | 0 | thisp = ((void*) ptr < state->end) ? 
| 69 | 0 | SRE_LOC_IS_WORD((int) ptr[0]) : 0; | 70 | 0 | return thisp == thatp; | 71 | | | 72 | 0 | case SRE_AT_UNI_BOUNDARY: | 73 | 0 | thatp = ((void*) ptr > state->beginning) ? | 74 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 75 | 0 | thisp = ((void*) ptr < state->end) ? | 76 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 77 | 0 | return thisp != thatp; | 78 | | | 79 | 0 | case SRE_AT_UNI_NON_BOUNDARY: | 80 | 0 | thatp = ((void*) ptr > state->beginning) ? | 81 | 0 | SRE_UNI_IS_WORD((int) ptr[-1]) : 0; | 82 | 0 | thisp = ((void*) ptr < state->end) ? | 83 | 0 | SRE_UNI_IS_WORD((int) ptr[0]) : 0; | 84 | 0 | return thisp == thatp; | 85 | | | 86 | 517k | } | 87 | | | 88 | 0 | return 0; | 89 | 517k | } |
|
90 | | |
91 | | LOCAL(int) |
92 | | SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) |
93 | 1.89G | { |
94 | | /* check if character is a member of the given set */ |
95 | | |
96 | 1.89G | int ok = 1; |
97 | | |
98 | 4.38G | for (;;) { |
99 | 4.38G | switch (*set++) { |
100 | | |
101 | 1.26G | case SRE_OP_FAILURE: |
102 | 1.26G | return !ok; |
103 | | |
104 | 1.36G | case SRE_OP_LITERAL: |
105 | | /* <LITERAL> <code> */ |
106 | 1.36G | if (ch == set[0]) |
107 | 6.44M | return ok; |
108 | 1.35G | set++; |
109 | 1.35G | break; |
110 | | |
111 | 10.9M | case SRE_OP_CATEGORY: |
112 | | /* <CATEGORY> <code> */ |
113 | 10.9M | if (sre_category(set[0], (int) ch)) |
114 | 7.54M | return ok; |
115 | 3.44M | set++; |
116 | 3.44M | break; |
117 | | |
118 | 938M | case SRE_OP_CHARSET: |
119 | | /* <CHARSET> <bitmap> */ |
120 | 938M | if (ch < 256 && |
121 | 938M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) |
122 | 425M | return ok; |
123 | 513M | set += 256/SRE_CODE_BITS; |
124 | 513M | break; |
125 | | |
126 | 332M | case SRE_OP_RANGE: |
127 | | /* <RANGE> <lower> <upper> */ |
128 | 332M | if (set[0] <= ch && ch <= set[1]) |
129 | 197M | return ok; |
130 | 135M | set += 2; |
131 | 135M | break; |
132 | | |
133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: |
134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ |
135 | 0 | { |
136 | 0 | SRE_CODE uch; |
137 | | /* ch is already lower cased */ |
138 | 0 | if (set[0] <= ch && ch <= set[1]) |
139 | 0 | return ok; |
140 | 0 | uch = sre_upper_unicode(ch); |
141 | 0 | if (set[0] <= uch && uch <= set[1]) |
142 | 0 | return ok; |
143 | 0 | set += 2; |
144 | 0 | break; |
145 | 0 | } |
146 | | |
147 | 478M | case SRE_OP_NEGATE: |
148 | 478M | ok = !ok; |
149 | 478M | break; |
150 | | |
151 | 0 | case SRE_OP_BIGCHARSET: |
152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ |
153 | 0 | { |
154 | 0 | Py_ssize_t count, block; |
155 | 0 | count = *(set++); |
156 | |
|
157 | 0 | if (ch < 0x10000u) |
158 | 0 | block = ((unsigned char*)set)[ch >> 8]; |
159 | 0 | else |
160 | 0 | block = -1; |
161 | 0 | set += 256/sizeof(SRE_CODE); |
162 | 0 | if (block >=0 && |
163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & |
164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) |
165 | 0 | return ok; |
166 | 0 | set += count * (256/SRE_CODE_BITS); |
167 | 0 | break; |
168 | 0 | } |
169 | | |
170 | 0 | default: |
171 | | /* internal error -- there's not much we can do about it |
172 | | here, so let's just pretend it didn't match... */ |
173 | 0 | return 0; |
174 | 4.38G | } |
175 | 4.38G | } |
176 | 1.89G | } Line | Count | Source | 93 | 370M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 370M | int ok = 1; | 97 | | | 98 | 833M | for (;;) { | 99 | 833M | switch (*set++) { | 100 | | | 101 | 234M | case SRE_OP_FAILURE: | 102 | 234M | return !ok; | 103 | | | 104 | 327M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 327M | if (ch == set[0]) | 107 | 2.59M | return ok; | 108 | 324M | set++; | 109 | 324M | break; | 110 | | | 111 | 9.85M | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 9.85M | if (sre_category(set[0], (int) ch)) | 114 | 6.47M | return ok; | 115 | 3.38M | set++; | 116 | 3.38M | break; | 117 | | | 118 | 71.0M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 71.0M | if (ch < 256 && | 121 | 71.0M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 32.9M | return ok; | 123 | 38.0M | set += 256/SRE_CODE_BITS; | 124 | 38.0M | break; | 125 | | | 126 | 154M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 154M | if (set[0] <= ch && ch <= set[1]) | 129 | 94.3M | return ok; | 130 | 59.6M | set += 2; | 131 | 59.6M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 36.3M | case SRE_OP_NEGATE: | 148 | 36.3M | ok = !ok; | 149 | 36.3M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 833M | } | 175 | 833M | } | 176 | 370M | } |
Line | Count | Source | 93 | 806M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 806M | int ok = 1; | 97 | | | 98 | 1.97G | for (;;) { | 99 | 1.97G | switch (*set++) { | 100 | | | 101 | 591M | case SRE_OP_FAILURE: | 102 | 591M | return !ok; | 103 | | | 104 | 735M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 735M | if (ch == set[0]) | 107 | 1.56M | return ok; | 108 | 734M | set++; | 109 | 734M | break; | 110 | | | 111 | 168k | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 168k | if (sre_category(set[0], (int) ch)) | 114 | 147k | return ok; | 115 | 21.0k | set++; | 116 | 21.0k | break; | 117 | | | 118 | 314M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 314M | if (ch < 256 && | 121 | 314M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 121M | return ok; | 123 | 193M | set += 256/SRE_CODE_BITS; | 124 | 193M | break; | 125 | | | 126 | 154M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 154M | if (set[0] <= ch && ch <= set[1]) | 129 | 91.6M | return ok; | 130 | 62.7M | set += 2; | 131 | 62.7M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 175M | case SRE_OP_NEGATE: | 148 | 175M | ok = !ok; | 149 | 175M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 1.97G | } | 175 | 1.97G | } | 176 | 806M | } |
Line | Count | Source | 93 | 721M | { | 94 | | /* check if character is a member of the given set */ | 95 | | | 96 | 721M | int ok = 1; | 97 | | | 98 | 1.57G | for (;;) { | 99 | 1.57G | switch (*set++) { | 100 | | | 101 | 436M | case SRE_OP_FAILURE: | 102 | 436M | return !ok; | 103 | | | 104 | 298M | case SRE_OP_LITERAL: | 105 | | /* <LITERAL> <code> */ | 106 | 298M | if (ch == set[0]) | 107 | 2.28M | return ok; | 108 | 296M | set++; | 109 | 296M | break; | 110 | | | 111 | 966k | case SRE_OP_CATEGORY: | 112 | | /* <CATEGORY> <code> */ | 113 | 966k | if (sre_category(set[0], (int) ch)) | 114 | 926k | return ok; | 115 | 39.7k | set++; | 116 | 39.7k | break; | 117 | | | 118 | 552M | case SRE_OP_CHARSET: | 119 | | /* <CHARSET> <bitmap> */ | 120 | 552M | if (ch < 256 && | 121 | 552M | (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) | 122 | 270M | return ok; | 123 | 281M | set += 256/SRE_CODE_BITS; | 124 | 281M | break; | 125 | | | 126 | 24.6M | case SRE_OP_RANGE: | 127 | | /* <RANGE> <lower> <upper> */ | 128 | 24.6M | if (set[0] <= ch && ch <= set[1]) | 129 | 11.2M | return ok; | 130 | 13.3M | set += 2; | 131 | 13.3M | break; | 132 | | | 133 | 0 | case SRE_OP_RANGE_UNI_IGNORE: | 134 | | /* <RANGE_UNI_IGNORE> <lower> <upper> */ | 135 | 0 | { | 136 | 0 | SRE_CODE uch; | 137 | | /* ch is already lower cased */ | 138 | 0 | if (set[0] <= ch && ch <= set[1]) | 139 | 0 | return ok; | 140 | 0 | uch = sre_upper_unicode(ch); | 141 | 0 | if (set[0] <= uch && uch <= set[1]) | 142 | 0 | return ok; | 143 | 0 | set += 2; | 144 | 0 | break; | 145 | 0 | } | 146 | | | 147 | 266M | case SRE_OP_NEGATE: | 148 | 266M | ok = !ok; | 149 | 266M | break; | 150 | | | 151 | 0 | case SRE_OP_BIGCHARSET: | 152 | | /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ | 153 | 0 | { | 154 | 0 | Py_ssize_t count, block; | 155 | 0 | count = *(set++); | 156 | |
| 157 | 0 | if (ch < 0x10000u) | 158 | 0 | block = ((unsigned char*)set)[ch >> 8]; | 159 | 0 | else | 160 | 0 | block = -1; | 161 | 0 | set += 256/sizeof(SRE_CODE); | 162 | 0 | if (block >=0 && | 163 | 0 | (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & | 164 | 0 | (1u << (ch & (SRE_CODE_BITS-1))))) | 165 | 0 | return ok; | 166 | 0 | set += count * (256/SRE_CODE_BITS); | 167 | 0 | break; | 168 | 0 | } | 169 | | | 170 | 0 | default: | 171 | | /* internal error -- there's not much we can do about it | 172 | | here, so let's just pretend it didn't match... */ | 173 | 0 | return 0; | 174 | 1.57G | } | 175 | 1.57G | } | 176 | 721M | } |
|
177 | | |
178 | | LOCAL(int) |
179 | | SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) |
180 | 0 | { |
181 | 0 | SRE_CODE lo, up; |
182 | 0 | lo = sre_lower_locale(ch); |
183 | 0 | if (SRE(charset)(state, set, lo)) |
184 | 0 | return 1; |
185 | | |
186 | 0 | up = sre_upper_locale(ch); |
187 | 0 | return up != lo && SRE(charset)(state, set, up); |
188 | 0 | } Unexecuted instantiation: sre.c:sre_ucs1_charset_loc_ignore Unexecuted instantiation: sre.c:sre_ucs2_charset_loc_ignore Unexecuted instantiation: sre.c:sre_ucs4_charset_loc_ignore |
189 | | |
190 | | LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); |
191 | | |
192 | | LOCAL(Py_ssize_t) |
193 | | SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) |
194 | 720M | { |
195 | 720M | SRE_CODE chr; |
196 | 720M | SRE_CHAR c; |
197 | 720M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; |
198 | 720M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; |
199 | 720M | Py_ssize_t i; |
200 | 720M | INIT_TRACE(state); |
201 | | |
202 | | /* adjust end */ |
203 | 720M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) |
204 | 10.4M | end = ptr + maxcount; |
205 | | |
206 | 720M | switch (pattern[0]) { |
207 | | |
208 | 663M | case SRE_OP_IN: |
209 | | /* repeated set */ |
210 | 663M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); |
211 | 1.05G | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) |
212 | 394M | ptr++; |
213 | 663M | break; |
214 | | |
215 | 0 | case SRE_OP_ANY: |
216 | | /* repeated dot wildcard. */ |
217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); |
218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) |
219 | 0 | ptr++; |
220 | 0 | break; |
221 | | |
222 | 0 | case SRE_OP_ANY_ALL: |
223 | | /* repeated dot wildcard. skip to the end of the target |
224 | | string, and backtrack from there */ |
225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); |
226 | 0 | ptr = end; |
227 | 0 | break; |
228 | | |
229 | 52.5M | case SRE_OP_LITERAL: |
230 | | /* repeated literal */ |
231 | 52.5M | chr = pattern[1]; |
232 | 52.5M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); |
233 | 52.5M | c = (SRE_CHAR) chr; |
234 | | #if SIZEOF_SRE_CHAR < 4 |
235 | 50.4M | if ((SRE_CODE) c != chr) |
236 | 0 | ; /* literal can't match: doesn't fit in char width */ |
237 | 50.4M | else |
238 | 50.4M | #endif |
239 | 56.4M | while (ptr < end && *ptr == c) |
240 | 3.92M | ptr++; |
241 | 52.5M | break; |
242 | | |
243 | 0 | case SRE_OP_LITERAL_IGNORE: |
244 | | /* repeated literal */ |
245 | 0 | chr = pattern[1]; |
246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) |
248 | 0 | ptr++; |
249 | 0 | break; |
250 | | |
251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: |
252 | | /* repeated literal */ |
253 | 0 | chr = pattern[1]; |
254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) |
256 | 0 | ptr++; |
257 | 0 | break; |
258 | | |
259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: |
260 | | /* repeated literal */ |
261 | 0 | chr = pattern[1]; |
262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) |
264 | 0 | ptr++; |
265 | 0 | break; |
266 | | |
267 | 4.35M | case SRE_OP_NOT_LITERAL: |
268 | | /* repeated non-literal */ |
269 | 4.35M | chr = pattern[1]; |
270 | 4.35M | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); |
271 | 4.35M | c = (SRE_CHAR) chr; |
272 | | #if SIZEOF_SRE_CHAR < 4 |
273 | 1.05M | if ((SRE_CODE) c != chr) |
274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ |
275 | 1.05M | else |
276 | 1.05M | #endif |
277 | 40.1M | while (ptr < end && *ptr != c) |
278 | 35.7M | ptr++; |
279 | 4.35M | break; |
280 | | |
281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: |
282 | | /* repeated non-literal */ |
283 | 0 | chr = pattern[1]; |
284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); |
285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) |
286 | 0 | ptr++; |
287 | 0 | break; |
288 | | |
289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: |
290 | | /* repeated non-literal */ |
291 | 0 | chr = pattern[1]; |
292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); |
293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) |
294 | 0 | ptr++; |
295 | 0 | break; |
296 | | |
297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: |
298 | | /* repeated non-literal */ |
299 | 0 | chr = pattern[1]; |
300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); |
301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) |
302 | 0 | ptr++; |
303 | 0 | break; |
304 | | |
305 | 0 | default: |
306 | | /* repeated single character pattern */ |
307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); |
308 | 0 | while ((SRE_CHAR*) state->ptr < end) { |
309 | 0 | i = SRE(match)(state, pattern, 0); |
310 | 0 | if (i < 0) |
311 | 0 | return i; |
312 | 0 | if (!i) |
313 | 0 | break; |
314 | 0 | } |
315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, |
316 | 0 | (SRE_CHAR*) state->ptr - ptr)); |
317 | 0 | return (SRE_CHAR*) state->ptr - ptr; |
318 | 720M | } |
319 | | |
320 | 720M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, |
321 | 720M | ptr - (SRE_CHAR*) state->ptr)); |
322 | 720M | return ptr - (SRE_CHAR*) state->ptr; |
323 | 720M | } Line | Count | Source | 194 | 201M | { | 195 | 201M | SRE_CODE chr; | 196 | 201M | SRE_CHAR c; | 197 | 201M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 201M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 201M | Py_ssize_t i; | 200 | 201M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 201M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 1.48M | end = ptr + maxcount; | 205 | | | 206 | 201M | switch (pattern[0]) { | 207 | | | 208 | 156M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 156M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 257M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 101M | ptr++; | 213 | 156M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 44.6M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 44.6M | chr = pattern[1]; | 232 | 44.6M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 44.6M | c = (SRE_CHAR) chr; | 234 | 44.6M | #if SIZEOF_SRE_CHAR < 4 | 235 | 44.6M | if ((SRE_CODE) c != chr) | 236 | 0 | ; /* literal can't match: doesn't fit in char width */ | 237 | 44.6M | else | 238 | 44.6M | #endif | 239 | 46.4M | while (ptr < end && *ptr == c) | 240 | 1.77M | ptr++; | 241 | 44.6M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 202k | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 202k | chr = pattern[1]; | 270 | 202k | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 202k | c = (SRE_CHAR) chr; | 272 | 202k | #if SIZEOF_SRE_CHAR < 4 | 273 | 202k | if ((SRE_CODE) c != chr) | 274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | 202k | else | 
276 | 202k | #endif | 277 | 368k | while (ptr < end && *ptr != c) | 278 | 165k | ptr++; | 279 | 202k | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 201M | } | 319 | | | 320 | 201M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 201M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 201M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 201M | } |
Line | Count | Source | 194 | 312M | { | 195 | 312M | SRE_CODE chr; | 196 | 312M | SRE_CHAR c; | 197 | 312M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 312M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 312M | Py_ssize_t i; | 200 | 312M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 312M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 4.77M | end = ptr + maxcount; | 205 | | | 206 | 312M | switch (pattern[0]) { | 207 | | | 208 | 305M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 305M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 438M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 132M | ptr++; | 213 | 305M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 5.83M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 5.83M | chr = pattern[1]; | 232 | 5.83M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 5.83M | c = (SRE_CHAR) chr; | 234 | 5.83M | #if SIZEOF_SRE_CHAR < 4 | 235 | 5.83M | if ((SRE_CODE) c != chr) | 236 | 0 | ; /* literal can't match: doesn't fit in char width */ | 237 | 5.83M | else | 238 | 5.83M | #endif | 239 | 7.70M | while (ptr < end && *ptr == c) | 240 | 1.86M | ptr++; | 241 | 5.83M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 848k | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 848k | chr = pattern[1]; | 270 | 848k | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 848k | c = (SRE_CHAR) chr; | 272 | 848k | #if SIZEOF_SRE_CHAR < 4 | 273 | 848k | if ((SRE_CODE) c != chr) | 274 | 0 | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | 848k | else | 
276 | 848k | #endif | 277 | 22.8M | while (ptr < end && *ptr != c) | 278 | 21.9M | ptr++; | 279 | 848k | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 312M | } | 319 | | | 320 | 312M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 312M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 312M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 312M | } |
Line | Count | Source | 194 | 206M | { | 195 | 206M | SRE_CODE chr; | 196 | 206M | SRE_CHAR c; | 197 | 206M | const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; | 198 | 206M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 199 | 206M | Py_ssize_t i; | 200 | 206M | INIT_TRACE(state); | 201 | | | 202 | | /* adjust end */ | 203 | 206M | if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) | 204 | 4.15M | end = ptr + maxcount; | 205 | | | 206 | 206M | switch (pattern[0]) { | 207 | | | 208 | 201M | case SRE_OP_IN: | 209 | | /* repeated set */ | 210 | 201M | TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); | 211 | 362M | while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) | 212 | 160M | ptr++; | 213 | 201M | break; | 214 | | | 215 | 0 | case SRE_OP_ANY: | 216 | | /* repeated dot wildcard. */ | 217 | 0 | TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); | 218 | 0 | while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) | 219 | 0 | ptr++; | 220 | 0 | break; | 221 | | | 222 | 0 | case SRE_OP_ANY_ALL: | 223 | | /* repeated dot wildcard. 
skip to the end of the target | 224 | | string, and backtrack from there */ | 225 | 0 | TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); | 226 | 0 | ptr = end; | 227 | 0 | break; | 228 | | | 229 | 2.07M | case SRE_OP_LITERAL: | 230 | | /* repeated literal */ | 231 | 2.07M | chr = pattern[1]; | 232 | 2.07M | TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); | 233 | 2.07M | c = (SRE_CHAR) chr; | 234 | | #if SIZEOF_SRE_CHAR < 4 | 235 | | if ((SRE_CODE) c != chr) | 236 | | ; /* literal can't match: doesn't fit in char width */ | 237 | | else | 238 | | #endif | 239 | 2.35M | while (ptr < end && *ptr == c) | 240 | 277k | ptr++; | 241 | 2.07M | break; | 242 | | | 243 | 0 | case SRE_OP_LITERAL_IGNORE: | 244 | | /* repeated literal */ | 245 | 0 | chr = pattern[1]; | 246 | 0 | TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 247 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) | 248 | 0 | ptr++; | 249 | 0 | break; | 250 | | | 251 | 0 | case SRE_OP_LITERAL_UNI_IGNORE: | 252 | | /* repeated literal */ | 253 | 0 | chr = pattern[1]; | 254 | 0 | TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 255 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) | 256 | 0 | ptr++; | 257 | 0 | break; | 258 | | | 259 | 0 | case SRE_OP_LITERAL_LOC_IGNORE: | 260 | | /* repeated literal */ | 261 | 0 | chr = pattern[1]; | 262 | 0 | TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 263 | 0 | while (ptr < end && char_loc_ignore(chr, *ptr)) | 264 | 0 | ptr++; | 265 | 0 | break; | 266 | | | 267 | 3.30M | case SRE_OP_NOT_LITERAL: | 268 | | /* repeated non-literal */ | 269 | 3.30M | chr = pattern[1]; | 270 | 3.30M | TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); | 271 | 3.30M | c = (SRE_CHAR) chr; | 272 | | #if SIZEOF_SRE_CHAR < 4 | 273 | | if ((SRE_CODE) c != chr) | 274 | | ptr = end; /* literal can't match: doesn't fit in char width */ | 275 | | else | 276 | | #endif | 277 | 16.9M | while 
(ptr < end && *ptr != c) | 278 | 13.6M | ptr++; | 279 | 3.30M | break; | 280 | | | 281 | 0 | case SRE_OP_NOT_LITERAL_IGNORE: | 282 | | /* repeated non-literal */ | 283 | 0 | chr = pattern[1]; | 284 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); | 285 | 0 | while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) | 286 | 0 | ptr++; | 287 | 0 | break; | 288 | | | 289 | 0 | case SRE_OP_NOT_LITERAL_UNI_IGNORE: | 290 | | /* repeated non-literal */ | 291 | 0 | chr = pattern[1]; | 292 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); | 293 | 0 | while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) | 294 | 0 | ptr++; | 295 | 0 | break; | 296 | | | 297 | 0 | case SRE_OP_NOT_LITERAL_LOC_IGNORE: | 298 | | /* repeated non-literal */ | 299 | 0 | chr = pattern[1]; | 300 | 0 | TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); | 301 | 0 | while (ptr < end && !char_loc_ignore(chr, *ptr)) | 302 | 0 | ptr++; | 303 | 0 | break; | 304 | | | 305 | 0 | default: | 306 | | /* repeated single character pattern */ | 307 | 0 | TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); | 308 | 0 | while ((SRE_CHAR*) state->ptr < end) { | 309 | 0 | i = SRE(match)(state, pattern, 0); | 310 | 0 | if (i < 0) | 311 | 0 | return i; | 312 | 0 | if (!i) | 313 | 0 | break; | 314 | 0 | } | 315 | 0 | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 316 | 0 | (SRE_CHAR*) state->ptr - ptr)); | 317 | 0 | return (SRE_CHAR*) state->ptr - ptr; | 318 | 206M | } | 319 | | | 320 | 206M | TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, | 321 | 206M | ptr - (SRE_CHAR*) state->ptr)); | 322 | 206M | return ptr - (SRE_CHAR*) state->ptr; | 323 | 206M | } |
|
324 | | |
325 | | /* The macros below should be used to protect recursive SRE(match)() |
326 | | * calls that *failed* and do *not* return immediately (IOW, those |
327 | | * that will backtrack). Explaining: |
328 | | * |
329 | | * - Recursive SRE(match)() returned true: that's usually a success |
330 | | * (besides atypical cases like ASSERT_NOT), therefore there's no |
331 | | * reason to restore lastmark; |
332 | | * |
333 | | * - Recursive SRE(match)() returned false but the current SRE(match)() |
334 | | * is returning to the caller: If the current SRE(match)() is the |
335 | | * top function of the recursion, returning false will be a matching |
336 | | * failure, and it doesn't matter where lastmark is pointing to. |
337 | | * If it's *not* the top function, it will be a recursive SRE(match)() |
338 | | * failure by itself, and the calling SRE(match)() will have to deal |
339 | | * with the failure by the same rules explained here (it will restore |
340 | | * lastmark by itself if necessary); |
341 | | * |
342 | | * - Recursive SRE(match)() returned false, and will continue the |
343 | | * outside 'for' loop: must be protected when breaking, since the next |
344 | | * OP could potentially depend on lastmark; |
345 | | * |
346 | | * - Recursive SRE(match)() returned false, and will be called again |
347 | | * inside a local for/while loop: must be protected between each |
348 | | * loop iteration, since the recursive SRE(match)() could do anything, |
349 | | * and could potentially depend on lastmark. |
350 | | * |
351 | | * For more information, check the discussion at SF patch #712900. |
352 | | */ |
/* Record state->lastmark and state->lastindex in the current match
   context (ctx) so they can be reinstated after a failed sub-match. */
353 | | #define LASTMARK_SAVE() \ |
354 | 756M | do { \ |
355 | 756M | ctx->lastmark = state->lastmark; \ |
356 | 756M | ctx->lastindex = state->lastindex; \ |
357 | 756M | } while (0) |
/* Copy the values recorded in ctx by LASTMARK_SAVE() back into
   state->lastmark and state->lastindex. */
358 | | #define LASTMARK_RESTORE() \ |
359 | 278M | do { \ |
360 | 278M | state->lastmark = ctx->lastmark; \ |
361 | 278M | state->lastindex = ctx->lastindex; \ |
362 | 278M | } while (0) |
363 | | |
/* Trace the index of ctx->u.rep->last_ptr, then push that pointer value
   onto the data stack with DATA_PUSH. */
364 | | #define LAST_PTR_PUSH() \ |
365 | 317M | do { \ |
366 | 317M | TRACE(("push last_ptr: %zd", \ |
367 | 317M | PTR_TO_INDEX(ctx->u.rep->last_ptr))); \ |
368 | 317M | DATA_PUSH(&ctx->u.rep->last_ptr); \ |
369 | 317M | } while (0) |
/* Pop the top of the data stack back into ctx->u.rep->last_ptr with
   DATA_POP, then trace the index of the restored pointer. */
370 | | #define LAST_PTR_POP() \ |
371 | 317M | do { \ |
372 | 317M | DATA_POP(&ctx->u.rep->last_ptr); \ |
373 | 317M | TRACE(("pop last_ptr: %zd", \ |
374 | 317M | PTR_TO_INDEX(ctx->u.rep->last_ptr))); \ |
375 | 317M | } while (0) |
376 | | |
/* Result helpers.  RETURN_ERROR returns i from the enclosing function
   immediately.  RETURN_FAILURE and RETURN_SUCCESS set the local `ret` to
   0 or 1 respectively and jump to the enclosing function's `exit` label. */
377 | 0 | #define RETURN_ERROR(i) do { return i; } while(0) |
378 | 772M | #define RETURN_FAILURE do { ret = 0; goto exit; } while(0) |
379 | 1.10G | #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) |
380 | | |
/* Conditional variants.  RETURN_ON_ERROR returns i when it is negative.
   RETURN_ON_SUCCESS does the same error check and then succeeds when
   i > 0; RETURN_ON_FAILURE does the error check and then fails when
   i == 0.  The argument i is expanded more than once, so pass an
   expression without side effects. */
381 | | #define RETURN_ON_ERROR(i) \ |
382 | 1.66G | do { if (i < 0) RETURN_ERROR(i); } while (0) |
383 | | #define RETURN_ON_SUCCESS(i) \ |
384 | 181M | do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) |
385 | | #define RETURN_ON_FAILURE(i) \ |
386 | 122M | do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) |
387 | | |
/* Reserve sizeof(type) bytes on state's data stack and point ptr at them.
   alloc_pos is set to the offset of the reserved region, and
   state->data_stack_base is advanced past it.  When the remaining space
   is too small, data_stack_grow() is called and a negative result is
   returned from the enclosing function.  After a successful grow, ctx is
   looked up again from ctx_pos (unless ctx_pos is -1).
   NOTE(review): presumably the re-lookup is needed because growing can
   move the stack buffer -- confirm against data_stack_grow().
   Uses alloc_pos, ctx_pos and ctx from the enclosing scope. */
388 | 1.87G | #define DATA_STACK_ALLOC(state, type, ptr) \ |
389 | 1.87G | do { \ |
390 | 1.87G | alloc_pos = state->data_stack_base; \ |
391 | 1.87G | TRACE(("allocating %s in %zd (%zd)\n", \ |
392 | 1.87G | Py_STRINGIFY(type), alloc_pos, sizeof(type))); \ |
393 | 1.87G | if (sizeof(type) > state->data_stack_size - alloc_pos) { \ |
394 | 176M | int j = data_stack_grow(state, sizeof(type)); \ |
395 | 176M | if (j < 0) return j; \ |
396 | 176M | if (ctx_pos != -1) \ |
397 | 176M | DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ |
398 | 176M | } \ |
399 | 1.87G | ptr = (type*)(state->data_stack+alloc_pos); \ |
400 | 1.87G | state->data_stack_base += sizeof(type); \ |
401 | 1.87G | } while (0) |
402 | | |
/* Point ptr at the object of the given type located pos bytes from the
   start of state->data_stack.  Nothing is allocated or copied. */
403 | 1.94G | #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ |
404 | 1.94G | do { \ |
405 | 1.94G | TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \ |
406 | 1.94G | ptr = (type*)(state->data_stack+pos); \ |
407 | 1.94G | } while (0) |
408 | | |
/* Copy size bytes starting at data onto the top of state's data stack and
   advance state->data_stack_base by size.  When there is not enough room,
   data_stack_grow() is called first; a negative result is returned from
   the enclosing function, and after a successful grow ctx is looked up
   again from ctx_pos (unless ctx_pos is -1).  Uses ctx_pos and ctx from
   the enclosing scope. */
409 | 686M | #define DATA_STACK_PUSH(state, data, size) \ |
410 | 686M | do { \ |
411 | 686M | TRACE(("copy data in %p to %zd (%zd)\n", \ |
412 | 686M | data, state->data_stack_base, size)); \ |
413 | 686M | if (size > state->data_stack_size - state->data_stack_base) { \ |
414 | 90.8k | int j = data_stack_grow(state, size); \ |
415 | 90.8k | if (j < 0) return j; \ |
416 | 90.8k | if (ctx_pos != -1) \ |
417 | 90.8k | DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ |
418 | 90.8k | } \ |
419 | 686M | memcpy(state->data_stack+state->data_stack_base, data, size); \ |
420 | 686M | state->data_stack_base += size; \ |
421 | 686M | } while (0) |
422 | | |
/* Copy the top size bytes of the data stack into data.  The bytes are
   removed from the stack (data_stack_base is lowered) only when discard
   is non-zero; otherwise they stay on the stack. */
423 | | /* We add an explicit cast to memcpy here because MSVC has a bug when |
424 | | compiling C code where it believes that `const void**` cannot be |
425 | | safely cast to `void*`, see bpo-39943 for details. */ |
426 | 470M | #define DATA_STACK_POP(state, data, size, discard) \ |
427 | 470M | do { \ |
428 | 470M | TRACE(("copy data to %p from %zd (%zd)\n", \ |
429 | 470M | data, state->data_stack_base-size, size)); \ |
430 | 470M | memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \ |
431 | 470M | if (discard) \ |
432 | 470M | state->data_stack_base -= size; \ |
433 | 470M | } while (0) |
434 | | |
/* Drop the top size bytes of the data stack by lowering
   state->data_stack_base; nothing is copied out. */
435 | 2.09G | #define DATA_STACK_POP_DISCARD(state, size) \ |
436 | 2.09G | do { \ |
437 | 2.09G | TRACE(("discard data from %zd (%zd)\n", \ |
438 | 2.09G | state->data_stack_base-size, size)); \ |
439 | 2.09G | state->data_stack_base -= size; \ |
440 | 2.09G | } while(0) |
441 | | |
/* Shorthands that apply the DATA_STACK_* macros to the local `state`.
   DATA_PUSH, DATA_POP and DATA_POP_DISCARD take a pointer x and use
   sizeof(*(x)) as the byte count; DATA_POP always discards the popped
   bytes (discard argument is 1).  DATA_ALLOC and DATA_LOOKUP_AT forward
   their type, pointer and position arguments unchanged. */
442 | | #define DATA_PUSH(x) \ |
443 | 317M | DATA_STACK_PUSH(state, (x), sizeof(*(x))) |
444 | | #define DATA_POP(x) \ |
445 | 317M | DATA_STACK_POP(state, (x), sizeof(*(x)), 1) |
446 | | #define DATA_POP_DISCARD(x) \ |
447 | 1.87G | DATA_STACK_POP_DISCARD(state, sizeof(*(x))) |
448 | | #define DATA_ALLOC(t,p) \ |
449 | 1.87G | DATA_STACK_ALLOC(state, t, p) |
450 | | #define DATA_LOOKUP_AT(t,p,pos) \ |
451 | 1.94G | DATA_STACK_LOOKUP_AT(state,t,p,pos) |
452 | | |
/* Convert a pointer into the subject string to a character index: the
   byte distance from state->beginning divided by state->charsize.
   A null pointer yields -1. */
453 | | #define PTR_TO_INDEX(ptr) \ |
454 | | ((ptr) ? ((char*)(ptr) - (char*)state->beginning) / state->charsize : -1) |
455 | | |
/* Debug aid.  In VERBOSE builds, when DO_TRACE is true, print the label,
   the number of marks (lastmark + 1) and the index of every entry of
   state->mark[0..lastmark]; an extra space is emitted before each even
   position after the first, which groups the output in pairs.  In
   non-VERBOSE builds the macro expands to nothing. */
456 | | #if VERBOSE |
457 | | # define MARK_TRACE(label, lastmark) \ |
458 | | do if (DO_TRACE) { \ |
459 | | TRACE(("%s %d marks:", (label), (lastmark)+1)); \ |
460 | | for (int j = 0; j <= (lastmark); j++) { \ |
461 | | if (j && (j & 1) == 0) { \ |
462 | | TRACE((" ")); \ |
463 | | } \ |
464 | | TRACE((" %zd", PTR_TO_INDEX(state->mark[j]))); \ |
465 | | } \ |
466 | | TRACE(("\n")); \ |
467 | | } while (0) |
468 | | #else |
469 | | # define MARK_TRACE(label, lastmark) |
470 | | #endif |
/* Save/restore helpers for the state->mark array.  Each one is a no-op
   when lastmark is negative; otherwise it operates on the first
   lastmark + 1 entries, i.e. (lastmark+1) * sizeof(void*) bytes.
   The lastmark argument is expanded several times and is not fully
   parenthesized, so pass a simple, side-effect-free expression.

   MARK_PUSH copies the entries from state->mark onto the data stack. */
471 | | #define MARK_PUSH(lastmark) \ |
472 | 631M | do if (lastmark >= 0) { \ |
473 | 369M | MARK_TRACE("push", (lastmark)); \ |
474 | 369M | size_t _marks_size = (lastmark+1) * sizeof(void*); \ |
475 | 369M | DATA_STACK_PUSH(state, state->mark, _marks_size); \ |
476 | 631M | } while (0) |
/* MARK_POP copies the entries from the data stack back into state->mark
   and removes them from the stack. */
477 | | #define MARK_POP(lastmark) \ |
478 | 211M | do if (lastmark >= 0) { \ |
479 | 149M | size_t _marks_size = (lastmark+1) * sizeof(void*); \ |
480 | 149M | DATA_STACK_POP(state, state->mark, _marks_size, 1); \ |
481 | 149M | MARK_TRACE("pop", (lastmark)); \ |
482 | 211M | } while (0) |
/* MARK_POP_KEEP copies the entries back into state->mark but leaves them
   on the data stack (discard argument is 0). */
483 | | #define MARK_POP_KEEP(lastmark) \ |
484 | 2.59M | do if (lastmark >= 0) { \ |
485 | 2.59M | size_t _marks_size = (lastmark+1) * sizeof(void*); \ |
486 | 2.59M | DATA_STACK_POP(state, state->mark, _marks_size, 0); \ |
487 | 2.59M | MARK_TRACE("pop keep", (lastmark)); \ |
488 | 2.59M | } while (0) |
/* MARK_POP_DISCARD removes the saved entries from the data stack without
   touching state->mark. */
489 | | #define MARK_POP_DISCARD(lastmark) \ |
490 | 419M | do if (lastmark >= 0) { \ |
491 | 219M | size_t _marks_size = (lastmark+1) * sizeof(void*); \ |
492 | 219M | DATA_STACK_POP_DISCARD(state, _marks_size); \ |
493 | 219M | MARK_TRACE("pop discard", (lastmark)); \ |
494 | 419M | } while (0) |
495 | | |
/* Values for the `jump` field of SRE(match_context).  DO_JUMPX stores one
   of these in every context it creates; the initial context of
   SRE(match) is given JUMP_NONE.
   NOTE(review): presumably the code at the `exit` label uses this value
   to choose which jump label to resume at -- that code is outside this
   section; confirm there. */
496 | 649M | #define JUMP_NONE 0 |
497 | 0 | #define JUMP_MAX_UNTIL_1 1 |
498 | 317M | #define JUMP_MAX_UNTIL_2 2 |
499 | 181M | #define JUMP_MAX_UNTIL_3 3 |
500 | 0 | #define JUMP_MIN_UNTIL_1 4 |
501 | 0 | #define JUMP_MIN_UNTIL_2 5 |
502 | 0 | #define JUMP_MIN_UNTIL_3 6 |
503 | 180M | #define JUMP_REPEAT 7 |
504 | 5.53M | #define JUMP_REPEAT_ONE_1 8 |
505 | 217M | #define JUMP_REPEAT_ONE_2 9 |
506 | 0 | #define JUMP_MIN_REPEAT_ONE 10 |
507 | 159M | #define JUMP_BRANCH 11 |
508 | 122M | #define JUMP_ASSERT 12 |
509 | 39.2M | #define JUMP_ASSERT_NOT 13 |
510 | 0 | #define JUMP_POSS_REPEAT_1 14 |
511 | 0 | #define JUMP_POSS_REPEAT_2 15 |
512 | 0 | #define JUMP_ATOMIC_GROUP 16 |
513 | | |
/* Start matching nextpattern in a fresh context without a C-level call.
   The current pattern and ptr are saved in ctx, a new match context is
   allocated on the data stack, and its pattern, toplevel, jump and
   last_ctx_pos (the position of the current context) are filled in.
   The new context then becomes ctx and control goes to `entrance`.
   The statements after jumplabel reload pattern and ptr from ctx.
   NOTE(review): jumplabel is presumably reached from the `exit` code once
   the sub-match finishes and ctx refers to the saved context again --
   that code is outside this section; confirm there.
   This macro expands to several statements plus a label and is not
   wrapped in do { } while (0), so it must be used where a statement
   sequence is valid. */
514 | | #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \ |
515 | 1.22G | ctx->pattern = pattern; \ |
516 | 1.22G | ctx->ptr = ptr; \ |
517 | 1.22G | DATA_ALLOC(SRE(match_context), nextctx); \ |
518 | 1.22G | nextctx->pattern = nextpattern; \ |
519 | 1.22G | nextctx->toplevel = toplevel_; \ |
520 | 1.22G | nextctx->jump = jumpvalue; \ |
521 | 1.22G | nextctx->last_ctx_pos = ctx_pos; \ |
522 | 1.22G | pattern = nextpattern; \ |
523 | 1.22G | ctx_pos = alloc_pos; \ |
524 | 1.22G | ctx = nextctx; \ |
525 | 1.22G | goto entrance; \ |
526 | 1.22G | jumplabel: \ |
527 | 1.22G | pattern = ctx->pattern; \ |
528 | 1.22G | ptr = ctx->ptr; |
529 | | |
/* DO_JUMP: the new context inherits the current context's toplevel flag. */
530 | | #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ |
531 | 1.06G | DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel) |
532 | | |
/* DO_JUMP0: the new context always gets toplevel set to 0. */
533 | | #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ |
534 | 161M | DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) |
535 | | |
/* Per-frame record kept on the data stack by SRE(match).
   count        - repetition count (REPEAT_ONE stores the SRE(count)
                  result here)
   u.chr        - literal that starts the tail, used by REPEAT_ONE
   u.rep        - repeat record whose last_ptr is saved and restored by
                  LAST_PTR_PUSH / LAST_PTR_POP
   lastmark,
   lastindex    - snapshot written by LASTMARK_SAVE()
   pattern, ptr - pattern and string positions saved by DO_JUMPX
   toplevel     - toplevel flag for this frame (checked by SUCCESS)
   jump         - one of the JUMP_* values
   last_ctx_pos - data-stack position of the context that created this
                  one; -1 for the initial context */
536 | | typedef struct { |
537 | | Py_ssize_t count; |
538 | | union { |
539 | | SRE_CODE chr; |
540 | | SRE_REPEAT* rep; |
541 | | } u; |
542 | | int lastmark; |
543 | | int lastindex; |
544 | | const SRE_CODE* pattern; |
545 | | const SRE_CHAR* ptr; |
546 | | int toplevel; |
547 | | int jump; |
548 | | Py_ssize_t last_ctx_pos; |
549 | | } SRE(match_context); |
550 | | |
/* Increment the local sigcount and, each time its low 12 bits wrap to
   zero (every 0x1000 increments), call PyErr_CheckSignals().  A non-zero
   result makes the enclosing function return SRE_ERROR_INTERRUPTED. */
551 | | #define _MAYBE_CHECK_SIGNALS \ |
552 | 3.27G | do { \ |
553 | 3.27G | if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \ |
554 | 0 | RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ |
555 | 0 | } \ |
556 | 3.27G | } while (0) |
557 | | |
/* Py_DEBUG builds add a countdown on top of the signal check: while
   state->fail_after_count is non-negative it is decremented on every
   use, and when it was 0 before the decrement the exception
   state->fail_after_exc is set and SRE_ERROR_INTERRUPTED is returned.
   Other builds use the plain signal check. */
558 | | #ifdef Py_DEBUG |
559 | | # define MAYBE_CHECK_SIGNALS \ |
560 | | do { \ |
561 | | _MAYBE_CHECK_SIGNALS; \ |
562 | | if (state->fail_after_count >= 0) { \ |
563 | | if (state->fail_after_count-- == 0) { \ |
564 | | PyErr_SetNone(state->fail_after_exc); \ |
565 | | RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ |
566 | | } \ |
567 | | } \ |
568 | | } while (0) |
569 | | #else |
570 | 3.27G | # define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS |
571 | | #endif /* Py_DEBUG */ |
572 | | |
/* Normalize USE_COMPUTED_GOTOS to 0 or 1.  With HAVE_COMPUTED_GOTOS it
   defaults to 1 unless already defined; requesting it without
   HAVE_COMPUTED_GOTOS is a compile-time error; otherwise it is forced
   to 0. */
573 | | #ifdef HAVE_COMPUTED_GOTOS |
574 | | #ifndef USE_COMPUTED_GOTOS |
575 | | #define USE_COMPUTED_GOTOS 1 |
576 | | #endif |
577 | | #elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS |
578 | | #error "Computed gotos are not supported on this compiler." |
579 | | #else |
580 | | #undef USE_COMPUTED_GOTOS |
581 | | #define USE_COMPUTED_GOTOS 0 |
582 | | #endif |
583 | | |
/* Opcode dispatch.  With computed gotos, TARGET(OP) names the label
   TARGET_OP and DISPATCH checks for signals, then jumps through
   sre_targets[] indexed by the next opcode while advancing pattern.
   Without them, TARGET(OP) is a `case` label and DISPATCH jumps to the
   `dispatch` label of the enclosing function.
   NOTE(review): sre_targets is presumably supplied by the
   "sre_targets.h" include inside SRE(match) -- confirm. */
584 | | #if USE_COMPUTED_GOTOS |
585 | 3.37G | #define TARGET(OP) TARGET_ ## OP |
586 | | #define DISPATCH \ |
587 | 3.27G | do { \ |
588 | 3.27G | MAYBE_CHECK_SIGNALS; \ |
589 | 3.27G | goto *sre_targets[*pattern++]; \ |
590 | 3.27G | } while (0) |
591 | | #else |
592 | | #define TARGET(OP) case OP |
593 | | #define DISPATCH goto dispatch |
594 | | #endif |
595 | | |
596 | | /* check if string matches the given pattern. returns <0 for |
597 | | error, 0 for failure, and 1 for success */ |
598 | | LOCAL(Py_ssize_t) |
599 | | SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) |
600 | 649M | { |
601 | 649M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; |
602 | 649M | Py_ssize_t alloc_pos, ctx_pos = -1; |
603 | 649M | Py_ssize_t ret = 0; |
604 | 649M | int jump; |
605 | 649M | unsigned int sigcount = state->sigcount; |
606 | | |
607 | 649M | SRE(match_context)* ctx; |
608 | 649M | SRE(match_context)* nextctx; |
609 | 649M | INIT_TRACE(state); |
610 | | |
611 | 649M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); |
612 | | |
613 | 649M | DATA_ALLOC(SRE(match_context), ctx); |
614 | 649M | ctx->last_ctx_pos = -1; |
615 | 649M | ctx->jump = JUMP_NONE; |
616 | 649M | ctx->toplevel = toplevel; |
617 | 649M | ctx_pos = alloc_pos; |
618 | | |
619 | 649M | #if USE_COMPUTED_GOTOS |
620 | 649M | #include "sre_targets.h" |
621 | 649M | #endif |
622 | | |
623 | 1.87G | entrance: |
624 | | |
625 | 1.87G | ; // Fashion statement. |
626 | 1.87G | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; |
627 | | |
628 | 1.87G | if (pattern[0] == SRE_OP_INFO) { |
629 | | /* optimization info block */ |
630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... */ |
631 | 96.0M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { |
632 | 5.84M | TRACE(("reject (got %tu chars, need %zu)\n", |
633 | 5.84M | end - ptr, (size_t) pattern[3])); |
634 | 5.84M | RETURN_FAILURE; |
635 | 5.84M | } |
636 | 90.1M | pattern += pattern[1] + 1; |
637 | 90.1M | } |
638 | | |
639 | 1.86G | #if USE_COMPUTED_GOTOS |
640 | 1.86G | DISPATCH; |
641 | | #else |
642 | | dispatch: |
643 | | MAYBE_CHECK_SIGNALS; |
644 | | switch (*pattern++) |
645 | | #endif |
646 | 1.86G | { |
647 | | |
648 | 1.86G | TARGET(SRE_OP_MARK): |
649 | | /* set mark */ |
650 | | /* <MARK> <gid> */ |
651 | 693M | TRACE(("|%p|%p|MARK %d\n", pattern, |
652 | 693M | ptr, pattern[0])); |
653 | 693M | { |
654 | 693M | int i = pattern[0]; |
655 | 693M | if (i & 1) |
656 | 85.0M | state->lastindex = i/2 + 1; |
657 | 693M | if (i > state->lastmark) { |
658 | | /* state->lastmark is the highest valid index in the |
659 | | state->mark array. If it is increased by more than 1, |
660 | | the intervening marks must be set to NULL to signal |
661 | | that these marks have not been encountered. */ |
662 | 687M | int j = state->lastmark + 1; |
663 | 692M | while (j < i) |
664 | 5.03M | state->mark[j++] = NULL; |
665 | 687M | state->lastmark = i; |
666 | 687M | } |
667 | 693M | state->mark[i] = ptr; |
668 | 693M | } |
669 | 693M | pattern++; |
670 | 693M | DISPATCH; |
671 | | |
672 | 693M | TARGET(SRE_OP_LITERAL): |
673 | | /* match literal string */ |
674 | | /* <LITERAL> <code> */ |
675 | 259M | TRACE(("|%p|%p|LITERAL %d\n", pattern, |
676 | 259M | ptr, *pattern)); |
677 | 259M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) |
678 | 167M | RETURN_FAILURE; |
679 | 91.9M | pattern++; |
680 | 91.9M | ptr++; |
681 | 91.9M | DISPATCH; |
682 | | |
683 | 91.9M | TARGET(SRE_OP_NOT_LITERAL): |
684 | | /* match anything that is not literal character */ |
685 | | /* <NOT_LITERAL> <code> */ |
686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, |
687 | 0 | ptr, *pattern)); |
688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) |
689 | 0 | RETURN_FAILURE; |
690 | 0 | pattern++; |
691 | 0 | ptr++; |
692 | 0 | DISPATCH; |
693 | | |
694 | 279M | TARGET(SRE_OP_SUCCESS): |
695 | | /* end of pattern */ |
696 | 279M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); |
697 | 279M | if (ctx->toplevel && |
698 | 279M | ((state->match_all && ptr != state->end) || |
699 | 80.9M | (state->must_advance && ptr == state->start))) |
700 | 0 | { |
701 | 0 | RETURN_FAILURE; |
702 | 0 | } |
703 | 279M | state->ptr = ptr; |
704 | 279M | RETURN_SUCCESS; |
705 | | |
706 | 13.2M | TARGET(SRE_OP_AT): |
707 | | /* match at given position */ |
708 | | /* <AT> <code> */ |
709 | 13.2M | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); |
710 | 13.2M | if (!SRE(at)(state, ptr, *pattern)) |
711 | 4.10M | RETURN_FAILURE; |
712 | 9.18M | pattern++; |
713 | 9.18M | DISPATCH; |
714 | | |
715 | 9.18M | TARGET(SRE_OP_CATEGORY): |
716 | | /* match at given category */ |
717 | | /* <CATEGORY> <code> */ |
718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, |
719 | 0 | ptr, *pattern)); |
720 | 0 | if (ptr >= end || !sre_category(pattern[0], ptr[0])) |
721 | 0 | RETURN_FAILURE; |
722 | 0 | pattern++; |
723 | 0 | ptr++; |
724 | 0 | DISPATCH; |
725 | | |
726 | 0 | TARGET(SRE_OP_ANY): |
727 | | /* match anything (except a newline) */ |
728 | | /* <ANY> */ |
729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); |
730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) |
731 | 0 | RETURN_FAILURE; |
732 | 0 | ptr++; |
733 | 0 | DISPATCH; |
734 | | |
735 | 0 | TARGET(SRE_OP_ANY_ALL): |
736 | | /* match anything */ |
737 | | /* <ANY_ALL> */ |
738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); |
739 | 0 | if (ptr >= end) |
740 | 0 | RETURN_FAILURE; |
741 | 0 | ptr++; |
742 | 0 | DISPATCH; |
743 | | |
744 | 357M | TARGET(SRE_OP_IN): |
745 | | /* match set member (or non_member) */ |
746 | | /* <IN> <skip> <set> */ |
747 | 357M | TRACE(("|%p|%p|IN\n", pattern, ptr)); |
748 | 357M | if (ptr >= end || |
749 | 357M | !SRE(charset)(state, pattern + 1, *ptr)) |
750 | 6.31M | RETURN_FAILURE; |
751 | 351M | pattern += pattern[0]; |
752 | 351M | ptr++; |
753 | 351M | DISPATCH; |
754 | | |
755 | 351M | TARGET(SRE_OP_LITERAL_IGNORE): |
756 | 4.75M | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", |
757 | 4.75M | pattern, ptr, pattern[0])); |
758 | 4.75M | if (ptr >= end || |
759 | 4.75M | sre_lower_ascii(*ptr) != *pattern) |
760 | 389k | RETURN_FAILURE; |
761 | 4.36M | pattern++; |
762 | 4.36M | ptr++; |
763 | 4.36M | DISPATCH; |
764 | | |
765 | 4.36M | TARGET(SRE_OP_LITERAL_UNI_IGNORE): |
766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", |
767 | 0 | pattern, ptr, pattern[0])); |
768 | 0 | if (ptr >= end || |
769 | 0 | sre_lower_unicode(*ptr) != *pattern) |
770 | 0 | RETURN_FAILURE; |
771 | 0 | pattern++; |
772 | 0 | ptr++; |
773 | 0 | DISPATCH; |
774 | | |
775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): |
776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", |
777 | 0 | pattern, ptr, pattern[0])); |
778 | 0 | if (ptr >= end |
779 | 0 | || !char_loc_ignore(*pattern, *ptr)) |
780 | 0 | RETURN_FAILURE; |
781 | 0 | pattern++; |
782 | 0 | ptr++; |
783 | 0 | DISPATCH; |
784 | | |
785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): |
786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", |
787 | 0 | pattern, ptr, *pattern)); |
788 | 0 | if (ptr >= end || |
789 | 0 | sre_lower_ascii(*ptr) == *pattern) |
790 | 0 | RETURN_FAILURE; |
791 | 0 | pattern++; |
792 | 0 | ptr++; |
793 | 0 | DISPATCH; |
794 | | |
795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): |
796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", |
797 | 0 | pattern, ptr, *pattern)); |
798 | 0 | if (ptr >= end || |
799 | 0 | sre_lower_unicode(*ptr) == *pattern) |
800 | 0 | RETURN_FAILURE; |
801 | 0 | pattern++; |
802 | 0 | ptr++; |
803 | 0 | DISPATCH; |
804 | | |
805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): |
806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", |
807 | 0 | pattern, ptr, *pattern)); |
808 | 0 | if (ptr >= end |
809 | 0 | || char_loc_ignore(*pattern, *ptr)) |
810 | 0 | RETURN_FAILURE; |
811 | 0 | pattern++; |
812 | 0 | ptr++; |
813 | 0 | DISPATCH; |
814 | | |
815 | 0 | TARGET(SRE_OP_IN_IGNORE): |
816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); |
817 | 0 | if (ptr >= end |
818 | 0 | || !SRE(charset)(state, pattern+1, |
819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) |
820 | 0 | RETURN_FAILURE; |
821 | 0 | pattern += pattern[0]; |
822 | 0 | ptr++; |
823 | 0 | DISPATCH; |
824 | | |
825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): |
826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); |
827 | 0 | if (ptr >= end |
828 | 0 | || !SRE(charset)(state, pattern+1, |
829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) |
830 | 0 | RETURN_FAILURE; |
831 | 0 | pattern += pattern[0]; |
832 | 0 | ptr++; |
833 | 0 | DISPATCH; |
834 | | |
835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): |
836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); |
837 | 0 | if (ptr >= end |
838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) |
839 | 0 | RETURN_FAILURE; |
840 | 0 | pattern += pattern[0]; |
841 | 0 | ptr++; |
842 | 0 | DISPATCH; |
843 | | |
844 | 99.7M | TARGET(SRE_OP_JUMP): |
845 | 99.7M | TARGET(SRE_OP_INFO): |
846 | | /* jump forward */ |
847 | | /* <JUMP> <offset> */ |
848 | 99.7M | TRACE(("|%p|%p|JUMP %d\n", pattern, |
849 | 99.7M | ptr, pattern[0])); |
850 | 99.7M | pattern += pattern[0]; |
851 | 99.7M | DISPATCH; |
852 | | |
853 | 177M | TARGET(SRE_OP_BRANCH): |
854 | | /* alternation */ |
855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ |
856 | 177M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); |
857 | 177M | LASTMARK_SAVE(); |
858 | 177M | if (state->repeat) |
859 | 136M | MARK_PUSH(ctx->lastmark); |
860 | 428M | for (; pattern[0]; pattern += pattern[0]) { |
861 | 347M | if (pattern[1] == SRE_OP_LITERAL && |
862 | 347M | (ptr >= end || |
863 | 164M | (SRE_CODE) *ptr != pattern[2])) |
864 | 91.4M | continue; |
865 | 256M | if (pattern[1] == SRE_OP_IN && |
866 | 256M | (ptr >= end || |
867 | 132M | !SRE(charset)(state, pattern + 3, |
868 | 132M | (SRE_CODE) *ptr))) |
869 | 97.0M | continue; |
870 | 159M | state->ptr = ptr; |
871 | 159M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); |
872 | 159M | if (ret) { |
873 | 97.0M | if (state->repeat) |
874 | 79.1M | MARK_POP_DISCARD(ctx->lastmark); |
875 | 97.0M | RETURN_ON_ERROR(ret); |
876 | 97.0M | RETURN_SUCCESS; |
877 | 97.0M | } |
878 | 62.3M | if (state->repeat) |
879 | 16.6k | MARK_POP_KEEP(ctx->lastmark); |
880 | 62.3M | LASTMARK_RESTORE(); |
881 | 62.3M | } |
882 | 80.6M | if (state->repeat) |
883 | 57.0M | MARK_POP_DISCARD(ctx->lastmark); |
884 | 80.6M | RETURN_FAILURE; |
885 | | |
886 | 721M | TARGET(SRE_OP_REPEAT_ONE): |
887 | | /* match repeated sequence (maximizing regexp) */ |
888 | | |
889 | | /* this operator only works if the repeated item is |
890 | | exactly one character wide, and we're not already |
891 | | collecting backtracking points. for other cases, |
892 | | use the MAX_REPEAT operator */ |
893 | | |
894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
895 | | |
896 | 721M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, |
897 | 721M | pattern[1], pattern[2])); |
898 | | |
899 | 721M | if ((Py_ssize_t) pattern[1] > end - ptr) |
900 | 1.16M | RETURN_FAILURE; /* cannot match */ |
901 | | |
902 | 720M | state->ptr = ptr; |
903 | | |
904 | 720M | ret = SRE(count)(state, pattern+3, pattern[2]); |
905 | 720M | RETURN_ON_ERROR(ret); |
906 | 720M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
907 | 720M | ctx->count = ret; |
908 | 720M | ptr += ctx->count; |
909 | | |
910 | | /* when we arrive here, count contains the number of |
911 | | matches, and ptr points to the tail of the target |
912 | | string. check if the rest of the pattern matches, |
913 | | and backtrack if not. */ |
914 | | |
915 | 720M | if (ctx->count < (Py_ssize_t) pattern[1]) |
916 | 498M | RETURN_FAILURE; |
917 | | |
918 | 221M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && |
919 | 221M | ptr == state->end && |
920 | 221M | !(ctx->toplevel && state->must_advance && ptr == state->start)) |
921 | 71.2k | { |
922 | | /* tail is empty. we're finished */ |
923 | 71.2k | state->ptr = ptr; |
924 | 71.2k | RETURN_SUCCESS; |
925 | 71.2k | } |
926 | | |
927 | 221M | LASTMARK_SAVE(); |
928 | 221M | if (state->repeat) |
929 | 138M | MARK_PUSH(ctx->lastmark); |
930 | | |
931 | 221M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { |
932 | | /* tail starts with a literal. skip positions where |
933 | | the rest of the pattern cannot possibly match */ |
934 | 6.93M | ctx->u.chr = pattern[pattern[0]+1]; |
935 | 6.93M | for (;;) { |
936 | 37.2M | while (ctx->count >= (Py_ssize_t) pattern[1] && |
937 | 37.2M | (ptr >= end || *ptr != ctx->u.chr)) { |
938 | 30.2M | ptr--; |
939 | 30.2M | ctx->count--; |
940 | 30.2M | } |
941 | 6.93M | if (ctx->count < (Py_ssize_t) pattern[1]) |
942 | 1.40M | break; |
943 | 5.53M | state->ptr = ptr; |
944 | 5.53M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, |
945 | 5.53M | pattern+pattern[0]); |
946 | 5.53M | if (ret) { |
947 | 5.53M | if (state->repeat) |
948 | 4.35M | MARK_POP_DISCARD(ctx->lastmark); |
949 | 5.53M | RETURN_ON_ERROR(ret); |
950 | 5.53M | RETURN_SUCCESS; |
951 | 5.53M | } |
952 | 1.45k | if (state->repeat) |
953 | 0 | MARK_POP_KEEP(ctx->lastmark); |
954 | 1.45k | LASTMARK_RESTORE(); |
955 | | |
956 | 1.45k | ptr--; |
957 | 1.45k | ctx->count--; |
958 | 1.45k | } |
959 | 1.40M | if (state->repeat) |
960 | 756 | MARK_POP_DISCARD(ctx->lastmark); |
961 | 214M | } else { |
962 | | /* general case */ |
963 | 219M | while (ctx->count >= (Py_ssize_t) pattern[1]) { |
964 | 217M | state->ptr = ptr; |
965 | 217M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, |
966 | 217M | pattern+pattern[0]); |
967 | 217M | if (ret) { |
968 | 213M | if (state->repeat) |
969 | 132M | MARK_POP_DISCARD(ctx->lastmark); |
970 | 213M | RETURN_ON_ERROR(ret); |
971 | 213M | RETURN_SUCCESS; |
972 | 213M | } |
973 | 4.31M | if (state->repeat) |
974 | 2.57M | MARK_POP_KEEP(ctx->lastmark); |
975 | 4.31M | LASTMARK_RESTORE(); |
976 | | |
977 | 4.31M | ptr--; |
978 | 4.31M | ctx->count--; |
979 | 4.31M | } |
980 | 1.50M | if (state->repeat) |
981 | 1.33M | MARK_POP_DISCARD(ctx->lastmark); |
982 | 1.50M | } |
983 | 2.90M | RETURN_FAILURE; |
984 | | |
985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): |
986 | | /* match repeated sequence (minimizing regexp) */ |
987 | | |
988 | | /* this operator only works if the repeated item is |
989 | | exactly one character wide, and we're not already |
990 | | collecting backtracking points. for other cases, |
991 | | use the MIN_REPEAT operator */ |
992 | | |
993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ |
994 | |
|
995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, |
996 | 0 | pattern[1], pattern[2])); |
997 | |
|
998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) |
999 | 0 | RETURN_FAILURE; /* cannot match */ |
1000 | | |
1001 | 0 | state->ptr = ptr; |
1002 | |
|
1003 | 0 | if (pattern[1] == 0) |
1004 | 0 | ctx->count = 0; |
1005 | 0 | else { |
1006 | | /* count using pattern min as the maximum */ |
1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); |
1008 | 0 | RETURN_ON_ERROR(ret); |
1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) |
1011 | | /* didn't match minimum number of times */ |
1012 | 0 | RETURN_FAILURE; |
1013 | | /* advance past minimum matches of repeat */ |
1014 | 0 | ctx->count = ret; |
1015 | 0 | ptr += ctx->count; |
1016 | 0 | } |
1017 | | |
1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && |
1019 | 0 | !(ctx->toplevel && |
1020 | 0 | ((state->match_all && ptr != state->end) || |
1021 | 0 | (state->must_advance && ptr == state->start)))) |
1022 | 0 | { |
1023 | | /* tail is empty. we're finished */ |
1024 | 0 | state->ptr = ptr; |
1025 | 0 | RETURN_SUCCESS; |
1026 | |
|
1027 | 0 | } else { |
1028 | | /* general case */ |
1029 | 0 | LASTMARK_SAVE(); |
1030 | 0 | if (state->repeat) |
1031 | 0 | MARK_PUSH(ctx->lastmark); |
1032 | | |
1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT |
1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { |
1035 | 0 | state->ptr = ptr; |
1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, |
1037 | 0 | pattern+pattern[0]); |
1038 | 0 | if (ret) { |
1039 | 0 | if (state->repeat) |
1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1041 | 0 | RETURN_ON_ERROR(ret); |
1042 | 0 | RETURN_SUCCESS; |
1043 | 0 | } |
1044 | 0 | if (state->repeat) |
1045 | 0 | MARK_POP_KEEP(ctx->lastmark); |
1046 | 0 | LASTMARK_RESTORE(); |
1047 | |
|
1048 | 0 | state->ptr = ptr; |
1049 | 0 | ret = SRE(count)(state, pattern+3, 1); |
1050 | 0 | RETURN_ON_ERROR(ret); |
1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1052 | 0 | if (ret == 0) |
1053 | 0 | break; |
1054 | 0 | assert(ret == 1); |
1055 | 0 | ptr++; |
1056 | 0 | ctx->count++; |
1057 | 0 | } |
1058 | 0 | if (state->repeat) |
1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1060 | 0 | } |
1061 | 0 | RETURN_FAILURE; |
1062 | | |
1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): |
1064 | | /* match repeated sequence (maximizing regexp) without |
1065 | | backtracking */ |
1066 | | |
1067 | | /* this operator only works if the repeated item is |
1068 | | exactly one character wide, and we're not already |
1069 | | collecting backtracking points. for other cases, |
1070 | | use the POSSESSIVE_REPEAT operator */ |
1071 | | |
1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> |
1073 | | tail */ |
1074 | |
|
1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, |
1076 | 0 | ptr, pattern[1], pattern[2])); |
1077 | |
|
1078 | 0 | if (ptr + pattern[1] > end) { |
1079 | 0 | RETURN_FAILURE; /* cannot match */ |
1080 | 0 | } |
1081 | | |
1082 | 0 | state->ptr = ptr; |
1083 | |
|
1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); |
1085 | 0 | RETURN_ON_ERROR(ret); |
1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1087 | 0 | ctx->count = ret; |
1088 | 0 | ptr += ctx->count; |
1089 | | |
1090 | | /* when we arrive here, count contains the number of |
1091 | | matches, and ptr points to the tail of the target |
1092 | | string. check if the rest of the pattern matches, |
1093 | | and fail if not. */ |
1094 | | |
1095 | | /* Test for not enough repetitions in match */ |
1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { |
1097 | 0 | RETURN_FAILURE; |
1098 | 0 | } |
1099 | | |
1100 | | /* Update the pattern to point to the next op code */ |
1101 | 0 | pattern += pattern[0]; |
1102 | | |
1103 | | /* Let the tail be evaluated separately and consider this |
1104 | | match successful. */ |
1105 | 0 | if (*pattern == SRE_OP_SUCCESS && |
1106 | 0 | ptr == state->end && |
1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) |
1108 | 0 | { |
1109 | | /* tail is empty. we're finished */ |
1110 | 0 | state->ptr = ptr; |
1111 | 0 | RETURN_SUCCESS; |
1112 | 0 | } |
1113 | | |
1114 | | /* Attempt to match the rest of the string */ |
1115 | 0 | DISPATCH; |
1116 | | |
1117 | 180M | TARGET(SRE_OP_REPEAT): |
1118 | | /* create repeat context. all the hard work is done |
1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ |
1120 | | /* <REPEAT> <skip> <1=min> <2=max> |
1121 | | item <UNTIL> tail */ |
1122 | 180M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, |
1123 | 180M | pattern[1], pattern[2])); |
1124 | | |
1125 | | /* install new repeat context */ |
1126 | 180M | ctx->u.rep = repeat_pool_malloc(state); |
1127 | 180M | if (!ctx->u.rep) { |
1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); |
1129 | 0 | } |
1130 | 180M | ctx->u.rep->count = -1; |
1131 | 180M | ctx->u.rep->pattern = pattern; |
1132 | 180M | ctx->u.rep->prev = state->repeat; |
1133 | 180M | ctx->u.rep->last_ptr = NULL; |
1134 | 180M | state->repeat = ctx->u.rep; |
1135 | | |
1136 | 180M | state->ptr = ptr; |
1137 | 180M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); |
1138 | 180M | state->repeat = ctx->u.rep->prev; |
1139 | 180M | repeat_pool_free(state, ctx->u.rep); |
1140 | | |
1141 | 180M | if (ret) { |
1142 | 180M | RETURN_ON_ERROR(ret); |
1143 | 180M | RETURN_SUCCESS; |
1144 | 180M | } |
1145 | 90.1k | RETURN_FAILURE; |
1146 | | |
1147 | 327M | TARGET(SRE_OP_MAX_UNTIL): |
1148 | | /* maximizing repeat */ |
1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ |
1150 | | |
1151 | | /* FIXME: we probably need to deal with zero-width |
1152 | | matches in here... */ |
1153 | | |
1154 | 327M | ctx->u.rep = state->repeat; |
1155 | 327M | if (!ctx->u.rep) |
1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1157 | | |
1158 | 327M | state->ptr = ptr; |
1159 | | |
1160 | 327M | ctx->count = ctx->u.rep->count+1; |
1161 | | |
1162 | 327M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, |
1163 | 327M | ptr, ctx->count)); |
1164 | | |
1165 | 327M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1166 | | /* not enough matches */ |
1167 | 0 | ctx->u.rep->count = ctx->count; |
1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, |
1169 | 0 | ctx->u.rep->pattern+3); |
1170 | 0 | if (ret) { |
1171 | 0 | RETURN_ON_ERROR(ret); |
1172 | 0 | RETURN_SUCCESS; |
1173 | 0 | } |
1174 | 0 | ctx->u.rep->count = ctx->count-1; |
1175 | 0 | state->ptr = ptr; |
1176 | 0 | RETURN_FAILURE; |
1177 | 0 | } |
1178 | | |
1179 | 327M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || |
1180 | 327M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && |
1181 | 327M | state->ptr != ctx->u.rep->last_ptr) { |
1182 | | /* we may have enough matches, but if we can |
1183 | | match another item, do so */ |
1184 | 317M | ctx->u.rep->count = ctx->count; |
1185 | 317M | LASTMARK_SAVE(); |
1186 | 317M | MARK_PUSH(ctx->lastmark); |
1187 | | /* zero-width match protection */ |
1188 | 317M | LAST_PTR_PUSH(); |
1189 | 317M | ctx->u.rep->last_ptr = state->ptr; |
1190 | 317M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, |
1191 | 317M | ctx->u.rep->pattern+3); |
1192 | 317M | LAST_PTR_POP(); |
1193 | 317M | if (ret) { |
1194 | 145M | MARK_POP_DISCARD(ctx->lastmark); |
1195 | 145M | RETURN_ON_ERROR(ret); |
1196 | 145M | RETURN_SUCCESS; |
1197 | 145M | } |
1198 | 172M | MARK_POP(ctx->lastmark); |
1199 | 172M | LASTMARK_RESTORE(); |
1200 | 172M | ctx->u.rep->count = ctx->count-1; |
1201 | 172M | state->ptr = ptr; |
1202 | 172M | } |
1203 | | |
1204 | | /* cannot match more repeated items here. make sure the |
1205 | | tail matches */ |
1206 | 181M | state->repeat = ctx->u.rep->prev; |
1207 | 181M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); |
1208 | 181M | state->repeat = ctx->u.rep; // restore repeat before return |
1209 | | |
1210 | 181M | RETURN_ON_SUCCESS(ret); |
1211 | 1.38M | state->ptr = ptr; |
1212 | 1.38M | RETURN_FAILURE; |
1213 | | |
1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): |
1215 | | /* minimizing repeat */ |
1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ |
1217 | |
|
1218 | 0 | ctx->u.rep = state->repeat; |
1219 | 0 | if (!ctx->u.rep) |
1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); |
1221 | | |
1222 | 0 | state->ptr = ptr; |
1223 | |
|
1224 | 0 | ctx->count = ctx->u.rep->count+1; |
1225 | |
|
1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, |
1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); |
1228 | |
|
1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { |
1230 | | /* not enough matches */ |
1231 | 0 | ctx->u.rep->count = ctx->count; |
1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, |
1233 | 0 | ctx->u.rep->pattern+3); |
1234 | 0 | if (ret) { |
1235 | 0 | RETURN_ON_ERROR(ret); |
1236 | 0 | RETURN_SUCCESS; |
1237 | 0 | } |
1238 | 0 | ctx->u.rep->count = ctx->count-1; |
1239 | 0 | state->ptr = ptr; |
1240 | 0 | RETURN_FAILURE; |
1241 | 0 | } |
1242 | | |
1243 | | /* see if the tail matches */ |
1244 | 0 | state->repeat = ctx->u.rep->prev; |
1245 | |
|
1246 | 0 | LASTMARK_SAVE(); |
1247 | 0 | if (state->repeat) |
1248 | 0 | MARK_PUSH(ctx->lastmark); |
1249 | | |
1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); |
1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; |
1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return |
1253 | |
|
1254 | 0 | if (ret) { |
1255 | 0 | if (repeat_of_tail) |
1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1257 | 0 | RETURN_ON_ERROR(ret); |
1258 | 0 | RETURN_SUCCESS; |
1259 | 0 | } |
1260 | 0 | if (repeat_of_tail) |
1261 | 0 | MARK_POP(ctx->lastmark); |
1262 | 0 | LASTMARK_RESTORE(); |
1263 | |
|
1264 | 0 | state->ptr = ptr; |
1265 | |
|
1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] |
1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || |
1268 | 0 | state->ptr == ctx->u.rep->last_ptr) |
1269 | 0 | RETURN_FAILURE; |
1270 | | |
1271 | 0 | ctx->u.rep->count = ctx->count; |
1272 | | /* zero-width match protection */ |
1273 | 0 | LAST_PTR_PUSH(); |
1274 | 0 | ctx->u.rep->last_ptr = state->ptr; |
1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, |
1276 | 0 | ctx->u.rep->pattern+3); |
1277 | 0 | LAST_PTR_POP(); |
1278 | 0 | if (ret) { |
1279 | 0 | RETURN_ON_ERROR(ret); |
1280 | 0 | RETURN_SUCCESS; |
1281 | 0 | } |
1282 | 0 | ctx->u.rep->count = ctx->count-1; |
1283 | 0 | state->ptr = ptr; |
1284 | 0 | RETURN_FAILURE; |
1285 | | |
1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): |
1287 | | /* create possessive repeat contexts. */ |
1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern |
1289 | | <SUCCESS> tail */ |
1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, |
1291 | 0 | ptr, pattern[1], pattern[2])); |
1292 | | |
1293 | | /* Set the global Input pointer to this context's Input |
1294 | | pointer */ |
1295 | 0 | state->ptr = ptr; |
1296 | | |
1297 | | /* Set state->repeat to non-NULL */ |
1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); |
1299 | 0 | if (!ctx->u.rep) { |
1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); |
1301 | 0 | } |
1302 | 0 | ctx->u.rep->count = -1; |
1303 | 0 | ctx->u.rep->pattern = NULL; |
1304 | 0 | ctx->u.rep->prev = state->repeat; |
1305 | 0 | ctx->u.rep->last_ptr = NULL; |
1306 | 0 | state->repeat = ctx->u.rep; |
1307 | | |
1308 | | /* Initialize Count to 0 */ |
1309 | 0 | ctx->count = 0; |
1310 | | |
1311 | | /* Check for minimum required matches. */ |
1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { |
1313 | | /* not enough matches */ |
1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, |
1315 | 0 | &pattern[3]); |
1316 | 0 | if (ret) { |
1317 | 0 | RETURN_ON_ERROR(ret); |
1318 | 0 | ctx->count++; |
1319 | 0 | } |
1320 | 0 | else { |
1321 | 0 | state->ptr = ptr; |
1322 | | /* Restore state->repeat */ |
1323 | 0 | state->repeat = ctx->u.rep->prev; |
1324 | 0 | repeat_pool_free(state, ctx->u.rep); |
1325 | 0 | RETURN_FAILURE; |
1326 | 0 | } |
1327 | 0 | } |
1328 | | |
1329 | | /* Clear the context's Input stream pointer so that it |
1330 | | doesn't match the global state so that the while loop can |
1331 | | be entered. */ |
1332 | 0 | ptr = NULL; |
1333 | | |
1334 | | /* Keep trying to parse the <pattern> sub-pattern until the |
1335 | | end is reached, creating a new context each time. */ |
1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || |
1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && |
1338 | 0 | state->ptr != ptr) { |
1339 | | /* Save the Capture Group Marker state into the current |
1340 | | Context and back up the current highest number |
1341 | | Capture Group marker. */ |
1342 | 0 | LASTMARK_SAVE(); |
1343 | 0 | MARK_PUSH(ctx->lastmark); |
1344 | | |
1345 | | /* zero-width match protection */ |
1346 | | /* Set the context's Input Stream pointer to be the |
1347 | | current Input Stream pointer from the global |
1348 | | state. When the loop reaches the next iteration, |
1349 | | the context will then store the last known good |
1350 | | position with the global state holding the |
1351 | | Input Stream position that has been updated with |
1352 | | the most recent match. Thus, if state's Input |
1353 | | stream remains the same as the one stored in the |
1354 | | current Context, we know we have successfully |
1355 | | matched an empty string and that all subsequent |
1356 | | matches will also be the empty string until the |
1357 | | maximum number of matches are counted, and because |
1358 | | of this, we could immediately stop at that point and |
1359 | | consider this match successful. */ |
1360 | 0 | ptr = state->ptr; |
1361 | | |
1362 | | /* We have not reached the maximum matches, so try to |
1363 | | match once more. */ |
1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, |
1365 | 0 | &pattern[3]); |
1366 | | |
1367 | | /* Check to see if the last attempted match |
1368 | | succeeded. */ |
1369 | 0 | if (ret) { |
1370 | | /* Drop the saved highest number Capture Group |
1371 | | marker saved above and use the newly updated |
1372 | | value. */ |
1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); |
1374 | 0 | RETURN_ON_ERROR(ret); |
1375 | | |
1376 | | /* Success, increment the count. */ |
1377 | 0 | ctx->count++; |
1378 | 0 | } |
1379 | | /* Last attempted match failed. */ |
1380 | 0 | else { |
1381 | | /* Restore the previously saved highest number |
1382 | | Capture Group marker since the last iteration |
1383 | | did not match, then restore that to the global |
1384 | | state. */ |
1385 | 0 | MARK_POP(ctx->lastmark); |
1386 | 0 | LASTMARK_RESTORE(); |
1387 | | |
1388 | | /* Restore the global Input Stream pointer |
1389 | | since it can change after jumps. */ |
1390 | 0 | state->ptr = ptr; |
1391 | | |
1392 | | /* We have sufficient matches, so exit loop. */ |
1393 | 0 | break; |
1394 | 0 | } |
1395 | 0 | } |
1396 | | |
1397 | | /* Restore state->repeat */ |
1398 | 0 | state->repeat = ctx->u.rep->prev; |
1399 | 0 | repeat_pool_free(state, ctx->u.rep); |
1400 | | |
1401 | | /* Evaluate Tail */ |
1402 | | /* Jump to end of pattern indicated by skip, and then skip |
1403 | | the SUCCESS op code that follows it. */ |
1404 | 0 | pattern += pattern[0] + 1; |
1405 | 0 | ptr = state->ptr; |
1406 | 0 | DISPATCH; |
1407 | | |
1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): |
1409 | | /* Atomic Group Sub Pattern */ |
1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ |
1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); |
1412 | | |
1413 | | /* Set the global Input pointer to this context's Input |
1414 | | pointer */ |
1415 | 0 | state->ptr = ptr; |
1416 | | |
1417 | | /* Evaluate the Atomic Group in a new context, terminating |
1418 | | when the end of the group, represented by a SUCCESS op |
1419 | | code, is reached. */ |
1420 | | /* Group Pattern begins at an offset of 1 code. */ |
1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, |
1422 | 0 | &pattern[1]); |
1423 | | |
1424 | | /* Test Exit Condition */ |
1425 | 0 | RETURN_ON_ERROR(ret); |
1426 | | |
1427 | 0 | if (ret == 0) { |
1428 | | /* Atomic Group failed to Match. */ |
1429 | 0 | state->ptr = ptr; |
1430 | 0 | RETURN_FAILURE; |
1431 | 0 | } |
1432 | | |
1433 | | /* Evaluate Tail */ |
1434 | | /* Jump to the tail by advancing by skip; no extra +1 is applied |
1435 | | here, so skip presumably already spans the group's SUCCESS op code. */ |
1436 | 0 | pattern += pattern[0]; |
1437 | 0 | ptr = state->ptr; |
1438 | 0 | DISPATCH; |
1439 | | |
1440 | 0 | TARGET(SRE_OP_GROUPREF): |
1441 | | /* match backreference */ |
1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, |
1443 | 0 | ptr, pattern[0])); |
1444 | 0 | { |
1445 | 0 | int groupref = pattern[0] * 2; |
1446 | 0 | if (groupref >= state->lastmark) { |
1447 | 0 | RETURN_FAILURE; |
1448 | 0 | } else { |
1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1451 | 0 | if (!p || !e || e < p) |
1452 | 0 | RETURN_FAILURE; |
1453 | 0 | while (p < e) { |
1454 | 0 | if (ptr >= end || *ptr != *p) |
1455 | 0 | RETURN_FAILURE; |
1456 | 0 | p++; |
1457 | 0 | ptr++; |
1458 | 0 | } |
1459 | 0 | } |
1460 | 0 | } |
1461 | 0 | pattern++; |
1462 | 0 | DISPATCH; |
1463 | | |
1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): |
1465 | | /* match backreference */ |
1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, |
1467 | 0 | ptr, pattern[0])); |
1468 | 0 | { |
1469 | 0 | int groupref = pattern[0] * 2; |
1470 | 0 | if (groupref >= state->lastmark) { |
1471 | 0 | RETURN_FAILURE; |
1472 | 0 | } else { |
1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1475 | 0 | if (!p || !e || e < p) |
1476 | 0 | RETURN_FAILURE; |
1477 | 0 | while (p < e) { |
1478 | 0 | if (ptr >= end || |
1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) |
1480 | 0 | RETURN_FAILURE; |
1481 | 0 | p++; |
1482 | 0 | ptr++; |
1483 | 0 | } |
1484 | 0 | } |
1485 | 0 | } |
1486 | 0 | pattern++; |
1487 | 0 | DISPATCH; |
1488 | | |
1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): |
1490 | | /* match backreference */ |
1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, |
1492 | 0 | ptr, pattern[0])); |
1493 | 0 | { |
1494 | 0 | int groupref = pattern[0] * 2; |
1495 | 0 | if (groupref >= state->lastmark) { |
1496 | 0 | RETURN_FAILURE; |
1497 | 0 | } else { |
1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1500 | 0 | if (!p || !e || e < p) |
1501 | 0 | RETURN_FAILURE; |
1502 | 0 | while (p < e) { |
1503 | 0 | if (ptr >= end || |
1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) |
1505 | 0 | RETURN_FAILURE; |
1506 | 0 | p++; |
1507 | 0 | ptr++; |
1508 | 0 | } |
1509 | 0 | } |
1510 | 0 | } |
1511 | 0 | pattern++; |
1512 | 0 | DISPATCH; |
1513 | | |
1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): |
1515 | | /* match backreference */ |
1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, |
1517 | 0 | ptr, pattern[0])); |
1518 | 0 | { |
1519 | 0 | int groupref = pattern[0] * 2; |
1520 | 0 | if (groupref >= state->lastmark) { |
1521 | 0 | RETURN_FAILURE; |
1522 | 0 | } else { |
1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1525 | 0 | if (!p || !e || e < p) |
1526 | 0 | RETURN_FAILURE; |
1527 | 0 | while (p < e) { |
1528 | 0 | if (ptr >= end || |
1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) |
1530 | 0 | RETURN_FAILURE; |
1531 | 0 | p++; |
1532 | 0 | ptr++; |
1533 | 0 | } |
1534 | 0 | } |
1535 | 0 | } |
1536 | 0 | pattern++; |
1537 | 0 | DISPATCH; |
1538 | | |
1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): |
1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, |
1541 | 0 | ptr, pattern[0])); |
1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ |
1543 | 0 | { |
1544 | 0 | int groupref = pattern[0] * 2; |
1545 | 0 | if (groupref >= state->lastmark) { |
1546 | 0 | pattern += pattern[1]; |
1547 | 0 | DISPATCH; |
1548 | 0 | } else { |
1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; |
1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; |
1551 | 0 | if (!p || !e || e < p) { |
1552 | 0 | pattern += pattern[1]; |
1553 | 0 | DISPATCH; |
1554 | 0 | } |
1555 | 0 | } |
1556 | 0 | } |
1557 | 0 | pattern += 2; |
1558 | 0 | DISPATCH; |
1559 | | |
1560 | 122M | TARGET(SRE_OP_ASSERT): |
1561 | | /* assert subpattern */ |
1562 | | /* <ASSERT> <skip> <back> <pattern> */ |
1563 | 122M | TRACE(("|%p|%p|ASSERT %d\n", pattern, |
1564 | 122M | ptr, pattern[1])); |
1565 | 122M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) |
1566 | 0 | RETURN_FAILURE; |
1567 | 122M | state->ptr = ptr - pattern[1]; |
1568 | 122M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); |
1569 | 122M | RETURN_ON_FAILURE(ret); |
1570 | 118M | pattern += pattern[0]; |
1571 | 118M | DISPATCH; |
1572 | | |
1573 | 118M | TARGET(SRE_OP_ASSERT_NOT): |
1574 | | /* assert not subpattern */ |
1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ |
1576 | 39.2M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, |
1577 | 39.2M | ptr, pattern[1])); |
1578 | 39.2M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { |
1579 | 39.2M | state->ptr = ptr - pattern[1]; |
1580 | 39.2M | LASTMARK_SAVE(); |
1581 | 39.2M | if (state->repeat) |
1582 | 39.2M | MARK_PUSH(ctx->lastmark); |
1583 | | |
1584 | 78.5M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); |
1585 | 78.5M | if (ret) { |
1586 | 9.82k | if (state->repeat) |
1587 | 9.82k | MARK_POP_DISCARD(ctx->lastmark); |
1588 | 9.82k | RETURN_ON_ERROR(ret); |
1589 | 9.82k | RETURN_FAILURE; |
1590 | 9.82k | } |
1591 | 39.2M | if (state->repeat) |
1592 | 39.2M | MARK_POP(ctx->lastmark); |
1593 | 39.2M | LASTMARK_RESTORE(); |
1594 | 39.2M | } |
1595 | 39.2M | pattern += pattern[0]; |
1596 | 39.2M | DISPATCH; |
1597 | | |
1598 | 39.2M | TARGET(SRE_OP_FAILURE): |
1599 | | /* immediate failure */ |
1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); |
1601 | 0 | RETURN_FAILURE; |
1602 | | |
1603 | | #if !USE_COMPUTED_GOTOS |
1604 | | default: |
1605 | | #endif |
1606 | | // Also any unused opcodes: |
1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): |
1608 | 0 | TARGET(SRE_OP_SUBPATTERN): |
1609 | 0 | TARGET(SRE_OP_RANGE): |
1610 | 0 | TARGET(SRE_OP_NEGATE): |
1611 | 0 | TARGET(SRE_OP_BIGCHARSET): |
1612 | 0 | TARGET(SRE_OP_CHARSET): |
1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, |
1614 | 0 | pattern[-1])); |
1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); |
1616 | |
|
1617 | 0 | } |
1618 | | |
1619 | 1.87G | exit: |
1620 | 1.87G | ctx_pos = ctx->last_ctx_pos; |
1621 | 1.87G | jump = ctx->jump; |
1622 | 1.87G | DATA_POP_DISCARD(ctx); |
1623 | 1.87G | if (ctx_pos == -1) { |
1624 | 649M | state->sigcount = sigcount; |
1625 | 649M | return ret; |
1626 | 649M | } |
1627 | 1.22G | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); |
1628 | | |
1629 | 1.22G | switch (jump) { |
1630 | 317M | case JUMP_MAX_UNTIL_2: |
1631 | 317M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); |
1632 | 317M | goto jump_max_until_2; |
1633 | 181M | case JUMP_MAX_UNTIL_3: |
1634 | 181M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); |
1635 | 181M | goto jump_max_until_3; |
1636 | 0 | case JUMP_MIN_UNTIL_2: |
1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); |
1638 | 0 | goto jump_min_until_2; |
1639 | 0 | case JUMP_MIN_UNTIL_3: |
1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); |
1641 | 0 | goto jump_min_until_3; |
1642 | 159M | case JUMP_BRANCH: |
1643 | 159M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); |
1644 | 159M | goto jump_branch; |
1645 | 0 | case JUMP_MAX_UNTIL_1: |
1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); |
1647 | 0 | goto jump_max_until_1; |
1648 | 0 | case JUMP_MIN_UNTIL_1: |
1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); |
1650 | 0 | goto jump_min_until_1; |
1651 | 0 | case JUMP_POSS_REPEAT_1: |
1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); |
1653 | 0 | goto jump_poss_repeat_1; |
1654 | 0 | case JUMP_POSS_REPEAT_2: |
1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); |
1656 | 0 | goto jump_poss_repeat_2; |
1657 | 180M | case JUMP_REPEAT: |
1658 | 180M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); |
1659 | 180M | goto jump_repeat; |
1660 | 5.53M | case JUMP_REPEAT_ONE_1: |
1661 | 5.53M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); |
1662 | 5.53M | goto jump_repeat_one_1; |
1663 | 217M | case JUMP_REPEAT_ONE_2: |
1664 | 217M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); |
1665 | 217M | goto jump_repeat_one_2; |
1666 | 0 | case JUMP_MIN_REPEAT_ONE: |
1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); |
1668 | 0 | goto jump_min_repeat_one; |
1669 | 0 | case JUMP_ATOMIC_GROUP: |
1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); |
1671 | 0 | goto jump_atomic_group; |
1672 | 122M | case JUMP_ASSERT: |
1673 | 122M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); |
1674 | 122M | goto jump_assert; |
1675 | 39.2M | case JUMP_ASSERT_NOT: |
1676 | 39.2M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); |
1677 | 39.2M | goto jump_assert_not; |
1678 | 0 | case JUMP_NONE: |
1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, |
1680 | 0 | ptr, ret)); |
1681 | 0 | break; |
1682 | 1.22G | } |
1683 | | |
1684 | 0 | return ret; /* should never get here */ |
1685 | 1.22G | } Line | Count | Source | 600 | 211M | { | 601 | 211M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 211M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 211M | Py_ssize_t ret = 0; | 604 | 211M | int jump; | 605 | 211M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 211M | SRE(match_context)* ctx; | 608 | 211M | SRE(match_context)* nextctx; | 609 | 211M | INIT_TRACE(state); | 610 | | | 611 | 211M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 211M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 211M | ctx->last_ctx_pos = -1; | 615 | 211M | ctx->jump = JUMP_NONE; | 616 | 211M | ctx->toplevel = toplevel; | 617 | 211M | ctx_pos = alloc_pos; | 618 | | | 619 | 211M | #if USE_COMPUTED_GOTOS | 620 | 211M | #include "sre_targets.h" | 621 | 211M | #endif | 622 | | | 623 | 345M | entrance: | 624 | | | 625 | 345M | ; // Fashion statement. | 626 | 345M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 345M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 29.7M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 5.84M | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 5.84M | end - ptr, (size_t) pattern[3])); | 634 | 5.84M | RETURN_FAILURE; | 635 | 5.84M | } | 636 | 23.9M | pattern += pattern[1] + 1; | 637 | 23.9M | } | 638 | | | 639 | 339M | #if USE_COMPUTED_GOTOS | 640 | 339M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 339M | { | 647 | | | 648 | 339M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 186M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 186M | ptr, pattern[0])); | 653 | 186M | { | 654 | 186M | int i = pattern[0]; | 655 | 186M | if (i & 1) | 656 | 17.1M | state->lastindex = i/2 + 1; | 657 | 186M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 183M | int j = state->lastmark + 1; | 663 | 186M | while (j < i) | 664 | 3.35M | state->mark[j++] = NULL; | 665 | 183M | state->lastmark = i; | 666 | 183M | } | 667 | 186M | state->mark[i] = ptr; | 668 | 186M | } | 669 | 186M | pattern++; | 670 | 186M | DISPATCH; | 671 | | | 672 | 186M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 56.2M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 56.2M | ptr, *pattern)); | 677 | 56.2M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 19.6M | RETURN_FAILURE; | 679 | 36.5M | pattern++; | 680 | 36.5M | ptr++; | 681 | 36.5M | DISPATCH; | 682 | | | 683 | 36.5M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 48.8M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 48.8M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 48.8M | if (ctx->toplevel && | 698 | 48.8M | ((state->match_all && ptr != state->end) || | 699 | 17.9M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 48.8M | state->ptr = ptr; | 704 | 48.8M | RETURN_SUCCESS; | 705 | | | 706 | 11.6M | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 11.6M | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 11.6M | if (!SRE(at)(state, ptr, *pattern)) | 711 | 2.50M | RETURN_FAILURE; | 712 | 9.14M | pattern++; | 713 | 9.14M | DISPATCH; | 714 | | | 715 | 9.14M | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 
| 0 | if (ptr >= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 34.4M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 34.4M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 34.4M | if (ptr >= end || | 749 | 34.4M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 376k | RETURN_FAILURE; | 751 | 34.0M | pattern += pattern[0]; | 752 | 34.0M | ptr++; | 753 | 34.0M | DISPATCH; | 754 | | | 755 | 34.0M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 1.45M | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 1.45M | pattern, ptr, pattern[0])); | 758 | 1.45M | if (ptr >= end || | 759 | 1.45M | sre_lower_ascii(*ptr) != *pattern) | 760 | 235k | RETURN_FAILURE; | 761 | 1.21M | pattern++; | 762 | 1.21M | ptr++; | 763 | 1.21M | DISPATCH; | 764 | | | 765 | 1.21M | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | 
RETURN_FAILURE; | 781 | 0 | pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | 
ptr++; | 842 | 0 | DISPATCH; | 843 | | | 844 | 22.0M | TARGET(SRE_OP_JUMP): | 845 | 22.0M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 22.0M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 22.0M | ptr, pattern[0])); | 850 | 22.0M | pattern += pattern[0]; | 851 | 22.0M | DISPATCH; | 852 | | | 853 | 41.6M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 41.6M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 41.6M | LASTMARK_SAVE(); | 858 | 41.6M | if (state->repeat) | 859 | 5.77M | MARK_PUSH(ctx->lastmark); | 860 | 124M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 102M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 102M | (ptr >= end || | 863 | 51.8M | (SRE_CODE) *ptr != pattern[2])) | 864 | 20.4M | continue; | 865 | 82.2M | if (pattern[1] == SRE_OP_IN && | 866 | 82.2M | (ptr >= end || | 867 | 6.99M | !SRE(charset)(state, pattern + 3, | 868 | 6.98M | (SRE_CODE) *ptr))) | 869 | 3.97M | continue; | 870 | 78.3M | state->ptr = ptr; | 871 | 78.3M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 78.3M | if (ret) { | 873 | 20.2M | if (state->repeat) | 874 | 5.63M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 20.2M | RETURN_ON_ERROR(ret); | 876 | 20.2M | RETURN_SUCCESS; | 877 | 20.2M | } | 878 | 58.0M | if (state->repeat) | 879 | 7.56k | MARK_POP_KEEP(ctx->lastmark); | 880 | 58.0M | LASTMARK_RESTORE(); | 881 | 58.0M | } | 882 | 21.4M | if (state->repeat) | 883 | 134k | MARK_POP_DISCARD(ctx->lastmark); | 884 | 21.4M | RETURN_FAILURE; | 885 | | | 886 | 201M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 201M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 201M | pattern[1], pattern[2])); | 898 | | | 899 | 201M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 939k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 201M | state->ptr = ptr; | 903 | | | 904 | 201M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 201M | RETURN_ON_ERROR(ret); | 906 | 201M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 201M | ctx->count = ret; | 908 | 201M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 201M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 178M | RETURN_FAILURE; | 917 | | | 918 | 22.4M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 22.4M | ptr == state->end && | 920 | 22.4M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 51.6k | { | 922 | | /* tail is empty. we're finished */ | 923 | 51.6k | state->ptr = ptr; | 924 | 51.6k | RETURN_SUCCESS; | 925 | 51.6k | } | 926 | | | 927 | 22.3M | LASTMARK_SAVE(); | 928 | 22.3M | if (state->repeat) | 929 | 11.7M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 22.3M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 2.70M | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 2.70M | for (;;) { | 936 | 10.9M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 10.9M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 8.24M | ptr--; | 939 | 8.24M | ctx->count--; | 940 | 8.24M | } | 941 | 2.70M | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 1.36M | break; | 943 | 1.34M | state->ptr = ptr; | 944 | 1.34M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 1.34M | pattern+pattern[0]); | 946 | 1.34M | if (ret) { | 947 | 1.34M | if (state->repeat) | 948 | 202k | MARK_POP_DISCARD(ctx->lastmark); | 949 | 1.34M | RETURN_ON_ERROR(ret); | 950 | 1.34M | RETURN_SUCCESS; | 951 | 1.34M | } | 952 | 273 | if (state->repeat) | 953 | 0 | MARK_POP_KEEP(ctx->lastmark); | 954 | 273 | LASTMARK_RESTORE(); | 955 | | | 956 | 273 | ptr--; | 957 | 273 | ctx->count--; | 958 | 273 | } | 959 | 1.36M | if (state->repeat) | 960 | 212 | MARK_POP_DISCARD(ctx->lastmark); | 961 | 19.6M | } else { | 962 | | /* general case */ | 963 | 21.5M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 20.7M | state->ptr = ptr; | 965 | 20.7M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 20.7M | pattern+pattern[0]); | 967 | 20.7M | if (ret) { | 968 | 18.8M | if (state->repeat) | 969 | 10.9M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 18.8M | RETURN_ON_ERROR(ret); | 971 | 18.8M | RETURN_SUCCESS; | 972 | 18.8M | } | 973 | 1.83M | if (state->repeat) | 974 | 1.17M | MARK_POP_KEEP(ctx->lastmark); | 975 | 1.83M | LASTMARK_RESTORE(); | 976 | | | 977 | 1.83M | ptr--; | 978 | 1.83M | ctx->count--; | 979 | 1.83M | } | 980 | 798k | if (state->repeat) | 981 | 631k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 798k | } | 983 | 2.16M | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 | | 
exactly one character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 6.30M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 6.30M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 6.30M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 6.30M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 6.30M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 6.30M | ctx->u.rep->count = -1; | 1131 | 6.30M | ctx->u.rep->pattern = pattern; | 1132 | 6.30M | ctx->u.rep->prev = state->repeat; | 1133 | 6.30M | ctx->u.rep->last_ptr = NULL; | 1134 | 6.30M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 6.30M | state->ptr = ptr; | 1137 | 6.30M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 6.30M | state->repeat = ctx->u.rep->prev; | 1139 | 6.30M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 6.30M | if (ret) { | 1142 | 6.21M | RETURN_ON_ERROR(ret); | 1143 | 6.21M | RETURN_SUCCESS; | 1144 | 6.21M | } | 1145 | 88.6k | RETURN_FAILURE; | 1146 | | | 1147 | 17.9M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 17.9M | ctx->u.rep = state->repeat; | 1155 | 17.9M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 17.9M | state->ptr = ptr; | 1159 | | | 1160 | 17.9M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 17.9M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 17.9M | ptr, ctx->count)); | 1164 | | | 1165 | 17.9M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 17.9M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 17.9M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 17.9M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 14.3M | ctx->u.rep->count = ctx->count; | 1185 | 14.3M | LASTMARK_SAVE(); | 1186 | 14.3M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 14.3M | LAST_PTR_PUSH(); | 1189 | 14.3M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 14.3M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 14.3M | ctx->u.rep->pattern+3); | 1192 | 14.3M | LAST_PTR_POP(); | 1193 | 14.3M | if (ret) { | 1194 | 11.0M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 11.0M | RETURN_ON_ERROR(ret); | 1196 | 11.0M | RETURN_SUCCESS; | 1197 | 11.0M | } | 1198 | 3.29M | MARK_POP(ctx->lastmark); | 1199 | 3.29M | LASTMARK_RESTORE(); | 1200 | 3.29M | ctx->u.rep->count = ctx->count-1; | 1201 | 3.29M | state->ptr = ptr; | 1202 | 3.29M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 6.90M | state->repeat = ctx->u.rep->prev; | 1207 | 6.90M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 6.90M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 6.90M | RETURN_ON_SUCCESS(ret); | 1211 | 683k | state->ptr = ptr; | 1212 | 683k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the Input | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximin matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 2.90M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 2.90M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 2.90M | ptr, pattern[1])); | 1565 | 2.90M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 2.90M | state->ptr = ptr - pattern[1]; | 1568 | 2.90M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 2.90M | RETURN_ON_FAILURE(ret); | 1570 | 2.82M | pattern += pattern[0]; | 1571 | 2.82M | DISPATCH; | 1572 | | | 1573 | 2.82M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 2.60M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 2.60M | ptr, pattern[1])); | 1578 | 2.60M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 2.60M | state->ptr = ptr - pattern[1]; | 1580 | 2.60M | LASTMARK_SAVE(); | 1581 | 2.60M | if (state->repeat) | 1582 | 2.60M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 5.20M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 5.20M | if (ret) { | 1586 | 1.31k | if (state->repeat) | 1587 | 1.31k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 1.31k | RETURN_ON_ERROR(ret); | 1589 | 1.31k | RETURN_FAILURE; | 1590 | 1.31k | } | 1591 | 2.60M | if (state->repeat) | 1592 | 2.60M | MARK_POP(ctx->lastmark); | 1593 | 2.60M | LASTMARK_RESTORE(); | 1594 | 2.60M | } | 1595 | 2.60M | 
pattern += pattern[0]; | 1596 | 2.60M | DISPATCH; | 1597 | | | 1598 | 2.60M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 345M | exit: | 1620 | 345M | ctx_pos = ctx->last_ctx_pos; | 1621 | 345M | jump = ctx->jump; | 1622 | 345M | DATA_POP_DISCARD(ctx); | 1623 | 345M | if (ctx_pos == -1) { | 1624 | 211M | state->sigcount = sigcount; | 1625 | 211M | return ret; | 1626 | 211M | } | 1627 | 133M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 133M | switch (jump) { | 1630 | 14.3M | case JUMP_MAX_UNTIL_2: | 1631 | 14.3M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 14.3M | goto jump_max_until_2; | 1633 | 6.90M | case JUMP_MAX_UNTIL_3: | 1634 | 6.90M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 6.90M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 78.3M | case JUMP_BRANCH: | 1643 | 78.3M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 78.3M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 6.30M | case JUMP_REPEAT: | 1658 | 6.30M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 6.30M | goto jump_repeat; | 1660 | 1.34M | case JUMP_REPEAT_ONE_1: | 1661 | 1.34M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 1.34M | goto jump_repeat_one_1; | 1663 | 20.7M | case JUMP_REPEAT_ONE_2: | 
1664 | 20.7M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 20.7M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 2.90M | case JUMP_ASSERT: | 1673 | 2.90M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 2.90M | goto jump_assert; | 1675 | 2.60M | case JUMP_ASSERT_NOT: | 1676 | 2.60M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 2.60M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 133M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 133M | } |
Line | Count | Source | 600 | 311M | { | 601 | 311M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 311M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 311M | Py_ssize_t ret = 0; | 604 | 311M | int jump; | 605 | 311M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 311M | SRE(match_context)* ctx; | 608 | 311M | SRE(match_context)* nextctx; | 609 | 311M | INIT_TRACE(state); | 610 | | | 611 | 311M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 311M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 311M | ctx->last_ctx_pos = -1; | 615 | 311M | ctx->jump = JUMP_NONE; | 616 | 311M | ctx->toplevel = toplevel; | 617 | 311M | ctx_pos = alloc_pos; | 618 | | | 619 | 311M | #if USE_COMPUTED_GOTOS | 620 | 311M | #include "sre_targets.h" | 621 | 311M | #endif | 622 | | | 623 | 682M | entrance: | 624 | | | 625 | 682M | ; // Fashion statement. | 626 | 682M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 682M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 26.1M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 671 | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 671 | end - ptr, (size_t) pattern[3])); | 634 | 671 | RETURN_FAILURE; | 635 | 671 | } | 636 | 26.1M | pattern += pattern[1] + 1; | 637 | 26.1M | } | 638 | | | 639 | 682M | #if USE_COMPUTED_GOTOS | 640 | 682M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 682M | { | 647 | | | 648 | 682M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 301M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 301M | ptr, pattern[0])); | 653 | 301M | { | 654 | 301M | int i = pattern[0]; | 655 | 301M | if (i & 1) | 656 | 22.2M | state->lastindex = i/2 + 1; | 657 | 301M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 301M | int j = state->lastmark + 1; | 663 | 301M | while (j < i) | 664 | 106k | state->mark[j++] = NULL; | 665 | 301M | state->lastmark = i; | 666 | 301M | } | 667 | 301M | state->mark[i] = ptr; | 668 | 301M | } | 669 | 301M | pattern++; | 670 | 301M | DISPATCH; | 671 | | | 672 | 301M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 79.9M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 79.9M | ptr, *pattern)); | 677 | 79.9M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 54.3M | RETURN_FAILURE; | 679 | 25.6M | pattern++; | 680 | 25.6M | ptr++; | 681 | 25.6M | DISPATCH; | 682 | | | 683 | 25.6M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 104M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 104M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 104M | if (ctx->toplevel && | 698 | 104M | ((state->match_all && ptr != state->end) || | 699 | 23.5M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 104M | state->ptr = ptr; | 704 | 104M | RETURN_SUCCESS; | 705 | | | 706 | 1.12M | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 1.12M | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 1.12M | if (!SRE(at)(state, ptr, *pattern)) | 711 | 1.08M | RETURN_FAILURE; | 712 | 34.9k | pattern++; | 713 | 34.9k | DISPATCH; | 714 | | | 715 | 34.9k | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 | 0 | 
if (ptr >= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 129M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 129M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 129M | if (ptr >= end || | 749 | 129M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 4.46M | RETURN_FAILURE; | 751 | 124M | pattern += pattern[0]; | 752 | 124M | ptr++; | 753 | 124M | DISPATCH; | 754 | | | 755 | 124M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 2.86M | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 2.86M | pattern, ptr, pattern[0])); | 758 | 2.86M | if (ptr >= end || | 759 | 2.86M | sre_lower_ascii(*ptr) != *pattern) | 760 | 142k | RETURN_FAILURE; | 761 | 2.72M | pattern++; | 762 | 2.72M | ptr++; | 763 | 2.72M | DISPATCH; | 764 | | | 765 | 2.72M | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | RETURN_FAILURE; 
| 781 | 0 | pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | ptr++; | 842 | 0 | 
DISPATCH; | 843 | | | 844 | 25.7M | TARGET(SRE_OP_JUMP): | 845 | 25.7M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 25.7M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 25.7M | ptr, pattern[0])); | 850 | 25.7M | pattern += pattern[0]; | 851 | 25.7M | DISPATCH; | 852 | | | 853 | 45.0M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 45.0M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 45.0M | LASTMARK_SAVE(); | 858 | 45.0M | if (state->repeat) | 859 | 42.0M | MARK_PUSH(ctx->lastmark); | 860 | 107M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 88.1M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 88.1M | (ptr >= end || | 863 | 43.2M | (SRE_CODE) *ptr != pattern[2])) | 864 | 22.8M | continue; | 865 | 65.2M | if (pattern[1] == SRE_OP_IN && | 866 | 65.2M | (ptr >= end || | 867 | 41.2M | !SRE(charset)(state, pattern + 3, | 868 | 41.2M | (SRE_CODE) *ptr))) | 869 | 36.6M | continue; | 870 | 28.5M | state->ptr = ptr; | 871 | 28.5M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 28.5M | if (ret) { | 873 | 25.2M | if (state->repeat) | 874 | 23.8M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 25.2M | RETURN_ON_ERROR(ret); | 876 | 25.2M | RETURN_SUCCESS; | 877 | 25.2M | } | 878 | 3.32M | if (state->repeat) | 879 | 3.02k | MARK_POP_KEEP(ctx->lastmark); | 880 | 3.32M | LASTMARK_RESTORE(); | 881 | 3.32M | } | 882 | 19.7M | if (state->repeat) | 883 | 18.1M | MARK_POP_DISCARD(ctx->lastmark); | 884 | 19.7M | RETURN_FAILURE; | 885 | | | 886 | 312M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 312M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 312M | pattern[1], pattern[2])); | 898 | | | 899 | 312M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 200k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 312M | state->ptr = ptr; | 903 | | | 904 | 312M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 312M | RETURN_ON_ERROR(ret); | 906 | 312M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 312M | ctx->count = ret; | 908 | 312M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 312M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 242M | RETURN_FAILURE; | 917 | | | 918 | 70.1M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 70.1M | ptr == state->end && | 920 | 70.1M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 15.5k | { | 922 | | /* tail is empty. we're finished */ | 923 | 15.5k | state->ptr = ptr; | 924 | 15.5k | RETURN_SUCCESS; | 925 | 15.5k | } | 926 | | | 927 | 70.1M | LASTMARK_SAVE(); | 928 | 70.1M | if (state->repeat) | 929 | 41.6M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 70.1M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 887k | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 888k | for (;;) { | 936 | 18.4M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 18.4M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 17.5M | ptr--; | 939 | 17.5M | ctx->count--; | 940 | 17.5M | } | 941 | 888k | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 16.0k | break; | 943 | 872k | state->ptr = ptr; | 944 | 872k | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 872k | pattern+pattern[0]); | 946 | 872k | if (ret) { | 947 | 871k | if (state->repeat) | 948 | 848k | MARK_POP_DISCARD(ctx->lastmark); | 949 | 871k | RETURN_ON_ERROR(ret); | 950 | 871k | RETURN_SUCCESS; | 951 | 871k | } | 952 | 917 | if (state->repeat) | 953 | 0 | MARK_POP_KEEP(ctx->lastmark); | 954 | 917 | LASTMARK_RESTORE(); | 955 | | | 956 | 917 | ptr--; | 957 | 917 | ctx->count--; | 958 | 917 | } | 959 | 16.0k | if (state->repeat) | 960 | 285 | MARK_POP_DISCARD(ctx->lastmark); | 961 | 69.2M | } else { | 962 | | /* general case */ | 963 | 70.6M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 70.0M | state->ptr = ptr; | 965 | 70.0M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 70.0M | pattern+pattern[0]); | 967 | 70.0M | if (ret) { | 968 | 68.6M | if (state->repeat) | 969 | 40.1M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 68.6M | RETURN_ON_ERROR(ret); | 971 | 68.6M | RETURN_SUCCESS; | 972 | 68.6M | } | 973 | 1.38M | if (state->repeat) | 974 | 1.23M | MARK_POP_KEEP(ctx->lastmark); | 975 | 1.38M | LASTMARK_RESTORE(); | 976 | | | 977 | 1.38M | ptr--; | 978 | 1.38M | ctx->count--; | 979 | 1.38M | } | 980 | 622k | if (state->repeat) | 981 | 619k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 622k | } | 983 | 638k | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 | | exactly one 
character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 56.3M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 56.3M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 56.3M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 56.3M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 56.3M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 56.3M | ctx->u.rep->count = -1; | 1131 | 56.3M | ctx->u.rep->pattern = pattern; | 1132 | 56.3M | ctx->u.rep->prev = state->repeat; | 1133 | 56.3M | ctx->u.rep->last_ptr = NULL; | 1134 | 56.3M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 56.3M | state->ptr = ptr; | 1137 | 56.3M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 56.3M | state->repeat = ctx->u.rep->prev; | 1139 | 56.3M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 56.3M | if (ret) { | 1142 | 56.3M | RETURN_ON_ERROR(ret); | 1143 | 56.3M | RETURN_SUCCESS; | 1144 | 56.3M | } | 1145 | 954 | RETURN_FAILURE; | 1146 | | | 1147 | 100M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 100M | ctx->u.rep = state->repeat; | 1155 | 100M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 100M | state->ptr = ptr; | 1159 | | | 1160 | 100M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 100M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 100M | ptr, ctx->count)); | 1164 | | | 1165 | 100M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 100M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 100M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 100M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 99.2M | ctx->u.rep->count = ctx->count; | 1185 | 99.2M | LASTMARK_SAVE(); | 1186 | 99.2M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 99.2M | LAST_PTR_PUSH(); | 1189 | 99.2M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 99.2M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 99.2M | ctx->u.rep->pattern+3); | 1192 | 99.2M | LAST_PTR_POP(); | 1193 | 99.2M | if (ret) { | 1194 | 43.8M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 43.8M | RETURN_ON_ERROR(ret); | 1196 | 43.8M | RETURN_SUCCESS; | 1197 | 43.8M | } | 1198 | 55.3M | MARK_POP(ctx->lastmark); | 1199 | 55.3M | LASTMARK_RESTORE(); | 1200 | 55.3M | ctx->u.rep->count = ctx->count-1; | 1201 | 55.3M | state->ptr = ptr; | 1202 | 55.3M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 56.9M | state->repeat = ctx->u.rep->prev; | 1207 | 56.9M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 56.9M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 56.9M | RETURN_ON_SUCCESS(ret); | 1211 | 620k | state->ptr = ptr; | 1212 | 620k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximum matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 39.8M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 39.8M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 39.8M | ptr, pattern[1])); | 1565 | 39.8M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 39.8M | state->ptr = ptr - pattern[1]; | 1568 | 39.8M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 39.8M | RETURN_ON_FAILURE(ret); | 1570 | 36.9M | pattern += pattern[0]; | 1571 | 36.9M | DISPATCH; | 1572 | | | 1573 | 36.9M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 18.4M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 18.4M | ptr, pattern[1])); | 1578 | 18.4M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 18.4M | state->ptr = ptr - pattern[1]; | 1580 | 18.4M | LASTMARK_SAVE(); | 1581 | 18.4M | if (state->repeat) | 1582 | 18.4M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 36.9M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 36.9M | if (ret) { | 1586 | 2.73k | if (state->repeat) | 1587 | 2.73k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 2.73k | RETURN_ON_ERROR(ret); | 1589 | 2.73k | RETURN_FAILURE; | 1590 | 2.73k | } | 1591 | 18.4M | if (state->repeat) | 1592 | 18.4M | MARK_POP(ctx->lastmark); | 1593 | 18.4M | LASTMARK_RESTORE(); | 1594 | 18.4M | } | 1595 | 18.4M | 
pattern += pattern[0]; | 1596 | 18.4M | DISPATCH; | 1597 | | | 1598 | 18.4M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 682M | exit: | 1620 | 682M | ctx_pos = ctx->last_ctx_pos; | 1621 | 682M | jump = ctx->jump; | 1622 | 682M | DATA_POP_DISCARD(ctx); | 1623 | 682M | if (ctx_pos == -1) { | 1624 | 311M | state->sigcount = sigcount; | 1625 | 311M | return ret; | 1626 | 311M | } | 1627 | 370M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 370M | switch (jump) { | 1630 | 99.2M | case JUMP_MAX_UNTIL_2: | 1631 | 99.2M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 99.2M | goto jump_max_until_2; | 1633 | 56.9M | case JUMP_MAX_UNTIL_3: | 1634 | 56.9M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 56.9M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 28.5M | case JUMP_BRANCH: | 1643 | 28.5M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 28.5M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 56.3M | case JUMP_REPEAT: | 1658 | 56.3M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 56.3M | goto jump_repeat; | 1660 | 872k | case JUMP_REPEAT_ONE_1: | 1661 | 872k | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 872k | goto jump_repeat_one_1; | 1663 | 70.0M | case JUMP_REPEAT_ONE_2: | 1664 
| 70.0M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 70.0M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 39.8M | case JUMP_ASSERT: | 1673 | 39.8M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 39.8M | goto jump_assert; | 1675 | 18.4M | case JUMP_ASSERT_NOT: | 1676 | 18.4M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 18.4M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 370M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 370M | } |
Line | Count | Source | 600 | 125M | { | 601 | 125M | const SRE_CHAR* end = (const SRE_CHAR *)state->end; | 602 | 125M | Py_ssize_t alloc_pos, ctx_pos = -1; | 603 | 125M | Py_ssize_t ret = 0; | 604 | 125M | int jump; | 605 | 125M | unsigned int sigcount = state->sigcount; | 606 | | | 607 | 125M | SRE(match_context)* ctx; | 608 | 125M | SRE(match_context)* nextctx; | 609 | 125M | INIT_TRACE(state); | 610 | | | 611 | 125M | TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); | 612 | | | 613 | 125M | DATA_ALLOC(SRE(match_context), ctx); | 614 | 125M | ctx->last_ctx_pos = -1; | 615 | 125M | ctx->jump = JUMP_NONE; | 616 | 125M | ctx->toplevel = toplevel; | 617 | 125M | ctx_pos = alloc_pos; | 618 | | | 619 | 125M | #if USE_COMPUTED_GOTOS | 620 | 125M | #include "sre_targets.h" | 621 | 125M | #endif | 622 | | | 623 | 846M | entrance: | 624 | | | 625 | 846M | ; // Fashion statement. | 626 | 846M | const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; | 627 | | | 628 | 846M | if (pattern[0] == SRE_OP_INFO) { | 629 | | /* optimization info block */ | 630 | | /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ | 631 | 40.1M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 632 | 378 | TRACE(("reject (got %tu chars, need %zu)\n", | 633 | 378 | end - ptr, (size_t) pattern[3])); | 634 | 378 | RETURN_FAILURE; | 635 | 378 | } | 636 | 40.1M | pattern += pattern[1] + 1; | 637 | 40.1M | } | 638 | | | 639 | 846M | #if USE_COMPUTED_GOTOS | 640 | 846M | DISPATCH; | 641 | | #else | 642 | | dispatch: | 643 | | MAYBE_CHECK_SIGNALS; | 644 | | switch (*pattern++) | 645 | | #endif | 646 | 846M | { | 647 | | | 648 | 846M | TARGET(SRE_OP_MARK): | 649 | | /* set mark */ | 650 | | /* <MARK> <gid> */ | 651 | 204M | TRACE(("|%p|%p|MARK %d\n", pattern, | 652 | 204M | ptr, pattern[0])); | 653 | 204M | { | 654 | 204M | int i = pattern[0]; | 655 | 204M | if (i & 1) | 656 | 45.5M | state->lastindex = i/2 + 1; | 657 | 204M | if (i > state->lastmark) { | 658 | | /* state->lastmark is the highest valid index in the | 659 | | state->mark array. If it is increased by more than 1, | 660 | | the intervening marks must be set to NULL to signal | 661 | | that these marks have not been encountered. 
*/ | 662 | 203M | int j = state->lastmark + 1; | 663 | 204M | while (j < i) | 664 | 1.57M | state->mark[j++] = NULL; | 665 | 203M | state->lastmark = i; | 666 | 203M | } | 667 | 204M | state->mark[i] = ptr; | 668 | 204M | } | 669 | 204M | pattern++; | 670 | 204M | DISPATCH; | 671 | | | 672 | 204M | TARGET(SRE_OP_LITERAL): | 673 | | /* match literal string */ | 674 | | /* <LITERAL> <code> */ | 675 | 123M | TRACE(("|%p|%p|LITERAL %d\n", pattern, | 676 | 123M | ptr, *pattern)); | 677 | 123M | if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) | 678 | 93.6M | RETURN_FAILURE; | 679 | 29.7M | pattern++; | 680 | 29.7M | ptr++; | 681 | 29.7M | DISPATCH; | 682 | | | 683 | 29.7M | TARGET(SRE_OP_NOT_LITERAL): | 684 | | /* match anything that is not literal character */ | 685 | | /* <NOT_LITERAL> <code> */ | 686 | 0 | TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, | 687 | 0 | ptr, *pattern)); | 688 | 0 | if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) | 689 | 0 | RETURN_FAILURE; | 690 | 0 | pattern++; | 691 | 0 | ptr++; | 692 | 0 | DISPATCH; | 693 | | | 694 | 126M | TARGET(SRE_OP_SUCCESS): | 695 | | /* end of pattern */ | 696 | 126M | TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); | 697 | 126M | if (ctx->toplevel && | 698 | 126M | ((state->match_all && ptr != state->end) || | 699 | 39.5M | (state->must_advance && ptr == state->start))) | 700 | 0 | { | 701 | 0 | RETURN_FAILURE; | 702 | 0 | } | 703 | 126M | state->ptr = ptr; | 704 | 126M | RETURN_SUCCESS; | 705 | | | 706 | 517k | TARGET(SRE_OP_AT): | 707 | | /* match at given position */ | 708 | | /* <AT> <code> */ | 709 | 517k | TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); | 710 | 517k | if (!SRE(at)(state, ptr, *pattern)) | 711 | 511k | RETURN_FAILURE; | 712 | 5.66k | pattern++; | 713 | 5.66k | DISPATCH; | 714 | | | 715 | 5.66k | TARGET(SRE_OP_CATEGORY): | 716 | | /* match at given category */ | 717 | | /* <CATEGORY> <code> */ | 718 | 0 | TRACE(("|%p|%p|CATEGORY %d\n", pattern, | 719 | 0 | ptr, *pattern)); | 720 | 0 | if (ptr 
>= end || !sre_category(pattern[0], ptr[0])) | 721 | 0 | RETURN_FAILURE; | 722 | 0 | pattern++; | 723 | 0 | ptr++; | 724 | 0 | DISPATCH; | 725 | | | 726 | 0 | TARGET(SRE_OP_ANY): | 727 | | /* match anything (except a newline) */ | 728 | | /* <ANY> */ | 729 | 0 | TRACE(("|%p|%p|ANY\n", pattern, ptr)); | 730 | 0 | if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) | 731 | 0 | RETURN_FAILURE; | 732 | 0 | ptr++; | 733 | 0 | DISPATCH; | 734 | | | 735 | 0 | TARGET(SRE_OP_ANY_ALL): | 736 | | /* match anything */ | 737 | | /* <ANY_ALL> */ | 738 | 0 | TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); | 739 | 0 | if (ptr >= end) | 740 | 0 | RETURN_FAILURE; | 741 | 0 | ptr++; | 742 | 0 | DISPATCH; | 743 | | | 744 | 193M | TARGET(SRE_OP_IN): | 745 | | /* match set member (or non_member) */ | 746 | | /* <IN> <skip> <set> */ | 747 | 193M | TRACE(("|%p|%p|IN\n", pattern, ptr)); | 748 | 193M | if (ptr >= end || | 749 | 193M | !SRE(charset)(state, pattern + 1, *ptr)) | 750 | 1.47M | RETURN_FAILURE; | 751 | 192M | pattern += pattern[0]; | 752 | 192M | ptr++; | 753 | 192M | DISPATCH; | 754 | | | 755 | 192M | TARGET(SRE_OP_LITERAL_IGNORE): | 756 | 436k | TRACE(("|%p|%p|LITERAL_IGNORE %d\n", | 757 | 436k | pattern, ptr, pattern[0])); | 758 | 436k | if (ptr >= end || | 759 | 436k | sre_lower_ascii(*ptr) != *pattern) | 760 | 11.4k | RETURN_FAILURE; | 761 | 425k | pattern++; | 762 | 425k | ptr++; | 763 | 425k | DISPATCH; | 764 | | | 765 | 425k | TARGET(SRE_OP_LITERAL_UNI_IGNORE): | 766 | 0 | TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", | 767 | 0 | pattern, ptr, pattern[0])); | 768 | 0 | if (ptr >= end || | 769 | 0 | sre_lower_unicode(*ptr) != *pattern) | 770 | 0 | RETURN_FAILURE; | 771 | 0 | pattern++; | 772 | 0 | ptr++; | 773 | 0 | DISPATCH; | 774 | | | 775 | 0 | TARGET(SRE_OP_LITERAL_LOC_IGNORE): | 776 | 0 | TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", | 777 | 0 | pattern, ptr, pattern[0])); | 778 | 0 | if (ptr >= end | 779 | 0 | || !char_loc_ignore(*pattern, *ptr)) | 780 | 0 | RETURN_FAILURE; | 781 | 0 | 
pattern++; | 782 | 0 | ptr++; | 783 | 0 | DISPATCH; | 784 | | | 785 | 0 | TARGET(SRE_OP_NOT_LITERAL_IGNORE): | 786 | 0 | TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", | 787 | 0 | pattern, ptr, *pattern)); | 788 | 0 | if (ptr >= end || | 789 | 0 | sre_lower_ascii(*ptr) == *pattern) | 790 | 0 | RETURN_FAILURE; | 791 | 0 | pattern++; | 792 | 0 | ptr++; | 793 | 0 | DISPATCH; | 794 | | | 795 | 0 | TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): | 796 | 0 | TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", | 797 | 0 | pattern, ptr, *pattern)); | 798 | 0 | if (ptr >= end || | 799 | 0 | sre_lower_unicode(*ptr) == *pattern) | 800 | 0 | RETURN_FAILURE; | 801 | 0 | pattern++; | 802 | 0 | ptr++; | 803 | 0 | DISPATCH; | 804 | | | 805 | 0 | TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): | 806 | 0 | TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", | 807 | 0 | pattern, ptr, *pattern)); | 808 | 0 | if (ptr >= end | 809 | 0 | || char_loc_ignore(*pattern, *ptr)) | 810 | 0 | RETURN_FAILURE; | 811 | 0 | pattern++; | 812 | 0 | ptr++; | 813 | 0 | DISPATCH; | 814 | | | 815 | 0 | TARGET(SRE_OP_IN_IGNORE): | 816 | 0 | TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); | 817 | 0 | if (ptr >= end | 818 | 0 | || !SRE(charset)(state, pattern+1, | 819 | 0 | (SRE_CODE)sre_lower_ascii(*ptr))) | 820 | 0 | RETURN_FAILURE; | 821 | 0 | pattern += pattern[0]; | 822 | 0 | ptr++; | 823 | 0 | DISPATCH; | 824 | | | 825 | 0 | TARGET(SRE_OP_IN_UNI_IGNORE): | 826 | 0 | TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); | 827 | 0 | if (ptr >= end | 828 | 0 | || !SRE(charset)(state, pattern+1, | 829 | 0 | (SRE_CODE)sre_lower_unicode(*ptr))) | 830 | 0 | RETURN_FAILURE; | 831 | 0 | pattern += pattern[0]; | 832 | 0 | ptr++; | 833 | 0 | DISPATCH; | 834 | | | 835 | 0 | TARGET(SRE_OP_IN_LOC_IGNORE): | 836 | 0 | TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); | 837 | 0 | if (ptr >= end | 838 | 0 | || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) | 839 | 0 | RETURN_FAILURE; | 840 | 0 | pattern += pattern[0]; | 841 | 0 | ptr++; | 842 | 0 | DISPATCH; | 
843 | | | 844 | 51.9M | TARGET(SRE_OP_JUMP): | 845 | 51.9M | TARGET(SRE_OP_INFO): | 846 | | /* jump forward */ | 847 | | /* <JUMP> <offset> */ | 848 | 51.9M | TRACE(("|%p|%p|JUMP %d\n", pattern, | 849 | 51.9M | ptr, pattern[0])); | 850 | 51.9M | pattern += pattern[0]; | 851 | 51.9M | DISPATCH; | 852 | | | 853 | 91.0M | TARGET(SRE_OP_BRANCH): | 854 | | /* alternation */ | 855 | | /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ | 856 | 91.0M | TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); | 857 | 91.0M | LASTMARK_SAVE(); | 858 | 91.0M | if (state->repeat) | 859 | 88.2M | MARK_PUSH(ctx->lastmark); | 860 | 196M | for (; pattern[0]; pattern += pattern[0]) { | 861 | 157M | if (pattern[1] == SRE_OP_LITERAL && | 862 | 157M | (ptr >= end || | 863 | 69.9M | (SRE_CODE) *ptr != pattern[2])) | 864 | 48.1M | continue; | 865 | 108M | if (pattern[1] == SRE_OP_IN && | 866 | 108M | (ptr >= end || | 867 | 84.5M | !SRE(charset)(state, pattern + 3, | 868 | 84.5M | (SRE_CODE) *ptr))) | 869 | 56.4M | continue; | 870 | 52.4M | state->ptr = ptr; | 871 | 52.4M | DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); | 872 | 52.4M | if (ret) { | 873 | 51.5M | if (state->repeat) | 874 | 49.5M | MARK_POP_DISCARD(ctx->lastmark); | 875 | 51.5M | RETURN_ON_ERROR(ret); | 876 | 51.5M | RETURN_SUCCESS; | 877 | 51.5M | } | 878 | 901k | if (state->repeat) | 879 | 6.03k | MARK_POP_KEEP(ctx->lastmark); | 880 | 901k | LASTMARK_RESTORE(); | 881 | 901k | } | 882 | 39.4M | if (state->repeat) | 883 | 38.6M | MARK_POP_DISCARD(ctx->lastmark); | 884 | 39.4M | RETURN_FAILURE; | 885 | | | 886 | 206M | TARGET(SRE_OP_REPEAT_ONE): | 887 | | /* match repeated sequence (maximizing regexp) */ | 888 | | | 889 | | /* this operator only works if the repeated item is | 890 | | exactly one character wide, and we're not already | 891 | | collecting backtracking points. 
for other cases, | 892 | | use the MAX_REPEAT operator */ | 893 | | | 894 | | /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 895 | | | 896 | 206M | TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, | 897 | 206M | pattern[1], pattern[2])); | 898 | | | 899 | 206M | if ((Py_ssize_t) pattern[1] > end - ptr) | 900 | 26.3k | RETURN_FAILURE; /* cannot match */ | 901 | | | 902 | 206M | state->ptr = ptr; | 903 | | | 904 | 206M | ret = SRE(count)(state, pattern+3, pattern[2]); | 905 | 206M | RETURN_ON_ERROR(ret); | 906 | 206M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 907 | 206M | ctx->count = ret; | 908 | 206M | ptr += ctx->count; | 909 | | | 910 | | /* when we arrive here, count contains the number of | 911 | | matches, and ptr points to the tail of the target | 912 | | string. check if the rest of the pattern matches, | 913 | | and backtrack if not. */ | 914 | | | 915 | 206M | if (ctx->count < (Py_ssize_t) pattern[1]) | 916 | 77.7M | RETURN_FAILURE; | 917 | | | 918 | 129M | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 919 | 129M | ptr == state->end && | 920 | 129M | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 921 | 4.08k | { | 922 | | /* tail is empty. we're finished */ | 923 | 4.08k | state->ptr = ptr; | 924 | 4.08k | RETURN_SUCCESS; | 925 | 4.08k | } | 926 | | | 927 | 129M | LASTMARK_SAVE(); | 928 | 129M | if (state->repeat) | 929 | 85.1M | MARK_PUSH(ctx->lastmark); | 930 | | | 931 | 129M | if (pattern[pattern[0]] == SRE_OP_LITERAL) { | 932 | | /* tail starts with a literal. 
skip positions where | 933 | | the rest of the pattern cannot possibly match */ | 934 | 3.34M | ctx->u.chr = pattern[pattern[0]+1]; | 935 | 3.34M | for (;;) { | 936 | 7.84M | while (ctx->count >= (Py_ssize_t) pattern[1] && | 937 | 7.84M | (ptr >= end || *ptr != ctx->u.chr)) { | 938 | 4.50M | ptr--; | 939 | 4.50M | ctx->count--; | 940 | 4.50M | } | 941 | 3.34M | if (ctx->count < (Py_ssize_t) pattern[1]) | 942 | 21.7k | break; | 943 | 3.32M | state->ptr = ptr; | 944 | 3.32M | DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, | 945 | 3.32M | pattern+pattern[0]); | 946 | 3.32M | if (ret) { | 947 | 3.32M | if (state->repeat) | 948 | 3.30M | MARK_POP_DISCARD(ctx->lastmark); | 949 | 3.32M | RETURN_ON_ERROR(ret); | 950 | 3.32M | RETURN_SUCCESS; | 951 | 3.32M | } | 952 | 262 | if (state->repeat) | 953 | 0 | MARK_POP_KEEP(ctx->lastmark); | 954 | 262 | LASTMARK_RESTORE(); | 955 | | | 956 | 262 | ptr--; | 957 | 262 | ctx->count--; | 958 | 262 | } | 959 | 21.7k | if (state->repeat) | 960 | 259 | MARK_POP_DISCARD(ctx->lastmark); | 961 | 125M | } else { | 962 | | /* general case */ | 963 | 126M | while (ctx->count >= (Py_ssize_t) pattern[1]) { | 964 | 126M | state->ptr = ptr; | 965 | 126M | DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, | 966 | 126M | pattern+pattern[0]); | 967 | 126M | if (ret) { | 968 | 125M | if (state->repeat) | 969 | 81.7M | MARK_POP_DISCARD(ctx->lastmark); | 970 | 125M | RETURN_ON_ERROR(ret); | 971 | 125M | RETURN_SUCCESS; | 972 | 125M | } | 973 | 1.08M | if (state->repeat) | 974 | 160k | MARK_POP_KEEP(ctx->lastmark); | 975 | 1.08M | LASTMARK_RESTORE(); | 976 | | | 977 | 1.08M | ptr--; | 978 | 1.08M | ctx->count--; | 979 | 1.08M | } | 980 | 80.8k | if (state->repeat) | 981 | 80.1k | MARK_POP_DISCARD(ctx->lastmark); | 982 | 80.8k | } | 983 | 102k | RETURN_FAILURE; | 984 | | | 985 | 0 | TARGET(SRE_OP_MIN_REPEAT_ONE): | 986 | | /* match repeated sequence (minimizing regexp) */ | 987 | | | 988 | | /* this operator only works if the repeated item is | 989 | | exactly 
one character wide, and we're not already | 990 | | collecting backtracking points. for other cases, | 991 | | use the MIN_REPEAT operator */ | 992 | | | 993 | | /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ | 994 | |
| 995 | 0 | TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, | 996 | 0 | pattern[1], pattern[2])); | 997 | |
| 998 | 0 | if ((Py_ssize_t) pattern[1] > end - ptr) | 999 | 0 | RETURN_FAILURE; /* cannot match */ | 1000 | | | 1001 | 0 | state->ptr = ptr; | 1002 | |
| 1003 | 0 | if (pattern[1] == 0) | 1004 | 0 | ctx->count = 0; | 1005 | 0 | else { | 1006 | | /* count using pattern min as the maximum */ | 1007 | 0 | ret = SRE(count)(state, pattern+3, pattern[1]); | 1008 | 0 | RETURN_ON_ERROR(ret); | 1009 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1010 | 0 | if (ret < (Py_ssize_t) pattern[1]) | 1011 | | /* didn't match minimum number of times */ | 1012 | 0 | RETURN_FAILURE; | 1013 | | /* advance past minimum matches of repeat */ | 1014 | 0 | ctx->count = ret; | 1015 | 0 | ptr += ctx->count; | 1016 | 0 | } | 1017 | | | 1018 | 0 | if (pattern[pattern[0]] == SRE_OP_SUCCESS && | 1019 | 0 | !(ctx->toplevel && | 1020 | 0 | ((state->match_all && ptr != state->end) || | 1021 | 0 | (state->must_advance && ptr == state->start)))) | 1022 | 0 | { | 1023 | | /* tail is empty. we're finished */ | 1024 | 0 | state->ptr = ptr; | 1025 | 0 | RETURN_SUCCESS; | 1026 | |
| 1027 | 0 | } else { | 1028 | | /* general case */ | 1029 | 0 | LASTMARK_SAVE(); | 1030 | 0 | if (state->repeat) | 1031 | 0 | MARK_PUSH(ctx->lastmark); | 1032 | | | 1033 | 0 | while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT | 1034 | 0 | || ctx->count <= (Py_ssize_t)pattern[2]) { | 1035 | 0 | state->ptr = ptr; | 1036 | 0 | DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, | 1037 | 0 | pattern+pattern[0]); | 1038 | 0 | if (ret) { | 1039 | 0 | if (state->repeat) | 1040 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1041 | 0 | RETURN_ON_ERROR(ret); | 1042 | 0 | RETURN_SUCCESS; | 1043 | 0 | } | 1044 | 0 | if (state->repeat) | 1045 | 0 | MARK_POP_KEEP(ctx->lastmark); | 1046 | 0 | LASTMARK_RESTORE(); | 1047 | |
| 1048 | 0 | state->ptr = ptr; | 1049 | 0 | ret = SRE(count)(state, pattern+3, 1); | 1050 | 0 | RETURN_ON_ERROR(ret); | 1051 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1052 | 0 | if (ret == 0) | 1053 | 0 | break; | 1054 | 0 | assert(ret == 1); | 1055 | 0 | ptr++; | 1056 | 0 | ctx->count++; | 1057 | 0 | } | 1058 | 0 | if (state->repeat) | 1059 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1060 | 0 | } | 1061 | 0 | RETURN_FAILURE; | 1062 | | | 1063 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): | 1064 | | /* match repeated sequence (maximizing regexp) without | 1065 | | backtracking */ | 1066 | | | 1067 | | /* this operator only works if the repeated item is | 1068 | | exactly one character wide, and we're not already | 1069 | | collecting backtracking points. for other cases, | 1070 | | use the MAX_REPEAT operator */ | 1071 | | | 1072 | | /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> | 1073 | | tail */ | 1074 | |
| 1075 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, | 1076 | 0 | ptr, pattern[1], pattern[2])); | 1077 | |
| 1078 | 0 | if (ptr + pattern[1] > end) { | 1079 | 0 | RETURN_FAILURE; /* cannot match */ | 1080 | 0 | } | 1081 | | | 1082 | 0 | state->ptr = ptr; | 1083 | |
| 1084 | 0 | ret = SRE(count)(state, pattern + 3, pattern[2]); | 1085 | 0 | RETURN_ON_ERROR(ret); | 1086 | 0 | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1087 | 0 | ctx->count = ret; | 1088 | 0 | ptr += ctx->count; | 1089 | | | 1090 | | /* when we arrive here, count contains the number of | 1091 | | matches, and ptr points to the tail of the target | 1092 | | string. check if the rest of the pattern matches, | 1093 | | and fail if not. */ | 1094 | | | 1095 | | /* Test for not enough repetitions in match */ | 1096 | 0 | if (ctx->count < (Py_ssize_t) pattern[1]) { | 1097 | 0 | RETURN_FAILURE; | 1098 | 0 | } | 1099 | | | 1100 | | /* Update the pattern to point to the next op code */ | 1101 | 0 | pattern += pattern[0]; | 1102 | | | 1103 | | /* Let the tail be evaluated separately and consider this | 1104 | | match successful. */ | 1105 | 0 | if (*pattern == SRE_OP_SUCCESS && | 1106 | 0 | ptr == state->end && | 1107 | 0 | !(ctx->toplevel && state->must_advance && ptr == state->start)) | 1108 | 0 | { | 1109 | | /* tail is empty. we're finished */ | 1110 | 0 | state->ptr = ptr; | 1111 | 0 | RETURN_SUCCESS; | 1112 | 0 | } | 1113 | | | 1114 | | /* Attempt to match the rest of the string */ | 1115 | 0 | DISPATCH; | 1116 | | | 1117 | 117M | TARGET(SRE_OP_REPEAT): | 1118 | | /* create repeat context. 
all the hard work is done | 1119 | | by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ | 1120 | | /* <REPEAT> <skip> <1=min> <2=max> | 1121 | | <3=repeat_index> item <UNTIL> tail */ | 1122 | 117M | TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, | 1123 | 117M | pattern[1], pattern[2])); | 1124 | | | 1125 | | /* install new repeat context */ | 1126 | 117M | ctx->u.rep = repeat_pool_malloc(state); | 1127 | 117M | if (!ctx->u.rep) { | 1128 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1129 | 0 | } | 1130 | 117M | ctx->u.rep->count = -1; | 1131 | 117M | ctx->u.rep->pattern = pattern; | 1132 | 117M | ctx->u.rep->prev = state->repeat; | 1133 | 117M | ctx->u.rep->last_ptr = NULL; | 1134 | 117M | state->repeat = ctx->u.rep; | 1135 | | | 1136 | 117M | state->ptr = ptr; | 1137 | 117M | DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); | 1138 | 117M | state->repeat = ctx->u.rep->prev; | 1139 | 117M | repeat_pool_free(state, ctx->u.rep); | 1140 | | | 1141 | 117M | if (ret) { | 1142 | 117M | RETURN_ON_ERROR(ret); | 1143 | 117M | RETURN_SUCCESS; | 1144 | 117M | } | 1145 | 613 | RETURN_FAILURE; | 1146 | | | 1147 | 208M | TARGET(SRE_OP_MAX_UNTIL): | 1148 | | /* maximizing repeat */ | 1149 | | /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ | 1150 | | | 1151 | | /* FIXME: we probably need to deal with zero-width | 1152 | | matches in here... 
*/ | 1153 | | | 1154 | 208M | ctx->u.rep = state->repeat; | 1155 | 208M | if (!ctx->u.rep) | 1156 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1157 | | | 1158 | 208M | state->ptr = ptr; | 1159 | | | 1160 | 208M | ctx->count = ctx->u.rep->count+1; | 1161 | | | 1162 | 208M | TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, | 1163 | 208M | ptr, ctx->count)); | 1164 | | | 1165 | 208M | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1166 | | /* not enough matches */ | 1167 | 0 | ctx->u.rep->count = ctx->count; | 1168 | 0 | DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, | 1169 | 0 | ctx->u.rep->pattern+3); | 1170 | 0 | if (ret) { | 1171 | 0 | RETURN_ON_ERROR(ret); | 1172 | 0 | RETURN_SUCCESS; | 1173 | 0 | } | 1174 | 0 | ctx->u.rep->count = ctx->count-1; | 1175 | 0 | state->ptr = ptr; | 1176 | 0 | RETURN_FAILURE; | 1177 | 0 | } | 1178 | | | 1179 | 208M | if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || | 1180 | 208M | ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && | 1181 | 208M | state->ptr != ctx->u.rep->last_ptr) { | 1182 | | /* we may have enough matches, but if we can | 1183 | | match another item, do so */ | 1184 | 204M | ctx->u.rep->count = ctx->count; | 1185 | 204M | LASTMARK_SAVE(); | 1186 | 204M | MARK_PUSH(ctx->lastmark); | 1187 | | /* zero-width match protection */ | 1188 | 204M | LAST_PTR_PUSH(); | 1189 | 204M | ctx->u.rep->last_ptr = state->ptr; | 1190 | 204M | DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, | 1191 | 204M | ctx->u.rep->pattern+3); | 1192 | 204M | LAST_PTR_POP(); | 1193 | 204M | if (ret) { | 1194 | 90.1M | MARK_POP_DISCARD(ctx->lastmark); | 1195 | 90.1M | RETURN_ON_ERROR(ret); | 1196 | 90.1M | RETURN_SUCCESS; | 1197 | 90.1M | } | 1198 | 114M | MARK_POP(ctx->lastmark); | 1199 | 114M | LASTMARK_RESTORE(); | 1200 | 114M | ctx->u.rep->count = ctx->count-1; | 1201 | 114M | state->ptr = ptr; | 1202 | 114M | } | 1203 | | | 1204 | | /* cannot match more repeated items here. 
make sure the | 1205 | | tail matches */ | 1206 | 118M | state->repeat = ctx->u.rep->prev; | 1207 | 118M | DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); | 1208 | 118M | state->repeat = ctx->u.rep; // restore repeat before return | 1209 | | | 1210 | 118M | RETURN_ON_SUCCESS(ret); | 1211 | 80.7k | state->ptr = ptr; | 1212 | 80.7k | RETURN_FAILURE; | 1213 | | | 1214 | 0 | TARGET(SRE_OP_MIN_UNTIL): | 1215 | | /* minimizing repeat */ | 1216 | | /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ | 1217 | |
| 1218 | 0 | ctx->u.rep = state->repeat; | 1219 | 0 | if (!ctx->u.rep) | 1220 | 0 | RETURN_ERROR(SRE_ERROR_STATE); | 1221 | | | 1222 | 0 | state->ptr = ptr; | 1223 | |
| 1224 | 0 | ctx->count = ctx->u.rep->count+1; | 1225 | |
| 1226 | 0 | TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, | 1227 | 0 | ptr, ctx->count, ctx->u.rep->pattern)); | 1228 | |
| 1229 | 0 | if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { | 1230 | | /* not enough matches */ | 1231 | 0 | ctx->u.rep->count = ctx->count; | 1232 | 0 | DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, | 1233 | 0 | ctx->u.rep->pattern+3); | 1234 | 0 | if (ret) { | 1235 | 0 | RETURN_ON_ERROR(ret); | 1236 | 0 | RETURN_SUCCESS; | 1237 | 0 | } | 1238 | 0 | ctx->u.rep->count = ctx->count-1; | 1239 | 0 | state->ptr = ptr; | 1240 | 0 | RETURN_FAILURE; | 1241 | 0 | } | 1242 | | | 1243 | | /* see if the tail matches */ | 1244 | 0 | state->repeat = ctx->u.rep->prev; | 1245 | |
| 1246 | 0 | LASTMARK_SAVE(); | 1247 | 0 | if (state->repeat) | 1248 | 0 | MARK_PUSH(ctx->lastmark); | 1249 | | | 1250 | 0 | DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); | 1251 | 0 | SRE_REPEAT *repeat_of_tail = state->repeat; | 1252 | 0 | state->repeat = ctx->u.rep; // restore repeat before return | 1253 | |
| 1254 | 0 | if (ret) { | 1255 | 0 | if (repeat_of_tail) | 1256 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1257 | 0 | RETURN_ON_ERROR(ret); | 1258 | 0 | RETURN_SUCCESS; | 1259 | 0 | } | 1260 | 0 | if (repeat_of_tail) | 1261 | 0 | MARK_POP(ctx->lastmark); | 1262 | 0 | LASTMARK_RESTORE(); | 1263 | |
| 1264 | 0 | state->ptr = ptr; | 1265 | |
| 1266 | 0 | if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] | 1267 | 0 | && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || | 1268 | 0 | state->ptr == ctx->u.rep->last_ptr) | 1269 | 0 | RETURN_FAILURE; | 1270 | | | 1271 | 0 | ctx->u.rep->count = ctx->count; | 1272 | | /* zero-width match protection */ | 1273 | 0 | LAST_PTR_PUSH(); | 1274 | 0 | ctx->u.rep->last_ptr = state->ptr; | 1275 | 0 | DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, | 1276 | 0 | ctx->u.rep->pattern+3); | 1277 | 0 | LAST_PTR_POP(); | 1278 | 0 | if (ret) { | 1279 | 0 | RETURN_ON_ERROR(ret); | 1280 | 0 | RETURN_SUCCESS; | 1281 | 0 | } | 1282 | 0 | ctx->u.rep->count = ctx->count-1; | 1283 | 0 | state->ptr = ptr; | 1284 | 0 | RETURN_FAILURE; | 1285 | | | 1286 | 0 | TARGET(SRE_OP_POSSESSIVE_REPEAT): | 1287 | | /* create possessive repeat contexts. */ | 1288 | | /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern | 1289 | | <SUCCESS> tail */ | 1290 | 0 | TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, | 1291 | 0 | ptr, pattern[1], pattern[2])); | 1292 | | | 1293 | | /* Set the global Input pointer to this context's Input | 1294 | | pointer */ | 1295 | 0 | state->ptr = ptr; | 1296 | | | 1297 | | /* Set state->repeat to non-NULL */ | 1298 | 0 | ctx->u.rep = repeat_pool_malloc(state); | 1299 | 0 | if (!ctx->u.rep) { | 1300 | 0 | RETURN_ERROR(SRE_ERROR_MEMORY); | 1301 | 0 | } | 1302 | 0 | ctx->u.rep->count = -1; | 1303 | 0 | ctx->u.rep->pattern = NULL; | 1304 | 0 | ctx->u.rep->prev = state->repeat; | 1305 | 0 | ctx->u.rep->last_ptr = NULL; | 1306 | 0 | state->repeat = ctx->u.rep; | 1307 | | | 1308 | | /* Initialize Count to 0 */ | 1309 | 0 | ctx->count = 0; | 1310 | | | 1311 | | /* Check for minimum required matches. 
*/ | 1312 | 0 | while (ctx->count < (Py_ssize_t)pattern[1]) { | 1313 | | /* not enough matches */ | 1314 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, | 1315 | 0 | &pattern[3]); | 1316 | 0 | if (ret) { | 1317 | 0 | RETURN_ON_ERROR(ret); | 1318 | 0 | ctx->count++; | 1319 | 0 | } | 1320 | 0 | else { | 1321 | 0 | state->ptr = ptr; | 1322 | | /* Restore state->repeat */ | 1323 | 0 | state->repeat = ctx->u.rep->prev; | 1324 | 0 | repeat_pool_free(state, ctx->u.rep); | 1325 | 0 | RETURN_FAILURE; | 1326 | 0 | } | 1327 | 0 | } | 1328 | | | 1329 | | /* Clear the context's Input stream pointer so that it | 1330 | | doesn't match the global state so that the while loop can | 1331 | | be entered. */ | 1332 | 0 | ptr = NULL; | 1333 | | | 1334 | | /* Keep trying to parse the <pattern> sub-pattern until the | 1335 | | end is reached, creating a new context each time. */ | 1336 | 0 | while ((ctx->count < (Py_ssize_t)pattern[2] || | 1337 | 0 | (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && | 1338 | 0 | state->ptr != ptr) { | 1339 | | /* Save the Capture Group Marker state into the current | 1340 | | Context and back up the current highest number | 1341 | | Capture Group marker. */ | 1342 | 0 | LASTMARK_SAVE(); | 1343 | 0 | MARK_PUSH(ctx->lastmark); | 1344 | | | 1345 | | /* zero-width match protection */ | 1346 | | /* Set the context's Input Stream pointer to be the | 1347 | | current Input Stream pointer from the global | 1348 | | state. When the loop reaches the next iteration, | 1349 | | the context will then store the last known good | 1350 | | position with the global state holding the Input | 1351 | | Input Stream position that has been updated with | 1352 | | the most recent match. 
Thus, if state's Input | 1353 | | stream remains the same as the one stored in the | 1354 | | current Context, we know we have successfully | 1355 | | matched an empty string and that all subsequent | 1356 | | matches will also be the empty string until the | 1357 | | maximum number of matches are counted, and because | 1358 | | of this, we could immediately stop at that point and | 1359 | | consider this match successful. */ | 1360 | 0 | ptr = state->ptr; | 1361 | | | 1362 | | /* We have not reached the maximin matches, so try to | 1363 | | match once more. */ | 1364 | 0 | DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, | 1365 | 0 | &pattern[3]); | 1366 | | | 1367 | | /* Check to see if the last attempted match | 1368 | | succeeded. */ | 1369 | 0 | if (ret) { | 1370 | | /* Drop the saved highest number Capture Group | 1371 | | marker saved above and use the newly updated | 1372 | | value. */ | 1373 | 0 | MARK_POP_DISCARD(ctx->lastmark); | 1374 | 0 | RETURN_ON_ERROR(ret); | 1375 | | | 1376 | | /* Success, increment the count. */ | 1377 | 0 | ctx->count++; | 1378 | 0 | } | 1379 | | /* Last attempted match failed. */ | 1380 | 0 | else { | 1381 | | /* Restore the previously saved highest number | 1382 | | Capture Group marker since the last iteration | 1383 | | did not match, then restore that to the global | 1384 | | state. */ | 1385 | 0 | MARK_POP(ctx->lastmark); | 1386 | 0 | LASTMARK_RESTORE(); | 1387 | | | 1388 | | /* Restore the global Input Stream pointer | 1389 | | since it can change after jumps. */ | 1390 | 0 | state->ptr = ptr; | 1391 | | | 1392 | | /* We have sufficient matches, so exit loop. */ | 1393 | 0 | break; | 1394 | 0 | } | 1395 | 0 | } | 1396 | | | 1397 | | /* Restore state->repeat */ | 1398 | 0 | state->repeat = ctx->u.rep->prev; | 1399 | 0 | repeat_pool_free(state, ctx->u.rep); | 1400 | | | 1401 | | /* Evaluate Tail */ | 1402 | | /* Jump to end of pattern indicated by skip, and then skip | 1403 | | the SUCCESS op code that follows it. 
*/ | 1404 | 0 | pattern += pattern[0] + 1; | 1405 | 0 | ptr = state->ptr; | 1406 | 0 | DISPATCH; | 1407 | | | 1408 | 0 | TARGET(SRE_OP_ATOMIC_GROUP): | 1409 | | /* Atomic Group Sub Pattern */ | 1410 | | /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ | 1411 | 0 | TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); | 1412 | | | 1413 | | /* Set the global Input pointer to this context's Input | 1414 | | pointer */ | 1415 | 0 | state->ptr = ptr; | 1416 | | | 1417 | | /* Evaluate the Atomic Group in a new context, terminating | 1418 | | when the end of the group, represented by a SUCCESS op | 1419 | | code, is reached. */ | 1420 | | /* Group Pattern begins at an offset of 1 code. */ | 1421 | 0 | DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, | 1422 | 0 | &pattern[1]); | 1423 | | | 1424 | | /* Test Exit Condition */ | 1425 | 0 | RETURN_ON_ERROR(ret); | 1426 | | | 1427 | 0 | if (ret == 0) { | 1428 | | /* Atomic Group failed to Match. */ | 1429 | 0 | state->ptr = ptr; | 1430 | 0 | RETURN_FAILURE; | 1431 | 0 | } | 1432 | | | 1433 | | /* Evaluate Tail */ | 1434 | | /* Jump to end of pattern indicated by skip, and then skip | 1435 | | the SUCCESS op code that follows it. 
*/ | 1436 | 0 | pattern += pattern[0]; | 1437 | 0 | ptr = state->ptr; | 1438 | 0 | DISPATCH; | 1439 | | | 1440 | 0 | TARGET(SRE_OP_GROUPREF): | 1441 | | /* match backreference */ | 1442 | 0 | TRACE(("|%p|%p|GROUPREF %d\n", pattern, | 1443 | 0 | ptr, pattern[0])); | 1444 | 0 | { | 1445 | 0 | int groupref = pattern[0] * 2; | 1446 | 0 | if (groupref >= state->lastmark) { | 1447 | 0 | RETURN_FAILURE; | 1448 | 0 | } else { | 1449 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1450 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1451 | 0 | if (!p || !e || e < p) | 1452 | 0 | RETURN_FAILURE; | 1453 | 0 | while (p < e) { | 1454 | 0 | if (ptr >= end || *ptr != *p) | 1455 | 0 | RETURN_FAILURE; | 1456 | 0 | p++; | 1457 | 0 | ptr++; | 1458 | 0 | } | 1459 | 0 | } | 1460 | 0 | } | 1461 | 0 | pattern++; | 1462 | 0 | DISPATCH; | 1463 | | | 1464 | 0 | TARGET(SRE_OP_GROUPREF_IGNORE): | 1465 | | /* match backreference */ | 1466 | 0 | TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, | 1467 | 0 | ptr, pattern[0])); | 1468 | 0 | { | 1469 | 0 | int groupref = pattern[0] * 2; | 1470 | 0 | if (groupref >= state->lastmark) { | 1471 | 0 | RETURN_FAILURE; | 1472 | 0 | } else { | 1473 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1474 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1475 | 0 | if (!p || !e || e < p) | 1476 | 0 | RETURN_FAILURE; | 1477 | 0 | while (p < e) { | 1478 | 0 | if (ptr >= end || | 1479 | 0 | sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) | 1480 | 0 | RETURN_FAILURE; | 1481 | 0 | p++; | 1482 | 0 | ptr++; | 1483 | 0 | } | 1484 | 0 | } | 1485 | 0 | } | 1486 | 0 | pattern++; | 1487 | 0 | DISPATCH; | 1488 | | | 1489 | 0 | TARGET(SRE_OP_GROUPREF_UNI_IGNORE): | 1490 | | /* match backreference */ | 1491 | 0 | TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, | 1492 | 0 | ptr, pattern[0])); | 1493 | 0 | { | 1494 | 0 | int groupref = pattern[0] * 2; | 1495 | 0 | if (groupref >= state->lastmark) { | 1496 | 0 | RETURN_FAILURE; | 1497 | 0 | } 
else { | 1498 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1499 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1500 | 0 | if (!p || !e || e < p) | 1501 | 0 | RETURN_FAILURE; | 1502 | 0 | while (p < e) { | 1503 | 0 | if (ptr >= end || | 1504 | 0 | sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) | 1505 | 0 | RETURN_FAILURE; | 1506 | 0 | p++; | 1507 | 0 | ptr++; | 1508 | 0 | } | 1509 | 0 | } | 1510 | 0 | } | 1511 | 0 | pattern++; | 1512 | 0 | DISPATCH; | 1513 | | | 1514 | 0 | TARGET(SRE_OP_GROUPREF_LOC_IGNORE): | 1515 | | /* match backreference */ | 1516 | 0 | TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, | 1517 | 0 | ptr, pattern[0])); | 1518 | 0 | { | 1519 | 0 | int groupref = pattern[0] * 2; | 1520 | 0 | if (groupref >= state->lastmark) { | 1521 | 0 | RETURN_FAILURE; | 1522 | 0 | } else { | 1523 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1524 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1525 | 0 | if (!p || !e || e < p) | 1526 | 0 | RETURN_FAILURE; | 1527 | 0 | while (p < e) { | 1528 | 0 | if (ptr >= end || | 1529 | 0 | sre_lower_locale(*ptr) != sre_lower_locale(*p)) | 1530 | 0 | RETURN_FAILURE; | 1531 | 0 | p++; | 1532 | 0 | ptr++; | 1533 | 0 | } | 1534 | 0 | } | 1535 | 0 | } | 1536 | 0 | pattern++; | 1537 | 0 | DISPATCH; | 1538 | | | 1539 | 0 | TARGET(SRE_OP_GROUPREF_EXISTS): | 1540 | 0 | TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, | 1541 | 0 | ptr, pattern[0])); | 1542 | | /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... 
*/ | 1543 | 0 | { | 1544 | 0 | int groupref = pattern[0] * 2; | 1545 | 0 | if (groupref >= state->lastmark) { | 1546 | 0 | pattern += pattern[1]; | 1547 | 0 | DISPATCH; | 1548 | 0 | } else { | 1549 | 0 | SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; | 1550 | 0 | SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; | 1551 | 0 | if (!p || !e || e < p) { | 1552 | 0 | pattern += pattern[1]; | 1553 | 0 | DISPATCH; | 1554 | 0 | } | 1555 | 0 | } | 1556 | 0 | } | 1557 | 0 | pattern += 2; | 1558 | 0 | DISPATCH; | 1559 | | | 1560 | 79.5M | TARGET(SRE_OP_ASSERT): | 1561 | | /* assert subpattern */ | 1562 | | /* <ASSERT> <skip> <back> <pattern> */ | 1563 | 79.5M | TRACE(("|%p|%p|ASSERT %d\n", pattern, | 1564 | 79.5M | ptr, pattern[1])); | 1565 | 79.5M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) | 1566 | 0 | RETURN_FAILURE; | 1567 | 79.5M | state->ptr = ptr - pattern[1]; | 1568 | 79.5M | DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); | 1569 | 79.5M | RETURN_ON_FAILURE(ret); | 1570 | 79.0M | pattern += pattern[0]; | 1571 | 79.0M | DISPATCH; | 1572 | | | 1573 | 79.0M | TARGET(SRE_OP_ASSERT_NOT): | 1574 | | /* assert not subpattern */ | 1575 | | /* <ASSERT_NOT> <skip> <back> <pattern> */ | 1576 | 18.1M | TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, | 1577 | 18.1M | ptr, pattern[1])); | 1578 | 18.1M | if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { | 1579 | 18.1M | state->ptr = ptr - pattern[1]; | 1580 | 18.1M | LASTMARK_SAVE(); | 1581 | 18.1M | if (state->repeat) | 1582 | 18.1M | MARK_PUSH(ctx->lastmark); | 1583 | | | 1584 | 36.3M | DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); | 1585 | 36.3M | if (ret) { | 1586 | 5.77k | if (state->repeat) | 1587 | 5.77k | MARK_POP_DISCARD(ctx->lastmark); | 1588 | 5.77k | RETURN_ON_ERROR(ret); | 1589 | 5.77k | RETURN_FAILURE; | 1590 | 5.77k | } | 1591 | 18.1M | if (state->repeat) | 1592 | 18.1M | MARK_POP(ctx->lastmark); | 1593 | 18.1M | LASTMARK_RESTORE(); | 1594 | 18.1M | } | 1595 | 18.1M | 
pattern += pattern[0]; | 1596 | 18.1M | DISPATCH; | 1597 | | | 1598 | 18.1M | TARGET(SRE_OP_FAILURE): | 1599 | | /* immediate failure */ | 1600 | 0 | TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); | 1601 | 0 | RETURN_FAILURE; | 1602 | | | 1603 | | #if !USE_COMPUTED_GOTOS | 1604 | | default: | 1605 | | #endif | 1606 | | // Also any unused opcodes: | 1607 | 0 | TARGET(SRE_OP_RANGE_UNI_IGNORE): | 1608 | 0 | TARGET(SRE_OP_SUBPATTERN): | 1609 | 0 | TARGET(SRE_OP_RANGE): | 1610 | 0 | TARGET(SRE_OP_NEGATE): | 1611 | 0 | TARGET(SRE_OP_BIGCHARSET): | 1612 | 0 | TARGET(SRE_OP_CHARSET): | 1613 | 0 | TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, | 1614 | 0 | pattern[-1])); | 1615 | 0 | RETURN_ERROR(SRE_ERROR_ILLEGAL); | 1616 | |
| 1617 | 0 | } | 1618 | | | 1619 | 846M | exit: | 1620 | 846M | ctx_pos = ctx->last_ctx_pos; | 1621 | 846M | jump = ctx->jump; | 1622 | 846M | DATA_POP_DISCARD(ctx); | 1623 | 846M | if (ctx_pos == -1) { | 1624 | 125M | state->sigcount = sigcount; | 1625 | 125M | return ret; | 1626 | 125M | } | 1627 | 720M | DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); | 1628 | | | 1629 | 720M | switch (jump) { | 1630 | 204M | case JUMP_MAX_UNTIL_2: | 1631 | 204M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); | 1632 | 204M | goto jump_max_until_2; | 1633 | 118M | case JUMP_MAX_UNTIL_3: | 1634 | 118M | TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); | 1635 | 118M | goto jump_max_until_3; | 1636 | 0 | case JUMP_MIN_UNTIL_2: | 1637 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); | 1638 | 0 | goto jump_min_until_2; | 1639 | 0 | case JUMP_MIN_UNTIL_3: | 1640 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); | 1641 | 0 | goto jump_min_until_3; | 1642 | 52.4M | case JUMP_BRANCH: | 1643 | 52.4M | TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); | 1644 | 52.4M | goto jump_branch; | 1645 | 0 | case JUMP_MAX_UNTIL_1: | 1646 | 0 | TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); | 1647 | 0 | goto jump_max_until_1; | 1648 | 0 | case JUMP_MIN_UNTIL_1: | 1649 | 0 | TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); | 1650 | 0 | goto jump_min_until_1; | 1651 | 0 | case JUMP_POSS_REPEAT_1: | 1652 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); | 1653 | 0 | goto jump_poss_repeat_1; | 1654 | 0 | case JUMP_POSS_REPEAT_2: | 1655 | 0 | TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); | 1656 | 0 | goto jump_poss_repeat_2; | 1657 | 117M | case JUMP_REPEAT: | 1658 | 117M | TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); | 1659 | 117M | goto jump_repeat; | 1660 | 3.32M | case JUMP_REPEAT_ONE_1: | 1661 | 3.32M | TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); | 1662 | 3.32M | goto jump_repeat_one_1; | 1663 | 126M | case JUMP_REPEAT_ONE_2: | 1664 | 126M 
| TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); | 1665 | 126M | goto jump_repeat_one_2; | 1666 | 0 | case JUMP_MIN_REPEAT_ONE: | 1667 | 0 | TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); | 1668 | 0 | goto jump_min_repeat_one; | 1669 | 0 | case JUMP_ATOMIC_GROUP: | 1670 | 0 | TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); | 1671 | 0 | goto jump_atomic_group; | 1672 | 79.5M | case JUMP_ASSERT: | 1673 | 79.5M | TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); | 1674 | 79.5M | goto jump_assert; | 1675 | 18.1M | case JUMP_ASSERT_NOT: | 1676 | 18.1M | TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); | 1677 | 18.1M | goto jump_assert_not; | 1678 | 0 | case JUMP_NONE: | 1679 | 0 | TRACE(("|%p|%p|RETURN %zd\n", pattern, | 1680 | 0 | ptr, ret)); | 1681 | 0 | break; | 1682 | 720M | } | 1683 | | | 1684 | 0 | return ret; /* should never get here */ | 1685 | 720M | } |
|
1686 | | |
1687 | | /* need to reset capturing groups between two SRE(match) callings in loops */ |
1688 | | #define RESET_CAPTURE_GROUP() \ |
1689 | 473M | do { state->lastmark = state->lastindex = -1; } while (0) |
1690 | | |
1691 | | LOCAL(Py_ssize_t) |
1692 | | SRE(search)(SRE_STATE* state, SRE_CODE* pattern) |
1693 | 85.6M | { |
1694 | 85.6M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; |
1695 | 85.6M | SRE_CHAR* end = (SRE_CHAR *)state->end; |
1696 | 85.6M | Py_ssize_t status = 0; |
1697 | 85.6M | Py_ssize_t prefix_len = 0; |
1698 | 85.6M | Py_ssize_t prefix_skip = 0; |
1699 | 85.6M | SRE_CODE* prefix = NULL; |
1700 | 85.6M | SRE_CODE* charset = NULL; |
1701 | 85.6M | SRE_CODE* overlap = NULL; |
1702 | 85.6M | int flags = 0; |
1703 | 85.6M | INIT_TRACE(state); |
1704 | | |
1705 | 85.6M | if (ptr > end) |
1706 | 0 | return 0; |
1707 | | |
1708 | 85.6M | if (pattern[0] == SRE_OP_INFO) { |
1709 | | /* optimization info block */ |
1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ |
1711 | | |
1712 | 85.6M | flags = pattern[2]; |
1713 | | |
1714 | 85.6M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { |
1715 | 1.30M | TRACE(("reject (got %tu chars, need %zu)\n", |
1716 | 1.30M | end - ptr, (size_t) pattern[3])); |
1717 | 1.30M | return 0; |
1718 | 1.30M | } |
1719 | 84.3M | if (pattern[3] > 1) { |
1720 | | /* adjust end point (but make sure we leave at least one |
1721 | | character in there, so literal search will work) */ |
1722 | 4.90M | end -= pattern[3] - 1; |
1723 | 4.90M | if (end <= ptr) |
1724 | 0 | end = ptr; |
1725 | 4.90M | } |
1726 | | |
1727 | 84.3M | if (flags & SRE_INFO_PREFIX) { |
1728 | | /* pattern starts with a known prefix */ |
1729 | | /* <length> <skip> <prefix data> <overlap data> */ |
1730 | 4.90M | prefix_len = pattern[5]; |
1731 | 4.90M | prefix_skip = pattern[6]; |
1732 | 4.90M | prefix = pattern + 7; |
1733 | 4.90M | overlap = prefix + prefix_len - 1; |
1734 | 79.4M | } else if (flags & SRE_INFO_CHARSET) |
1735 | | /* pattern starts with a character from a known set */ |
1736 | | /* <charset> */ |
1737 | 75.4M | charset = pattern + 5; |
1738 | | |
1739 | 84.3M | pattern += 1 + pattern[1]; |
1740 | 84.3M | } |
1741 | | |
1742 | 84.3M | TRACE(("prefix = %p %zd %zd\n", |
1743 | 84.3M | prefix, prefix_len, prefix_skip)); |
1744 | 84.3M | TRACE(("charset = %p\n", charset)); |
1745 | | |
1746 | 84.3M | if (prefix_len == 1) { |
1747 | | /* pattern starts with a literal character */ |
1748 | 4.40M | SRE_CHAR c = (SRE_CHAR) prefix[0]; |
1749 | | #if SIZEOF_SRE_CHAR < 4 |
1750 | 2.47M | if ((SRE_CODE) c != prefix[0]) |
1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ |
1752 | 2.47M | #endif |
1753 | 2.47M | end = (SRE_CHAR *)state->end; |
1754 | 2.47M | state->must_advance = 0; |
1755 | 5.27M | while (ptr < end) { |
1756 | 105M | while (*ptr != c) { |
1757 | 100M | if (++ptr >= end) |
1758 | 390k | return 0; |
1759 | 100M | } |
1760 | 4.88M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); |
1761 | 4.88M | state->start = ptr; |
1762 | 4.88M | state->ptr = ptr + prefix_skip; |
1763 | 4.88M | if (flags & SRE_INFO_LITERAL) |
1764 | 2.06k | return 1; /* we got all of it */ |
1765 | 4.88M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); |
1766 | 4.88M | if (status != 0) |
1767 | 4.00M | return status; |
1768 | 872k | ++ptr; |
1769 | 872k | RESET_CAPTURE_GROUP(); |
1770 | 872k | } |
1771 | 6.13k | return 0; |
1772 | 2.47M | } |
1773 | | |
1774 | 79.9M | if (prefix_len > 1) { |
1775 | | /* pattern starts with a known prefix. use the overlap |
1776 | | table to skip forward as fast as we possibly can */ |
1777 | 496k | Py_ssize_t i = 0; |
1778 | | |
1779 | 496k | end = (SRE_CHAR *)state->end; |
1780 | 496k | if (prefix_len > end - ptr) |
1781 | 0 | return 0; |
1782 | | #if SIZEOF_SRE_CHAR < 4 |
1783 | 1.28M | for (i = 0; i < prefix_len; i++) |
1784 | 859k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) |
1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ |
1786 | 429k | #endif |
1787 | 1.19M | while (ptr < end) { |
1788 | 1.19M | SRE_CHAR c = (SRE_CHAR) prefix[0]; |
1789 | 8.58M | while (*ptr++ != c) { |
1790 | 7.38M | if (ptr >= end) |
1791 | 315 | return 0; |
1792 | 7.38M | } |
1793 | 1.19M | if (ptr >= end) |
1794 | 30 | return 0; |
1795 | | |
1796 | 1.19M | i = 1; |
1797 | 1.19M | state->must_advance = 0; |
1798 | 1.21M | do { |
1799 | 1.21M | if (*ptr == (SRE_CHAR) prefix[i]) { |
1800 | 1.01M | if (++i != prefix_len) { |
1801 | 0 | if (++ptr >= end) |
1802 | 0 | return 0; |
1803 | 0 | continue; |
1804 | 0 | } |
1805 | | /* found a potential match */ |
1806 | 1.01M | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); |
1807 | 1.01M | state->start = ptr - (prefix_len - 1); |
1808 | 1.01M | state->ptr = ptr - (prefix_len - prefix_skip - 1); |
1809 | 1.01M | if (flags & SRE_INFO_LITERAL) |
1810 | 0 | return 1; /* we got all of it */ |
1811 | 1.01M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); |
1812 | 1.01M | if (status != 0) |
1813 | 495k | return status; |
1814 | | /* close but no cigar -- try again */ |
1815 | 519k | if (++ptr >= end) |
1816 | 19 | return 0; |
1817 | 519k | RESET_CAPTURE_GROUP(); |
1818 | 519k | } |
1819 | 723k | i = overlap[i]; |
1820 | 723k | } while (i != 0); |
1821 | 1.19M | } |
1822 | 0 | return 0; |
1823 | 496k | } |
1824 | | |
1825 | 79.4M | if (charset) { |
1826 | | /* pattern starts with a character from a known set */ |
1827 | 75.4M | end = (SRE_CHAR *)state->end; |
1828 | 75.4M | state->must_advance = 0; |
1829 | 78.1M | for (;;) { |
1830 | 357M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) |
1831 | 279M | ptr++; |
1832 | 78.1M | if (ptr >= end) |
1833 | 3.65M | return 0; |
1834 | 74.5M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); |
1835 | 74.5M | state->start = ptr; |
1836 | 74.5M | state->ptr = ptr; |
1837 | 74.5M | status = SRE(match)(state, pattern, 0); |
1838 | 74.5M | if (status != 0) |
1839 | 71.7M | break; |
1840 | 2.72M | ptr++; |
1841 | 2.72M | RESET_CAPTURE_GROUP(); |
1842 | 2.72M | } |
1843 | 75.4M | } else { |
1844 | | /* general case */ |
1845 | 3.96M | assert(ptr <= end); |
1846 | 3.96M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); |
1847 | 3.96M | state->start = state->ptr = ptr; |
1848 | 3.96M | status = SRE(match)(state, pattern, 1); |
1849 | 3.96M | state->must_advance = 0; |
1850 | 3.96M | if (status == 0 && pattern[0] == SRE_OP_AT && |
1851 | 3.96M | (pattern[1] == SRE_AT_BEGINNING || |
1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) |
1853 | 0 | { |
1854 | 0 | state->start = state->ptr = ptr = end; |
1855 | 0 | return 0; |
1856 | 0 | } |
1857 | 472M | while (status == 0 && ptr < end) { |
1858 | 469M | ptr++; |
1859 | 469M | RESET_CAPTURE_GROUP(); |
1860 | 469M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); |
1861 | 469M | state->start = state->ptr = ptr; |
1862 | 469M | status = SRE(match)(state, pattern, 0); |
1863 | 469M | } |
1864 | 3.96M | } |
1865 | | |
1866 | 75.7M | return status; |
1867 | 79.4M | } Line | Count | Source | 1693 | 32.7M | { | 1694 | 32.7M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 32.7M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 32.7M | Py_ssize_t status = 0; | 1697 | 32.7M | Py_ssize_t prefix_len = 0; | 1698 | 32.7M | Py_ssize_t prefix_skip = 0; | 1699 | 32.7M | SRE_CODE* prefix = NULL; | 1700 | 32.7M | SRE_CODE* charset = NULL; | 1701 | 32.7M | SRE_CODE* overlap = NULL; | 1702 | 32.7M | int flags = 0; | 1703 | 32.7M | INIT_TRACE(state); | 1704 | | | 1705 | 32.7M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 32.7M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 32.7M | flags = pattern[2]; | 1713 | | | 1714 | 32.7M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 1.19M | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 1.19M | end - ptr, (size_t) pattern[3])); | 1717 | 1.19M | return 0; | 1718 | 1.19M | } | 1719 | 31.5M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 1.07M | end -= pattern[3] - 1; | 1723 | 1.07M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 1.07M | } | 1726 | | | 1727 | 31.5M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 1.07M | prefix_len = pattern[5]; | 1731 | 1.07M | prefix_skip = pattern[6]; | 1732 | 1.07M | prefix = pattern + 7; | 1733 | 1.07M | overlap = prefix + prefix_len - 1; | 1734 | 30.4M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 27.6M | charset = pattern + 5; | 1738 | | | 1739 | 31.5M | pattern += 1 + pattern[1]; | 1740 | 31.5M | } | 1741 | | | 1742 | 31.5M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 
31.5M | prefix, prefix_len, prefix_skip)); | 1744 | 31.5M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 31.5M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 1.07M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | 1.07M | #if SIZEOF_SRE_CHAR < 4 | 1750 | 1.07M | if ((SRE_CODE) c != prefix[0]) | 1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | 1.07M | #endif | 1753 | 1.07M | end = (SRE_CHAR *)state->end; | 1754 | 1.07M | state->must_advance = 0; | 1755 | 1.25M | while (ptr < end) { | 1756 | 20.3M | while (*ptr != c) { | 1757 | 19.3M | if (++ptr >= end) | 1758 | 279k | return 0; | 1759 | 19.3M | } | 1760 | 968k | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 968k | state->start = ptr; | 1762 | 968k | state->ptr = ptr + prefix_skip; | 1763 | 968k | if (flags & SRE_INFO_LITERAL) | 1764 | 296 | return 1; /* we got all of it */ | 1765 | 968k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 968k | if (status != 0) | 1767 | 787k | return status; | 1768 | 180k | ++ptr; | 1769 | 180k | RESET_CAPTURE_GROUP(); | 1770 | 180k | } | 1771 | 3.67k | return 0; | 1772 | 1.07M | } | 1773 | | | 1774 | 30.4M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 6.87k | Py_ssize_t i = 0; | 1778 | | | 1779 | 6.87k | end = (SRE_CHAR *)state->end; | 1780 | 6.87k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | 6.87k | #if SIZEOF_SRE_CHAR < 4 | 1783 | 20.6k | for (i = 0; i < prefix_len; i++) | 1784 | 13.7k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | 6.87k | #endif | 1787 | 392k | while (ptr < end) { | 1788 | 392k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 2.80M | while (*ptr++ != c) { | 1790 | 2.41M | if (ptr >= end) | 1791 | 62 | return 0; | 1792 | 2.41M | } | 1793 | 392k | if (ptr >= end) | 1794 | 12 | return 0; | 1795 | | | 1796 | 392k | i = 1; | 1797 | 392k | state->must_advance = 0; | 1798 | 393k | do { | 1799 | 393k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 296k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 296k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 296k | state->start = ptr - (prefix_len - 1); | 1808 | 296k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 296k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 296k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 296k | if (status != 0) | 1813 | 6.79k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 289k | if (++ptr >= end) | 1816 | 10 | return 0; | 1817 | 289k | RESET_CAPTURE_GROUP(); | 1818 | 289k | } | 1819 | 386k | i = overlap[i]; | 1820 | 386k | } while (i != 0); | 1821 | 392k | } | 1822 | 0 | return 0; | 1823 | 6.87k | } | 1824 | | | 1825 | 30.4M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 27.6M | end = (SRE_CHAR *)state->end; | 1828 | 27.6M | state->must_advance = 0; | 1829 | 29.5M | for 
(;;) { | 1830 | 78.1M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 48.6M | ptr++; | 1832 | 29.5M | if (ptr >= end) | 1833 | 2.55M | return 0; | 1834 | 26.9M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 26.9M | state->start = ptr; | 1836 | 26.9M | state->ptr = ptr; | 1837 | 26.9M | status = SRE(match)(state, pattern, 0); | 1838 | 26.9M | if (status != 0) | 1839 | 25.1M | break; | 1840 | 1.82M | ptr++; | 1841 | 1.82M | RESET_CAPTURE_GROUP(); | 1842 | 1.82M | } | 1843 | 27.6M | } else { | 1844 | | /* general case */ | 1845 | 2.77M | assert(ptr <= end); | 1846 | 2.77M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 2.77M | state->start = state->ptr = ptr; | 1848 | 2.77M | status = SRE(match)(state, pattern, 1); | 1849 | 2.77M | state->must_advance = 0; | 1850 | 2.77M | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 2.77M | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 154M | while (status == 0 && ptr < end) { | 1858 | 151M | ptr++; | 1859 | 151M | RESET_CAPTURE_GROUP(); | 1860 | 151M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 151M | state->start = state->ptr = ptr; | 1862 | 151M | status = SRE(match)(state, pattern, 0); | 1863 | 151M | } | 1864 | 2.77M | } | 1865 | | | 1866 | 27.9M | return status; | 1867 | 30.4M | } |
Line | Count | Source | 1693 | 45.4M | { | 1694 | 45.4M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 45.4M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 45.4M | Py_ssize_t status = 0; | 1697 | 45.4M | Py_ssize_t prefix_len = 0; | 1698 | 45.4M | Py_ssize_t prefix_skip = 0; | 1699 | 45.4M | SRE_CODE* prefix = NULL; | 1700 | 45.4M | SRE_CODE* charset = NULL; | 1701 | 45.4M | SRE_CODE* overlap = NULL; | 1702 | 45.4M | int flags = 0; | 1703 | 45.4M | INIT_TRACE(state); | 1704 | | | 1705 | 45.4M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 45.4M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 45.4M | flags = pattern[2]; | 1713 | | | 1714 | 45.4M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 104k | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 104k | end - ptr, (size_t) pattern[3])); | 1717 | 104k | return 0; | 1718 | 104k | } | 1719 | 45.3M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 1.82M | end -= pattern[3] - 1; | 1723 | 1.82M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 1.82M | } | 1726 | | | 1727 | 45.3M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 1.82M | prefix_len = pattern[5]; | 1731 | 1.82M | prefix_skip = pattern[6]; | 1732 | 1.82M | prefix = pattern + 7; | 1733 | 1.82M | overlap = prefix + prefix_len - 1; | 1734 | 43.5M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 42.6M | charset = pattern + 5; | 1738 | | | 1739 | 45.3M | pattern += 1 + pattern[1]; | 1740 | 45.3M | } | 1741 | | | 1742 | 45.3M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 45.3M | prefix, 
prefix_len, prefix_skip)); | 1744 | 45.3M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 45.3M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 1.40M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | 1.40M | #if SIZEOF_SRE_CHAR < 4 | 1750 | 1.40M | if ((SRE_CODE) c != prefix[0]) | 1751 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | 1.40M | #endif | 1753 | 1.40M | end = (SRE_CHAR *)state->end; | 1754 | 1.40M | state->must_advance = 0; | 1755 | 1.75M | while (ptr < end) { | 1756 | 55.9M | while (*ptr != c) { | 1757 | 54.2M | if (++ptr >= end) | 1758 | 102k | return 0; | 1759 | 54.2M | } | 1760 | 1.64M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 1.64M | state->start = ptr; | 1762 | 1.64M | state->ptr = ptr + prefix_skip; | 1763 | 1.64M | if (flags & SRE_INFO_LITERAL) | 1764 | 557 | return 1; /* we got all of it */ | 1765 | 1.64M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 1.64M | if (status != 0) | 1767 | 1.29M | return status; | 1768 | 349k | ++ptr; | 1769 | 349k | RESET_CAPTURE_GROUP(); | 1770 | 349k | } | 1771 | 2.02k | return 0; | 1772 | 1.40M | } | 1773 | | | 1774 | 43.9M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 422k | Py_ssize_t i = 0; | 1778 | | | 1779 | 422k | end = (SRE_CHAR *)state->end; | 1780 | 422k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | 422k | #if SIZEOF_SRE_CHAR < 4 | 1783 | 1.26M | for (i = 0; i < prefix_len; i++) | 1784 | 845k | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | 0 | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | 422k | #endif | 1787 | 659k | while (ptr < end) { | 1788 | 659k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 4.25M | while (*ptr++ != c) { | 1790 | 3.59M | if (ptr >= end) | 1791 | 132 | return 0; | 1792 | 3.59M | } | 1793 | 659k | if (ptr >= end) | 1794 | 6 | return 0; | 1795 | | | 1796 | 659k | i = 1; | 1797 | 659k | state->must_advance = 0; | 1798 | 659k | do { | 1799 | 659k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 587k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 587k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 587k | state->start = ptr - (prefix_len - 1); | 1808 | 587k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 587k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 587k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 587k | if (status != 0) | 1813 | 422k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 165k | if (++ptr >= end) | 1816 | 4 | return 0; | 1817 | 165k | RESET_CAPTURE_GROUP(); | 1818 | 165k | } | 1819 | 237k | i = overlap[i]; | 1820 | 237k | } while (i != 0); | 1821 | 659k | } | 1822 | 0 | return 0; | 1823 | 422k | } | 1824 | | | 1825 | 43.5M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 42.6M | end = (SRE_CHAR *)state->end; | 1828 | 42.6M | state->must_advance = 0; | 1829 | 43.0M | for (;;) { | 
1830 | 198M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 155M | ptr++; | 1832 | 43.0M | if (ptr >= end) | 1833 | 1.03M | return 0; | 1834 | 42.0M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 42.0M | state->start = ptr; | 1836 | 42.0M | state->ptr = ptr; | 1837 | 42.0M | status = SRE(match)(state, pattern, 0); | 1838 | 42.0M | if (status != 0) | 1839 | 41.5M | break; | 1840 | 468k | ptr++; | 1841 | 468k | RESET_CAPTURE_GROUP(); | 1842 | 468k | } | 1843 | 42.6M | } else { | 1844 | | /* general case */ | 1845 | 911k | assert(ptr <= end); | 1846 | 911k | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 911k | state->start = state->ptr = ptr; | 1848 | 911k | status = SRE(match)(state, pattern, 1); | 1849 | 911k | state->must_advance = 0; | 1850 | 911k | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 911k | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 241M | while (status == 0 && ptr < end) { | 1858 | 240M | ptr++; | 1859 | 240M | RESET_CAPTURE_GROUP(); | 1860 | 240M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 240M | state->start = state->ptr = ptr; | 1862 | 240M | status = SRE(match)(state, pattern, 0); | 1863 | 240M | } | 1864 | 911k | } | 1865 | | | 1866 | 42.4M | return status; | 1867 | 43.5M | } |
Line | Count | Source | 1693 | 7.44M | { | 1694 | 7.44M | SRE_CHAR* ptr = (SRE_CHAR *)state->start; | 1695 | 7.44M | SRE_CHAR* end = (SRE_CHAR *)state->end; | 1696 | 7.44M | Py_ssize_t status = 0; | 1697 | 7.44M | Py_ssize_t prefix_len = 0; | 1698 | 7.44M | Py_ssize_t prefix_skip = 0; | 1699 | 7.44M | SRE_CODE* prefix = NULL; | 1700 | 7.44M | SRE_CODE* charset = NULL; | 1701 | 7.44M | SRE_CODE* overlap = NULL; | 1702 | 7.44M | int flags = 0; | 1703 | 7.44M | INIT_TRACE(state); | 1704 | | | 1705 | 7.44M | if (ptr > end) | 1706 | 0 | return 0; | 1707 | | | 1708 | 7.44M | if (pattern[0] == SRE_OP_INFO) { | 1709 | | /* optimization info block */ | 1710 | | /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ | 1711 | | | 1712 | 7.44M | flags = pattern[2]; | 1713 | | | 1714 | 7.44M | if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { | 1715 | 7.36k | TRACE(("reject (got %tu chars, need %zu)\n", | 1716 | 7.36k | end - ptr, (size_t) pattern[3])); | 1717 | 7.36k | return 0; | 1718 | 7.36k | } | 1719 | 7.43M | if (pattern[3] > 1) { | 1720 | | /* adjust end point (but make sure we leave at least one | 1721 | | character in there, so literal search will work) */ | 1722 | 1.99M | end -= pattern[3] - 1; | 1723 | 1.99M | if (end <= ptr) | 1724 | 0 | end = ptr; | 1725 | 1.99M | } | 1726 | | | 1727 | 7.43M | if (flags & SRE_INFO_PREFIX) { | 1728 | | /* pattern starts with a known prefix */ | 1729 | | /* <length> <skip> <prefix data> <overlap data> */ | 1730 | 2.00M | prefix_len = pattern[5]; | 1731 | 2.00M | prefix_skip = pattern[6]; | 1732 | 2.00M | prefix = pattern + 7; | 1733 | 2.00M | overlap = prefix + prefix_len - 1; | 1734 | 5.43M | } else if (flags & SRE_INFO_CHARSET) | 1735 | | /* pattern starts with a character from a known set */ | 1736 | | /* <charset> */ | 1737 | 5.16M | charset = pattern + 5; | 1738 | | | 1739 | 7.43M | pattern += 1 + pattern[1]; | 1740 | 7.43M | } | 1741 | | | 1742 | 7.43M | TRACE(("prefix = %p %zd %zd\n", | 1743 | 7.43M | prefix, 
prefix_len, prefix_skip)); | 1744 | 7.43M | TRACE(("charset = %p\n", charset)); | 1745 | | | 1746 | 7.43M | if (prefix_len == 1) { | 1747 | | /* pattern starts with a literal character */ | 1748 | 1.93M | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1749 | | #if SIZEOF_SRE_CHAR < 4 | 1750 | | if ((SRE_CODE) c != prefix[0]) | 1751 | | return 0; /* literal can't match: doesn't fit in char width */ | 1752 | | #endif | 1753 | 1.93M | end = (SRE_CHAR *)state->end; | 1754 | 1.93M | state->must_advance = 0; | 1755 | 2.27M | while (ptr < end) { | 1756 | 28.7M | while (*ptr != c) { | 1757 | 26.5M | if (++ptr >= end) | 1758 | 7.90k | return 0; | 1759 | 26.5M | } | 1760 | 2.26M | TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); | 1761 | 2.26M | state->start = ptr; | 1762 | 2.26M | state->ptr = ptr + prefix_skip; | 1763 | 2.26M | if (flags & SRE_INFO_LITERAL) | 1764 | 1.20k | return 1; /* we got all of it */ | 1765 | 2.26M | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1766 | 2.26M | if (status != 0) | 1767 | 1.92M | return status; | 1768 | 342k | ++ptr; | 1769 | 342k | RESET_CAPTURE_GROUP(); | 1770 | 342k | } | 1771 | 429 | return 0; | 1772 | 1.93M | } | 1773 | | | 1774 | 5.50M | if (prefix_len > 1) { | 1775 | | /* pattern starts with a known prefix. 
use the overlap | 1776 | | table to skip forward as fast as we possibly can */ | 1777 | 66.3k | Py_ssize_t i = 0; | 1778 | | | 1779 | 66.3k | end = (SRE_CHAR *)state->end; | 1780 | 66.3k | if (prefix_len > end - ptr) | 1781 | 0 | return 0; | 1782 | | #if SIZEOF_SRE_CHAR < 4 | 1783 | | for (i = 0; i < prefix_len; i++) | 1784 | | if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) | 1785 | | return 0; /* literal can't match: doesn't fit in char width */ | 1786 | | #endif | 1787 | 147k | while (ptr < end) { | 1788 | 147k | SRE_CHAR c = (SRE_CHAR) prefix[0]; | 1789 | 1.51M | while (*ptr++ != c) { | 1790 | 1.37M | if (ptr >= end) | 1791 | 121 | return 0; | 1792 | 1.37M | } | 1793 | 146k | if (ptr >= end) | 1794 | 12 | return 0; | 1795 | | | 1796 | 146k | i = 1; | 1797 | 146k | state->must_advance = 0; | 1798 | 165k | do { | 1799 | 165k | if (*ptr == (SRE_CHAR) prefix[i]) { | 1800 | 131k | if (++i != prefix_len) { | 1801 | 0 | if (++ptr >= end) | 1802 | 0 | return 0; | 1803 | 0 | continue; | 1804 | 0 | } | 1805 | | /* found a potential match */ | 1806 | 131k | TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); | 1807 | 131k | state->start = ptr - (prefix_len - 1); | 1808 | 131k | state->ptr = ptr - (prefix_len - prefix_skip - 1); | 1809 | 131k | if (flags & SRE_INFO_LITERAL) | 1810 | 0 | return 1; /* we got all of it */ | 1811 | 131k | status = SRE(match)(state, pattern + 2*prefix_skip, 0); | 1812 | 131k | if (status != 0) | 1813 | 66.2k | return status; | 1814 | | /* close but no cigar -- try again */ | 1815 | 64.9k | if (++ptr >= end) | 1816 | 5 | return 0; | 1817 | 64.9k | RESET_CAPTURE_GROUP(); | 1818 | 64.9k | } | 1819 | 99.6k | i = overlap[i]; | 1820 | 99.6k | } while (i != 0); | 1821 | 146k | } | 1822 | 0 | return 0; | 1823 | 66.3k | } | 1824 | | | 1825 | 5.43M | if (charset) { | 1826 | | /* pattern starts with a character from a known set */ | 1827 | 5.16M | end = (SRE_CHAR *)state->end; | 1828 | 5.16M | state->must_advance = 0; | 1829 | 5.59M | for (;;) { | 1830 | 
80.6M | while (ptr < end && !SRE(charset)(state, charset, *ptr)) | 1831 | 75.0M | ptr++; | 1832 | 5.59M | if (ptr >= end) | 1833 | 59.8k | return 0; | 1834 | 5.53M | TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); | 1835 | 5.53M | state->start = ptr; | 1836 | 5.53M | state->ptr = ptr; | 1837 | 5.53M | status = SRE(match)(state, pattern, 0); | 1838 | 5.53M | if (status != 0) | 1839 | 5.10M | break; | 1840 | 430k | ptr++; | 1841 | 430k | RESET_CAPTURE_GROUP(); | 1842 | 430k | } | 1843 | 5.16M | } else { | 1844 | | /* general case */ | 1845 | 274k | assert(ptr <= end); | 1846 | 274k | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1847 | 274k | state->start = state->ptr = ptr; | 1848 | 274k | status = SRE(match)(state, pattern, 1); | 1849 | 274k | state->must_advance = 0; | 1850 | 274k | if (status == 0 && pattern[0] == SRE_OP_AT && | 1851 | 274k | (pattern[1] == SRE_AT_BEGINNING || | 1852 | 0 | pattern[1] == SRE_AT_BEGINNING_STRING)) | 1853 | 0 | { | 1854 | 0 | state->start = state->ptr = ptr = end; | 1855 | 0 | return 0; | 1856 | 0 | } | 1857 | 77.6M | while (status == 0 && ptr < end) { | 1858 | 77.4M | ptr++; | 1859 | 77.4M | RESET_CAPTURE_GROUP(); | 1860 | 77.4M | TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); | 1861 | 77.4M | state->start = state->ptr = ptr; | 1862 | 77.4M | status = SRE(match)(state, pattern, 0); | 1863 | 77.4M | } | 1864 | 274k | } | 1865 | | | 1866 | 5.37M | return status; | 1867 | 5.43M | } |
|
1868 | | |
1869 | | #undef SRE_CHAR |
1870 | | #undef SIZEOF_SRE_CHAR |
1871 | | #undef SRE |
1872 | | |
1873 | | /* vim:ts=4:sw=4:et |
1874 | | */ |