Line | Count | Source |
1 | | /********************************************************************** |
2 | | regexec.c - Onigmo (Oniguruma-mod) (regular expression library) |
3 | | **********************************************************************/ |
4 | | /*- |
5 | | * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> |
6 | | * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp> |
7 | | * All rights reserved. |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | */ |
30 | | |
31 | | #include "regint.h" |
32 | | |
33 | | #ifdef RUBY |
34 | | # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
35 | | #else |
36 | | # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
37 | | #endif |
38 | | |
39 | | #ifndef USE_TOKEN_THREADED_VM |
40 | | # ifdef __GNUC__ |
41 | | # define USE_TOKEN_THREADED_VM 1 |
42 | | # else |
43 | | # define USE_TOKEN_THREADED_VM 0 |
44 | | # endif |
45 | | #endif |
46 | | |
#ifdef RUBY
# define ENC_DUMMY_FLAG (1<<24)
/*
 * Returns non-zero when `enc` has a minimum character length of 1 and its
 * ruby_encoding_index does not carry ENC_DUMMY_FLAG.
 */
static inline int
rb_enc_asciicompat(OnigEncoding enc)
{
  return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
}
/*
 * Replacement for ONIGENC_IS_MBC_ASCII_WORD: for encodings accepted by
 * rb_enc_asciicompat() the first byte is tested directly (alphanumeric or
 * '_'); otherwise the character is decoded and checked against
 * ONIGENC_CTYPE_WORD.  The `s` argument is parenthesized before it is
 * dereferenced so that expression arguments (e.g. `p + 1`) expand correctly.
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
                   onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
#endif /* RUBY */
60 | | |
61 | | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
62 | | # define ONIGENC_IS_MBC_CRNL(enc,p,end) \ |
63 | 0 | (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ |
64 | 0 | ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10) |
65 | | # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ |
66 | 611M | is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) |
67 | | static int |
68 | | is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, |
69 | | const UChar *end, OnigOptionType option, int check_prev) |
70 | 611M | { |
71 | 611M | if (IS_NEWLINE_CRLF(option)) { |
72 | 0 | if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) { |
73 | 0 | if (check_prev) { |
74 | 0 | const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); |
75 | 0 | if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) |
76 | 0 | return 0; |
77 | 0 | else |
78 | 0 | return 1; |
79 | 0 | } |
80 | 0 | else |
81 | 0 | return 1; |
82 | 0 | } |
83 | 0 | else { |
84 | 0 | const UChar *pnext = p + enclen(enc, p, end); |
85 | 0 | if (pnext < end && |
86 | 0 | ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && |
87 | 0 | ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) |
88 | 0 | return 1; |
89 | 0 | if (ONIGENC_IS_MBC_NEWLINE(enc, p, end)) |
90 | 0 | return 1; |
91 | 0 | return 0; |
92 | 0 | } |
93 | 0 | } |
94 | 611M | else { |
95 | 611M | return ONIGENC_IS_MBC_NEWLINE(enc, p, end); |
96 | 611M | } |
97 | 611M | } |
98 | | #else /* USE_CRNL_AS_LINE_TERMINATOR */ |
99 | | # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ |
100 | | ONIGENC_IS_MBC_NEWLINE((enc), (p), (end)) |
101 | | #endif /* USE_CRNL_AS_LINE_TERMINATOR */ |
102 | | |
103 | | #ifdef USE_CAPTURE_HISTORY |
104 | | static void history_tree_free(OnigCaptureTreeNode* node); |
105 | | |
106 | | static void |
107 | | history_tree_clear(OnigCaptureTreeNode* node) |
108 | | { |
109 | | int i; |
110 | | |
111 | | if (IS_NOT_NULL(node)) { |
112 | | for (i = 0; i < node->num_childs; i++) { |
113 | | if (IS_NOT_NULL(node->childs[i])) { |
114 | | history_tree_free(node->childs[i]); |
115 | | } |
116 | | } |
117 | | for (i = 0; i < node->allocated; i++) { |
118 | | node->childs[i] = (OnigCaptureTreeNode* )0; |
119 | | } |
120 | | node->num_childs = 0; |
121 | | node->beg = ONIG_REGION_NOTPOS; |
122 | | node->end = ONIG_REGION_NOTPOS; |
123 | | node->group = -1; |
124 | | xfree(node->childs); |
125 | | node->childs = (OnigCaptureTreeNode** )0; |
126 | | } |
127 | | } |
128 | | |
129 | | static void |
130 | | history_tree_free(OnigCaptureTreeNode* node) |
131 | | { |
132 | | history_tree_clear(node); |
133 | | xfree(node); |
134 | | } |
135 | | |
136 | | static void |
137 | | history_root_free(OnigRegion* r) |
138 | | { |
139 | | if (IS_NOT_NULL(r->history_root)) { |
140 | | history_tree_free(r->history_root); |
141 | | r->history_root = (OnigCaptureTreeNode* )0; |
142 | | } |
143 | | } |
144 | | |
145 | | static OnigCaptureTreeNode* |
146 | | history_node_new(void) |
147 | | { |
148 | | OnigCaptureTreeNode* node; |
149 | | |
150 | | node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); |
151 | | CHECK_NULL_RETURN(node); |
152 | | node->childs = (OnigCaptureTreeNode** )0; |
153 | | node->allocated = 0; |
154 | | node->num_childs = 0; |
155 | | node->group = -1; |
156 | | node->beg = ONIG_REGION_NOTPOS; |
157 | | node->end = ONIG_REGION_NOTPOS; |
158 | | |
159 | | return node; |
160 | | } |
161 | | |
162 | | static int |
163 | | history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) |
164 | | { |
165 | | # define HISTORY_TREE_INIT_ALLOC_SIZE 8 |
166 | | |
167 | | if (parent->num_childs >= parent->allocated) { |
168 | | int n, i; |
169 | | |
170 | | if (IS_NULL(parent->childs)) { |
171 | | n = HISTORY_TREE_INIT_ALLOC_SIZE; |
172 | | parent->childs = |
173 | | (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); |
174 | | CHECK_NULL_RETURN_MEMERR(parent->childs); |
175 | | } |
176 | | else { |
177 | | OnigCaptureTreeNode** tmp; |
178 | | n = parent->allocated * 2; |
179 | | tmp = |
180 | | (OnigCaptureTreeNode** )xrealloc(parent->childs, |
181 | | sizeof(OnigCaptureTreeNode*) * n); |
182 | | if (tmp == 0) { |
183 | | history_tree_clear(parent); |
184 | | return ONIGERR_MEMORY; |
185 | | } |
186 | | parent->childs = tmp; |
187 | | } |
188 | | for (i = parent->allocated; i < n; i++) { |
189 | | parent->childs[i] = (OnigCaptureTreeNode* )0; |
190 | | } |
191 | | parent->allocated = n; |
192 | | } |
193 | | |
194 | | parent->childs[parent->num_childs] = child; |
195 | | parent->num_childs++; |
196 | | return 0; |
197 | | } |
198 | | |
199 | | static OnigCaptureTreeNode* |
200 | | history_tree_clone(OnigCaptureTreeNode* node) |
201 | | { |
202 | | int i, r; |
203 | | OnigCaptureTreeNode *clone, *child; |
204 | | |
205 | | clone = history_node_new(); |
206 | | CHECK_NULL_RETURN(clone); |
207 | | |
208 | | clone->beg = node->beg; |
209 | | clone->end = node->end; |
210 | | for (i = 0; i < node->num_childs; i++) { |
211 | | child = history_tree_clone(node->childs[i]); |
212 | | if (IS_NULL(child)) { |
213 | | history_tree_free(clone); |
214 | | return (OnigCaptureTreeNode* )0; |
215 | | } |
216 | | r = history_tree_add_child(clone, child); |
217 | | if (r != 0) { |
218 | | history_tree_free(child); |
219 | | history_tree_free(clone); |
220 | | return (OnigCaptureTreeNode* )0; |
221 | | } |
222 | | } |
223 | | |
224 | | return clone; |
225 | | } |
226 | | |
227 | | extern OnigCaptureTreeNode* |
228 | | onig_get_capture_tree(OnigRegion* region) |
229 | | { |
230 | | return region->history_root; |
231 | | } |
232 | | #endif /* USE_CAPTURE_HISTORY */ |
233 | | |
234 | | #ifdef USE_MATCH_CACHE |
235 | | |
236 | | /* |
237 | | Glossary for "match cache" |
238 | | |
239 | | "match cache" or "match cache optimization" |
240 | | The `Regexp#match` optimization by using a cache. |
241 | | |
242 | | "cache opcode" |
243 | | A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc.). |
244 | | It corresponds to one or more cache points. |
245 | | |
246 | | "cache point" |
247 | | A cacheable point on matching. |
248 | | Usually, there is a one-to-one correspondence between a cache opcode and a cache point, |
249 | | but a cache opcode between `OP_REPEAT` and `OP_REPEAT_INC` corresponds to several |
250 | | cache points, depending on the repetition count. |
251 | | |
252 | | "match cache point" |
253 | | A pair of a cache point and a position on an input string. |
254 | | We encode a match cache point to an integer value by the following equation: |
255 | | "match cache point" = "position on input string" * "total number of cache points" + "cache point" |
256 | | |
257 | | "match cache buffer" |
258 | | A bit-array for memoizing (recording) match cache points once backtracked. |
259 | | */ |
260 | | |
261 | | static OnigPosition count_num_cache_opcodes_inner( |
262 | | const regex_t* reg, |
263 | | MemNumType current_repeat_mem, int lookaround_nesting, |
264 | | UChar** pp, long* num_cache_opcodes_ptr |
265 | | ) |
266 | 362k | { |
267 | 362k | UChar* p = *pp; |
268 | 362k | UChar* pend = reg->p + reg->used; |
269 | 362k | LengthType len; |
270 | 362k | MemNumType repeat_mem; |
271 | 362k | OnigEncoding enc = reg->enc; |
272 | 362k | long num_cache_opcodes = *num_cache_opcodes_ptr; |
273 | 362k | OnigPosition result; |
274 | | |
275 | 4.42M | while (p < pend) { |
276 | 4.34M | switch (*p++) { |
277 | 0 | case OP_FINISH: |
278 | 77.1k | case OP_END: |
279 | 77.1k | break; |
280 | | |
281 | 342k | case OP_EXACT1: p++; break; |
282 | 124k | case OP_EXACT2: p += 2; break; |
283 | 50.6k | case OP_EXACT3: p += 3; break; |
284 | 56.2k | case OP_EXACT4: p += 4; break; |
285 | 29.5k | case OP_EXACT5: p += 5; break; |
286 | 227k | case OP_EXACTN: |
287 | 227k | GET_LENGTH_INC(len, p); p += len; break; |
288 | 3.57k | case OP_EXACTMB2N1: p += 2; break; |
289 | 384 | case OP_EXACTMB2N2: p += 4; break; |
290 | 1.27k | case OP_EXACTMB2N3: p += 6; break; |
291 | 141 | case OP_EXACTMB2N: |
292 | 141 | GET_LENGTH_INC(len, p); p += len * 2; break; |
293 | 4.24k | case OP_EXACTMB3N: |
294 | 4.24k | GET_LENGTH_INC(len, p); p += len * 3; break; |
295 | 429 | case OP_EXACTMBN: |
296 | 429 | { |
297 | 429 | int mb_len; |
298 | 429 | GET_LENGTH_INC(mb_len, p); |
299 | 429 | GET_LENGTH_INC(len, p); |
300 | 429 | p += mb_len * len; |
301 | 429 | } |
302 | 429 | break; |
303 | | |
304 | 13.6k | case OP_EXACT1_IC: |
305 | 13.6k | len = enclen(enc, p, pend); p += len; break; |
306 | 19.1k | case OP_EXACTN_IC: |
307 | 19.1k | GET_LENGTH_INC(len, p); p += len; break; |
308 | | |
309 | 4.00k | case OP_CCLASS: |
310 | 21.8k | case OP_CCLASS_NOT: |
311 | 21.8k | p += SIZE_BITSET; break; |
312 | 33.7k | case OP_CCLASS_MB: |
313 | 41.6k | case OP_CCLASS_MB_NOT: |
314 | 41.6k | GET_LENGTH_INC(len, p); p += len; break; |
315 | 2.78k | case OP_CCLASS_MIX: |
316 | 10.4k | case OP_CCLASS_MIX_NOT: |
317 | 10.4k | p += SIZE_BITSET; |
318 | 10.4k | GET_LENGTH_INC(len, p); |
319 | 10.4k | p += len; |
320 | 10.4k | break; |
321 | | |
322 | 172k | case OP_ANYCHAR: |
323 | 607k | case OP_ANYCHAR_ML: |
324 | 607k | break; |
325 | 87.0k | case OP_ANYCHAR_STAR: |
326 | 107k | case OP_ANYCHAR_ML_STAR: |
327 | 107k | num_cache_opcodes++; break; |
328 | 71.1k | case OP_ANYCHAR_STAR_PEEK_NEXT: |
329 | 103k | case OP_ANYCHAR_ML_STAR_PEEK_NEXT: |
330 | 103k | p++; num_cache_opcodes++; break; |
331 | | |
332 | 3.00k | case OP_WORD: |
333 | 4.60k | case OP_NOT_WORD: |
334 | 36.4k | case OP_WORD_BOUND: |
335 | 44.1k | case OP_NOT_WORD_BOUND: |
336 | 44.1k | case OP_WORD_BEGIN: |
337 | 44.1k | case OP_WORD_END: |
338 | 44.1k | break; |
339 | | |
340 | 6.04k | case OP_ASCII_WORD: |
341 | 12.4k | case OP_NOT_ASCII_WORD: |
342 | 17.5k | case OP_ASCII_WORD_BOUND: |
343 | 21.6k | case OP_NOT_ASCII_WORD_BOUND: |
344 | 21.6k | case OP_ASCII_WORD_BEGIN: |
345 | 21.6k | case OP_ASCII_WORD_END: |
346 | 21.6k | break; |
347 | | |
348 | 4.92k | case OP_BEGIN_BUF: |
349 | 5.88k | case OP_END_BUF: |
350 | 21.6k | case OP_BEGIN_LINE: |
351 | 41.9k | case OP_END_LINE: |
352 | 42.6k | case OP_SEMI_END_BUF: |
353 | 44.9k | case OP_BEGIN_POSITION: |
354 | 44.9k | break; |
355 | | |
356 | 322 | case OP_BACKREF1: |
357 | 707 | case OP_BACKREF2: |
358 | 1.25k | case OP_BACKREFN: |
359 | 1.70k | case OP_BACKREFN_IC: |
360 | 2.30k | case OP_BACKREF_MULTI: |
361 | 2.62k | case OP_BACKREF_MULTI_IC: |
362 | 3.01k | case OP_BACKREF_WITH_LEVEL: |
363 | 3.01k | goto impossible; |
364 | | |
365 | 3.69k | case OP_MEMORY_START: |
366 | 60.8k | case OP_MEMORY_START_PUSH: |
367 | 69.2k | case OP_MEMORY_END_PUSH: |
368 | 69.2k | case OP_MEMORY_END_PUSH_REC: |
369 | 118k | case OP_MEMORY_END: |
370 | 118k | case OP_MEMORY_END_REC: |
371 | 118k | p += SIZE_MEMNUM; |
372 | | // A memory (capture) in look-around is found. |
373 | 118k | if (lookaround_nesting != 0) { |
374 | 157 | goto impossible; |
375 | 157 | } |
376 | 118k | break; |
377 | | |
378 | 118k | case OP_KEEP: |
379 | 223 | break; |
380 | | |
381 | 0 | case OP_FAIL: |
382 | 0 | break; |
383 | 763k | case OP_JUMP: |
384 | 763k | p += SIZE_RELADDR; |
385 | 763k | break; |
386 | 785k | case OP_PUSH: |
387 | 785k | p += SIZE_RELADDR; |
388 | 785k | num_cache_opcodes++; |
389 | 785k | break; |
390 | 0 | case OP_POP: |
391 | 0 | break; |
392 | 0 | case OP_PUSH_OR_JUMP_EXACT1: |
393 | 81.8k | case OP_PUSH_IF_PEEK_NEXT: |
394 | 81.8k | p += SIZE_RELADDR + 1; num_cache_opcodes++; break; |
395 | 18.5k | case OP_REPEAT: |
396 | 41.3k | case OP_REPEAT_NG: |
397 | 41.3k | if (current_repeat_mem != -1) { |
398 | | // A nested OP_REPEAT is not yet supported. |
399 | 5.19k | goto impossible; |
400 | 5.19k | } |
401 | 36.1k | GET_MEMNUM_INC(repeat_mem, p); |
402 | 36.1k | p += SIZE_RELADDR; |
403 | 36.1k | if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { |
404 | 5.02k | long dummy_num_cache_opcodes = 0; |
405 | 5.02k | result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes); |
406 | 5.02k | if (result < 0 || dummy_num_cache_opcodes < 0) { |
407 | 748 | goto fail; |
408 | 748 | } |
409 | 31.0k | } else { |
410 | 31.0k | if (reg->repeat_range[repeat_mem].lower == 0) { |
411 | 19.8k | num_cache_opcodes++; |
412 | 19.8k | } |
413 | 31.0k | result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes); |
414 | 31.0k | if (result < 0 || num_cache_opcodes < 0) { |
415 | 6.72k | goto fail; |
416 | 6.72k | } |
417 | 24.3k | OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; |
418 | 24.3k | if (repeat_range->lower < repeat_range->upper) { |
419 | 21.8k | num_cache_opcodes++; |
420 | 21.8k | } |
421 | 24.3k | } |
422 | 28.6k | break; |
423 | 28.6k | case OP_REPEAT_INC: |
424 | 28.6k | case OP_REPEAT_INC_NG: |
425 | 28.6k | GET_MEMNUM_INC(repeat_mem, p); |
426 | 28.6k | if (repeat_mem != current_repeat_mem) { |
427 | | // A lone or invalid OP_REPEAT_INC is found. |
428 | 0 | goto impossible; |
429 | 0 | } |
430 | 28.6k | goto exit; |
431 | 28.6k | case OP_REPEAT_INC_SG: |
432 | 644 | case OP_REPEAT_INC_NG_SG: |
433 | 644 | goto impossible; |
434 | 75.9k | case OP_NULL_CHECK_START: |
435 | 75.9k | p += SIZE_MEMNUM; |
436 | 75.9k | break; |
437 | 50.2k | case OP_NULL_CHECK_END: |
438 | 50.2k | case OP_NULL_CHECK_END_MEMST_PUSH: |
439 | 50.2k | p += SIZE_MEMNUM; |
440 | 50.2k | break; |
441 | 5.26k | case OP_NULL_CHECK_END_MEMST: |
442 | 5.26k | p += SIZE_MEMNUM; |
443 | 5.26k | break; |
444 | | |
445 | 47.1k | case OP_PUSH_POS: |
446 | 47.1k | if (lookaround_nesting < 0) { |
447 | | // A look-around nested in a atomic grouping is found. |
448 | 131 | goto impossible; |
449 | 131 | } |
450 | 47.0k | result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); |
451 | 47.0k | if (result < 0 || num_cache_opcodes < 0) { |
452 | 140 | goto fail; |
453 | 140 | } |
454 | 46.9k | break; |
455 | 46.9k | case OP_PUSH_POS_NOT: |
456 | 24.0k | if (lookaround_nesting < 0) { |
457 | | // A look-around nested in a atomic grouping is found. |
458 | 11 | goto impossible; |
459 | 11 | } |
460 | 24.0k | p += SIZE_RELADDR; |
461 | 24.0k | result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); |
462 | 24.0k | if (result < 0 || num_cache_opcodes < 0) { |
463 | 179 | goto fail; |
464 | 179 | } |
465 | 23.8k | break; |
466 | 23.8k | case OP_PUSH_LOOK_BEHIND_NOT: |
467 | 3.48k | if (lookaround_nesting < 0) { |
468 | | // A look-around nested in a atomic grouping is found. |
469 | 239 | goto impossible; |
470 | 239 | } |
471 | 3.24k | p += SIZE_RELADDR; |
472 | 3.24k | p += SIZE_LENGTH; |
473 | 3.24k | result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes); |
474 | 3.24k | if (result < 0 || num_cache_opcodes < 0) { |
475 | 1.20k | goto fail; |
476 | 1.20k | } |
477 | 2.03k | break; |
478 | 145k | case OP_PUSH_STOP_BT: |
479 | 145k | if (lookaround_nesting != 0) { |
480 | | // A nested atomic grouping is found. |
481 | 12.6k | goto impossible; |
482 | 12.6k | } |
483 | 132k | result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes); |
484 | 132k | if (result < 0 || num_cache_opcodes < 0) { |
485 | 15.9k | goto fail; |
486 | 15.9k | } |
487 | 116k | break; |
488 | 116k | case OP_POP_POS: |
489 | 70.6k | case OP_FAIL_POS: |
490 | 72.7k | case OP_FAIL_LOOK_BEHIND_NOT: |
491 | 189k | case OP_POP_STOP_BT: |
492 | 189k | goto exit; |
493 | 4.93k | case OP_LOOK_BEHIND: |
494 | 4.93k | p += SIZE_LENGTH; |
495 | 4.93k | break; |
496 | | |
497 | 487 | case OP_PUSH_ABSENT_POS: |
498 | 487 | case OP_ABSENT_END: |
499 | 487 | case OP_ABSENT: |
500 | 487 | goto impossible; |
501 | | |
502 | 19.5k | case OP_CALL: |
503 | 19.5k | case OP_RETURN: |
504 | 19.5k | goto impossible; |
505 | | |
506 | 164 | case OP_CONDITION: |
507 | 164 | goto impossible; |
508 | | |
509 | 0 | case OP_STATE_CHECK_PUSH: |
510 | 0 | case OP_STATE_CHECK_PUSH_OR_JUMP: |
511 | 0 | case OP_STATE_CHECK: |
512 | 0 | case OP_STATE_CHECK_ANYCHAR_STAR: |
513 | 0 | case OP_STATE_CHECK_ANYCHAR_ML_STAR: |
514 | 0 | goto impossible; |
515 | | |
516 | 0 | case OP_SET_OPTION_PUSH: |
517 | 0 | case OP_SET_OPTION: |
518 | 0 | p += SIZE_OPTION; |
519 | 0 | break; |
520 | | |
521 | 21 | default: |
522 | 21 | goto bytecode_error; |
523 | 4.34M | } |
524 | 4.34M | } |
525 | | |
526 | 295k | exit: |
527 | 295k | *pp = p; |
528 | 295k | *num_cache_opcodes_ptr = num_cache_opcodes; |
529 | 295k | return 0; |
530 | | |
531 | 24.9k | fail: |
532 | 24.9k | *num_cache_opcodes_ptr = num_cache_opcodes; |
533 | 24.9k | return result; |
534 | | |
535 | 42.2k | impossible: |
536 | 42.2k | *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE; |
537 | 42.2k | return 0; |
538 | | |
539 | 21 | bytecode_error: |
540 | 21 | return ONIGERR_UNDEFINED_BYTECODE; |
541 | 362k | } |
542 | | |
543 | | /* count the total number of cache opcodes for allocating a match cache buffer. */ |
544 | | static OnigPosition |
545 | | count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) |
546 | 119k | { |
547 | 119k | UChar* p = reg->p; |
548 | 119k | *num_cache_opcodes_ptr = 0; |
549 | 119k | OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr); |
550 | 119k | if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) { |
551 | 674 | return ONIGERR_UNDEFINED_BYTECODE; |
552 | 674 | } |
553 | | |
554 | 118k | return result; |
555 | 119k | } |
556 | | |
557 | | static OnigPosition |
558 | | init_cache_opcodes_inner( |
559 | | const regex_t* reg, |
560 | | MemNumType current_repeat_mem, int lookaround_nesting, |
561 | | OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr |
562 | | ) |
563 | 129k | { |
564 | 129k | UChar* p = *pp; |
565 | 129k | UChar* pend = reg->p + reg->used; |
566 | 129k | UChar* pbegin; |
567 | 129k | LengthType len; |
568 | 129k | MemNumType repeat_mem; |
569 | 129k | OnigEncoding enc = reg->enc; |
570 | 129k | long cache_point = *num_cache_points_ptr; |
571 | 129k | OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr; |
572 | 129k | OnigPosition result; |
573 | | |
574 | 493k | # define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\ |
575 | 490k | cache_opcodes->addr = pbegin;\ |
576 | 490k | cache_opcodes->cache_point = cache_point;\ |
577 | 490k | cache_opcodes->outer_repeat_mem = current_repeat_mem;\ |
578 | 490k | cache_opcodes->num_cache_points_at_outer_repeat = 0;\ |
579 | 490k | cache_opcodes->num_cache_points_in_outer_repeat = 0;\ |
580 | 490k | cache_opcodes->lookaround_nesting = lookaround_nesting;\ |
581 | 490k | cache_opcodes->match_addr = NULL;\ |
582 | 490k | cache_point += lookaround_nesting != 0 ? 2 : 1;\ |
583 | 490k | cache_opcodes++;\ |
584 | 490k | } |
585 | | |
586 | 1.76M | while (p < pend) { |
587 | 1.72M | pbegin = p; |
588 | 1.72M | switch (*p++) { |
589 | 0 | case OP_FINISH: |
590 | 42.4k | case OP_END: |
591 | 42.4k | break; |
592 | | |
593 | 145k | case OP_EXACT1: p++; break; |
594 | 64.9k | case OP_EXACT2: p += 2; break; |
595 | 12.8k | case OP_EXACT3: p += 3; break; |
596 | 26.2k | case OP_EXACT4: p += 4; break; |
597 | 12.3k | case OP_EXACT5: p += 5; break; |
598 | 75.8k | case OP_EXACTN: |
599 | 75.8k | GET_LENGTH_INC(len, p); p += len; break; |
600 | 2.63k | case OP_EXACTMB2N1: p += 2; break; |
601 | 294 | case OP_EXACTMB2N2: p += 4; break; |
602 | 1.24k | case OP_EXACTMB2N3: p += 6; break; |
603 | 141 | case OP_EXACTMB2N: |
604 | 141 | GET_LENGTH_INC(len, p); p += len * 2; break; |
605 | 2.00k | case OP_EXACTMB3N: |
606 | 2.00k | GET_LENGTH_INC(len, p); p += len * 3; break; |
607 | 397 | case OP_EXACTMBN: |
608 | 397 | { |
609 | 397 | int mb_len; |
610 | 397 | GET_LENGTH_INC(mb_len, p); |
611 | 397 | GET_LENGTH_INC(len, p); |
612 | 397 | p += mb_len * len; |
613 | 397 | } |
614 | 397 | break; |
615 | | |
616 | 2.91k | case OP_EXACT1_IC: |
617 | 2.91k | len = enclen(enc, p, pend); p += len; break; |
618 | 5.54k | case OP_EXACTN_IC: |
619 | 5.54k | GET_LENGTH_INC(len, p); p += len; break; |
620 | | |
621 | 1.85k | case OP_CCLASS: |
622 | 8.75k | case OP_CCLASS_NOT: |
623 | 8.75k | p += SIZE_BITSET; break; |
624 | 283 | case OP_CCLASS_MB: |
625 | 2.51k | case OP_CCLASS_MB_NOT: |
626 | 2.51k | GET_LENGTH_INC(len, p); p += len; break; |
627 | 513 | case OP_CCLASS_MIX: |
628 | 6.58k | case OP_CCLASS_MIX_NOT: |
629 | 6.58k | p += SIZE_BITSET; |
630 | 6.58k | GET_LENGTH_INC(len, p); |
631 | 6.58k | p += len; |
632 | 6.58k | break; |
633 | | |
634 | 148k | case OP_ANYCHAR: |
635 | 189k | case OP_ANYCHAR_ML: |
636 | 189k | break; |
637 | 49.9k | case OP_ANYCHAR_STAR: |
638 | 59.3k | case OP_ANYCHAR_ML_STAR: |
639 | 59.3k | INC_CACHE_OPCODES; |
640 | 59.3k | break; |
641 | 30.5k | case OP_ANYCHAR_STAR_PEEK_NEXT: |
642 | 46.3k | case OP_ANYCHAR_ML_STAR_PEEK_NEXT: |
643 | 46.3k | p++; |
644 | 46.3k | INC_CACHE_OPCODES; |
645 | 46.3k | break; |
646 | | |
647 | 810 | case OP_WORD: |
648 | 2.19k | case OP_NOT_WORD: |
649 | 7.02k | case OP_WORD_BOUND: |
650 | 13.3k | case OP_NOT_WORD_BOUND: |
651 | 13.3k | case OP_WORD_BEGIN: |
652 | 13.3k | case OP_WORD_END: |
653 | 13.3k | break; |
654 | | |
655 | 5.23k | case OP_ASCII_WORD: |
656 | 6.24k | case OP_NOT_ASCII_WORD: |
657 | 7.66k | case OP_ASCII_WORD_BOUND: |
658 | 8.75k | case OP_NOT_ASCII_WORD_BOUND: |
659 | 8.75k | case OP_ASCII_WORD_BEGIN: |
660 | 8.75k | case OP_ASCII_WORD_END: |
661 | 8.75k | break; |
662 | | |
663 | 1.89k | case OP_BEGIN_BUF: |
664 | 2.35k | case OP_END_BUF: |
665 | 4.83k | case OP_BEGIN_LINE: |
666 | 11.3k | case OP_END_LINE: |
667 | 11.6k | case OP_SEMI_END_BUF: |
668 | 12.9k | case OP_BEGIN_POSITION: |
669 | 12.9k | break; |
670 | | |
671 | 0 | case OP_BACKREF1: |
672 | 0 | case OP_BACKREF2: |
673 | 0 | case OP_BACKREFN: |
674 | 0 | case OP_BACKREFN_IC: |
675 | 0 | case OP_BACKREF_MULTI: |
676 | 0 | case OP_BACKREF_MULTI_IC: |
677 | 0 | case OP_BACKREF_WITH_LEVEL: |
678 | 0 | goto unexpected_bytecode_error; |
679 | | |
680 | 1.43k | case OP_MEMORY_START: |
681 | 13.5k | case OP_MEMORY_START_PUSH: |
682 | 13.6k | case OP_MEMORY_END_PUSH: |
683 | 13.6k | case OP_MEMORY_END_PUSH_REC: |
684 | 27.0k | case OP_MEMORY_END: |
685 | 27.0k | case OP_MEMORY_END_REC: |
686 | 27.0k | p += SIZE_MEMNUM; |
687 | 27.0k | if (lookaround_nesting != 0) { |
688 | 0 | goto unexpected_bytecode_error; |
689 | 0 | } |
690 | 27.0k | break; |
691 | | |
692 | 27.0k | case OP_KEEP: |
693 | 70 | break; |
694 | | |
695 | 0 | case OP_FAIL: |
696 | 0 | break; |
697 | 352k | case OP_JUMP: |
698 | 352k | p += SIZE_RELADDR; |
699 | 352k | break; |
700 | 345k | case OP_PUSH: |
701 | 345k | p += SIZE_RELADDR; |
702 | 345k | INC_CACHE_OPCODES; |
703 | 345k | break; |
704 | 0 | case OP_POP: |
705 | 0 | break; |
706 | 0 | case OP_PUSH_OR_JUMP_EXACT1: |
707 | 29.3k | case OP_PUSH_IF_PEEK_NEXT: |
708 | 29.3k | p += SIZE_RELADDR + 1; |
709 | 29.3k | INC_CACHE_OPCODES; |
710 | 29.3k | break; |
711 | 4.64k | case OP_REPEAT: |
712 | 11.3k | case OP_REPEAT_NG: |
713 | 11.3k | GET_MEMNUM_INC(repeat_mem, p); |
714 | 11.3k | p += SIZE_RELADDR; |
715 | 11.3k | if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) { |
716 | 2.65k | long dummy_num_cache_points = 0; |
717 | 2.65k | OnigCacheOpcode* dummy_cache_opcodes = NULL; |
718 | 2.65k | result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points); |
719 | 2.65k | if (result != 0) { |
720 | 0 | goto fail; |
721 | 0 | } |
722 | 8.66k | } else { |
723 | 8.66k | if (reg->repeat_range[repeat_mem].lower == 0) { |
724 | 5.47k | INC_CACHE_OPCODES; |
725 | 5.47k | } |
726 | 8.66k | { |
727 | 8.66k | long num_cache_points_in_repeat = 0; |
728 | 8.66k | long num_cache_points_at_repeat = cache_point; |
729 | 8.66k | OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes; |
730 | 8.66k | result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat); |
731 | 8.66k | if (result != 0) { |
732 | 0 | goto fail; |
733 | 0 | } |
734 | 8.66k | OnigRepeatRange *repeat_range = ®->repeat_range[repeat_mem]; |
735 | 8.66k | if (repeat_range->lower < repeat_range->upper) { |
736 | 7.54k | INC_CACHE_OPCODES; |
737 | 7.54k | cache_point -= lookaround_nesting != 0 ? 2 : 1; |
738 | 7.54k | } |
739 | 8.66k | int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower; |
740 | 8.66k | cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds; |
741 | 26.8k | for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) { |
742 | 18.1k | cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat; |
743 | 18.1k | cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat; |
744 | 18.1k | } |
745 | 8.66k | } |
746 | 8.66k | } |
747 | 11.3k | break; |
748 | 11.3k | case OP_REPEAT_INC: |
749 | 11.3k | case OP_REPEAT_INC_NG: |
750 | 11.3k | p += SIZE_MEMNUM; |
751 | 11.3k | goto exit; |
752 | 0 | case OP_REPEAT_INC_SG: |
753 | 0 | case OP_REPEAT_INC_NG_SG: |
754 | 0 | goto unexpected_bytecode_error; |
755 | 21.2k | case OP_NULL_CHECK_START: |
756 | 21.2k | p += SIZE_MEMNUM; |
757 | 21.2k | break; |
758 | 17.7k | case OP_NULL_CHECK_END: |
759 | 17.7k | case OP_NULL_CHECK_END_MEMST_PUSH: |
760 | 17.7k | p += SIZE_MEMNUM; |
761 | 17.7k | break; |
762 | 3.52k | case OP_NULL_CHECK_END_MEMST: |
763 | 3.52k | p += SIZE_MEMNUM; |
764 | 3.52k | break; |
765 | | |
766 | 17.5k | case OP_PUSH_POS: |
767 | 32.5k | lookaround: |
768 | 32.5k | { |
769 | 32.5k | OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes; |
770 | 32.5k | result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point); |
771 | 32.5k | if (result != 0) { |
772 | 0 | goto fail; |
773 | 0 | } |
774 | 32.5k | UChar* match_addr = p - 1; |
775 | 166k | for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) { |
776 | 133k | if (cache_opcodes_in_lookaround->match_addr == NULL) { |
777 | 133k | cache_opcodes_in_lookaround->match_addr = match_addr; |
778 | 133k | } |
779 | 133k | } |
780 | 32.5k | } |
781 | 0 | break; |
782 | 14.5k | case OP_PUSH_POS_NOT: |
783 | 14.5k | p += SIZE_RELADDR; |
784 | 14.5k | goto lookaround; |
785 | 497 | case OP_PUSH_LOOK_BEHIND_NOT: |
786 | 497 | p += SIZE_RELADDR; |
787 | 497 | p += SIZE_LENGTH; |
788 | 497 | goto lookaround; |
789 | 43.1k | case OP_PUSH_STOP_BT: |
790 | 43.1k | { |
791 | 43.1k | OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes; |
792 | 43.1k | result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point); |
793 | 43.1k | if (result != 0) { |
794 | 0 | goto fail; |
795 | 0 | } |
796 | 43.1k | UChar* match_addr = p - 1; |
797 | 105k | for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) { |
798 | 62.4k | if (cache_opcodes_in_atomic->match_addr == NULL) { |
799 | 62.4k | cache_opcodes_in_atomic->match_addr = match_addr; |
800 | 62.4k | } |
801 | 62.4k | } |
802 | 43.1k | } |
803 | 0 | break; |
804 | 17.5k | case OP_POP_POS: |
805 | 32.0k | case OP_FAIL_POS: |
806 | 32.5k | case OP_FAIL_LOOK_BEHIND_NOT: |
807 | 75.6k | case OP_POP_STOP_BT: |
808 | 75.6k | goto exit; |
809 | 4.32k | case OP_LOOK_BEHIND: |
810 | 4.32k | p += SIZE_LENGTH; |
811 | 4.32k | break; |
812 | | |
813 | 0 | case OP_ABSENT_END: |
814 | 0 | case OP_ABSENT: |
815 | 0 | goto unexpected_bytecode_error; |
816 | | |
817 | 0 | case OP_CALL: |
818 | 0 | case OP_RETURN: |
819 | 0 | goto unexpected_bytecode_error; |
820 | | |
821 | 0 | case OP_CONDITION: |
822 | 0 | goto unexpected_bytecode_error; |
823 | | |
824 | 0 | case OP_STATE_CHECK_PUSH: |
825 | 0 | case OP_STATE_CHECK_PUSH_OR_JUMP: |
826 | 0 | case OP_STATE_CHECK: |
827 | 0 | case OP_STATE_CHECK_ANYCHAR_STAR: |
828 | 0 | case OP_STATE_CHECK_ANYCHAR_ML_STAR: |
829 | 0 | goto unexpected_bytecode_error; |
830 | | |
831 | 0 | case OP_SET_OPTION_PUSH: |
832 | 0 | case OP_SET_OPTION: |
833 | 0 | p += SIZE_OPTION; |
834 | 0 | break; |
835 | | |
836 | 0 | default: |
837 | 0 | goto bytecode_error; |
838 | 1.72M | } |
839 | 1.72M | } |
840 | | |
841 | 129k | exit: |
842 | 129k | *cache_opcodes_ptr = cache_opcodes; |
843 | 129k | *pp = p; |
844 | 129k | *num_cache_points_ptr = cache_point; |
845 | 129k | return 0; |
846 | | |
847 | 0 | fail: |
848 | 0 | return result; |
849 | | |
850 | 0 | unexpected_bytecode_error: |
851 | 0 | return ONIGERR_UNEXPECTED_BYTECODE; |
852 | | |
853 | 0 | bytecode_error: |
854 | 0 | return ONIGERR_UNDEFINED_BYTECODE; |
855 | 129k | } |
856 | | |
857 | | /* collect cache opcodes from the given regex program, and compute the total number of cache points. */ |
858 | | static OnigPosition |
859 | | init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr) |
860 | 42.4k | { |
861 | 42.4k | UChar* p = reg->p; |
862 | 42.4k | *num_cache_points_ptr = 0; |
863 | 42.4k | OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr); |
864 | 42.4k | if (result == 0 && p != reg->p + reg->used) { |
865 | 0 | return ONIGERR_UNDEFINED_BYTECODE; |
866 | 0 | } |
867 | | |
868 | 42.4k | return result; |
869 | 42.4k | } |
870 | | #else |
871 | | static OnigPosition |
872 | | count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes) |
873 | | { |
874 | | *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE; |
875 | | return 0; |
876 | | } |
877 | | #endif /* USE_MATCH_CACHE */ |
878 | | |
879 | | extern int |
880 | | onig_check_linear_time(OnigRegexType* reg) |
881 | 0 | { |
882 | 0 | long num_cache_opcodes = 0; |
883 | 0 | count_num_cache_opcodes(reg, &num_cache_opcodes); |
884 | 0 | return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE; |
885 | 0 | } |
886 | | |
887 | | extern void |
888 | | onig_region_clear(OnigRegion* region) |
889 | 1.17M | { |
890 | 1.17M | int i; |
891 | | |
892 | 13.0M | for (i = 0; i < region->num_regs; i++) { |
893 | 11.8M | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; |
894 | 11.8M | } |
895 | | #ifdef USE_CAPTURE_HISTORY |
896 | | history_root_free(region); |
897 | | #endif |
898 | 1.17M | } |
899 | | |
900 | | extern int |
901 | | onig_region_resize(OnigRegion* region, int n) |
902 | 1.17M | { |
903 | 1.17M | region->num_regs = n; |
904 | | |
905 | 1.17M | if (n < ONIG_NREGION) |
906 | 838k | n = ONIG_NREGION; |
907 | | |
908 | 1.17M | if (region->allocated == 0) { |
909 | 1.17M | region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); |
910 | 1.17M | if (region->beg == 0) |
911 | 0 | return ONIGERR_MEMORY; |
912 | | |
913 | 1.17M | region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); |
914 | 1.17M | if (region->end == 0) { |
915 | 0 | xfree(region->beg); |
916 | 0 | return ONIGERR_MEMORY; |
917 | 0 | } |
918 | | |
919 | 1.17M | region->allocated = n; |
920 | 1.17M | } |
921 | 0 | else if (region->allocated < n) { |
922 | 0 | OnigPosition *tmp; |
923 | |
|
924 | 0 | region->allocated = 0; |
925 | 0 | tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition)); |
926 | 0 | if (tmp == 0) { |
927 | 0 | xfree(region->beg); |
928 | 0 | xfree(region->end); |
929 | 0 | return ONIGERR_MEMORY; |
930 | 0 | } |
931 | 0 | region->beg = tmp; |
932 | 0 | tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); |
933 | 0 | if (tmp == 0) { |
934 | 0 | xfree(region->beg); |
935 | 0 | xfree(region->end); |
936 | 0 | return ONIGERR_MEMORY; |
937 | 0 | } |
938 | 0 | region->end = tmp; |
939 | |
|
940 | 0 | region->allocated = n; |
941 | 0 | } |
942 | | |
943 | 1.17M | return 0; |
944 | 1.17M | } |
945 | | |
946 | | static int |
947 | | onig_region_resize_clear(OnigRegion* region, int n) |
948 | 1.17M | { |
949 | 1.17M | int r; |
950 | | |
951 | 1.17M | r = onig_region_resize(region, n); |
952 | 1.17M | if (r != 0) return r; |
953 | 1.17M | onig_region_clear(region); |
954 | 1.17M | return 0; |
955 | 1.17M | } |
956 | | |
957 | | extern int |
958 | | onig_region_set(OnigRegion* region, int at, int beg, int end) |
959 | 0 | { |
960 | 0 | if (at < 0) return ONIGERR_INVALID_ARGUMENT; |
961 | | |
962 | 0 | if (at >= region->allocated) { |
963 | 0 | int r = onig_region_resize(region, at + 1); |
964 | 0 | if (r < 0) return r; |
965 | 0 | } |
966 | | |
967 | 0 | region->beg[at] = beg; |
968 | 0 | region->end[at] = end; |
969 | 0 | return 0; |
970 | 0 | } |
971 | | |
972 | | extern void |
973 | | onig_region_init(OnigRegion* region) |
974 | 0 | { |
975 | 0 | region->num_regs = 0; |
976 | 0 | region->allocated = 0; |
977 | 0 | region->beg = (OnigPosition* )0; |
978 | 0 | region->end = (OnigPosition* )0; |
979 | | #ifdef USE_CAPTURE_HISTORY |
980 | | region->history_root = (OnigCaptureTreeNode* )0; |
981 | | #endif |
982 | 0 | } |
983 | | |
984 | | extern OnigRegion* |
985 | | onig_region_new(void) |
986 | 0 | { |
987 | 0 | OnigRegion* r; |
988 | |
|
989 | 0 | r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); |
990 | 0 | if (r) |
991 | 0 | onig_region_init(r); |
992 | 0 | return r; |
993 | 0 | } |
994 | | |
995 | | extern void |
996 | | onig_region_free(OnigRegion* r, int free_self) |
997 | 1.17M | { |
998 | 1.17M | if (r) { |
999 | 1.17M | if (r->allocated > 0) { |
1000 | 1.17M | xfree(r->beg); |
1001 | 1.17M | xfree(r->end); |
1002 | 1.17M | } |
1003 | | #ifdef USE_CAPTURE_HISTORY |
1004 | | history_root_free(r); |
1005 | | #endif |
1006 | 1.17M | if (free_self) { |
1007 | 0 | xfree(r); |
1008 | 0 | } |
1009 | 1.17M | else { |
1010 | 1.17M | memset(r, 0, sizeof(OnigRegion)); |
1011 | 1.17M | } |
1012 | 1.17M | } |
1013 | 1.17M | } |
1014 | | |
1015 | | extern void |
1016 | | onig_region_copy(OnigRegion* to, const OnigRegion* from) |
1017 | 0 | { |
1018 | 0 | #define RREGC_SIZE (sizeof(int) * from->num_regs) |
1019 | 0 | int i, r; |
1020 | |
|
1021 | 0 | if (to == from) return; |
1022 | | |
1023 | 0 | r = onig_region_resize(to, from->num_regs); |
1024 | 0 | if (r) return; |
1025 | | |
1026 | 0 | for (i = 0; i < from->num_regs; i++) { |
1027 | 0 | to->beg[i] = from->beg[i]; |
1028 | 0 | to->end[i] = from->end[i]; |
1029 | 0 | } |
1030 | 0 | to->num_regs = from->num_regs; |
1031 | |
|
1032 | | #ifdef USE_CAPTURE_HISTORY |
1033 | | history_root_free(to); |
1034 | | |
1035 | | if (IS_NOT_NULL(from->history_root)) { |
1036 | | to->history_root = history_tree_clone(from->history_root); |
1037 | | } |
1038 | | #endif |
1039 | 0 | } |
1040 | | |
1041 | | |
/** stack **/
/* Parenthesised so the negative literal cannot merge with neighbouring
   operators at the point of expansion. */
#define INVALID_STACK_INDEX   (-1)

/* stack type */
/* used by normal-POP: the low byte is non-zero, which is what
   STK_MASK_POP_USED tests for */
#define STK_ALT                    0x0001
#define STK_LOOK_BEHIND_NOT        0x0002
#define STK_POS_NOT                0x0003
/* handled by normal-POP */
#define STK_MEM_START              0x0100
#define STK_MEM_END                0x8200
#define STK_REPEAT_INC             0x0300
#define STK_STATE_CHECK_MARK       0x1000
/* avoided by normal-POP */
#define STK_NULL_CHECK_START       0x3000
#define STK_NULL_CHECK_END         0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8400
#define STK_POS                    0x0500  /* used when POP-POS */
#define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0700
#define STK_CALL_FRAME             0x0800
#define STK_RETURN                 0x0900
#define STK_VOID                   0x0a00  /* for fill a blank */
#define STK_ABSENT_POS             0x0b00  /* for absent */
#define STK_ABSENT                 0x0c00  /* absent inner loop marker */
#define STK_MATCH_CACHE_POINT      0x0d00  /* for the match cache optimization */
#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00

/* stack type check mask */
#define STK_MASK_POP_USED          0x00ff  /* entries a normal POP stops at */
#define STK_MASK_TO_VOID_TARGET    0x10ff  /* POP_USED entries plus STK_STATE_CHECK_MARK */
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
1074 | | |
/*
 * Match-cache members of the match argument structure.
 * Without USE_MATCH_CACHE both macros expand to nothing.
 */
#ifndef USE_MATCH_CACHE
#define MATCH_ARG_INIT_MATCH_CACHE(msa)
#define MATCH_ARG_FREE_MATCH_CACHE(msa)
#else
/* Reset every match-cache member to its "not yet set up" value. */
#define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\
  (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
  (msa).match_cache_buf = (uint8_t*)NULL;\
  (msa).num_cache_opcodes = NUM_CACHE_OPCODES_UNINIT;\
  (msa).num_cache_points = 0;\
  (msa).num_fails = 0;\
  (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;\
} while(0)
/* Free both match-cache buffers and null the pointers afterwards. */
#define MATCH_ARG_FREE_MATCH_CACHE(msa) do {\
  xfree((msa).cache_opcodes);\
  (msa).cache_opcodes = (OnigCacheOpcode*)NULL;\
  xfree((msa).match_cache_buf);\
  (msa).match_cache_buf = (uint8_t*)NULL;\
} while(0)
#endif
1094 | | |
/*
 * best_len exists only with USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE; this
 * helper lets MATCH_ARG_INIT keep a single body for both configurations.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT_BEST_LEN(msa)  (msa).best_len = ONIG_MISMATCH
#else
# define MATCH_ARG_INIT_BEST_LEN(msa)  ((void)0)
#endif

/*
 * Initialise the match argument `msa`: no saved stack, the given
 * option/region/start/gpos values, zeroed counter and end_time, and the
 * match-cache members via MATCH_ARG_INIT_MATCH_CACHE.
 */
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  MATCH_ARG_INIT_BEST_LEN(msa);\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_MATCH_CACHE(msa);\
} while(0)
1119 | | |
1120 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
1121 | | |
/* Buffers of at least this many bytes are taken from xmalloc (and later
   xfree'd by MATCH_ARG_FREE); smaller ones come from xalloca. */
1122 | | # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 |
1123 | | |
/*
 * Set up (msa).state_check_buff / (msa).state_check_buff_size.
 *
 * Only when state_num > 0 and str_len >= STATE_CHECK_STRING_THRESHOLD_LEN:
 *   size = ((str_len + 1) * state_num + 7) >> 3 bytes, and `offset` is
 *   rescaled IN PLACE to (offset * state_num) >> 3, so the caller's
 *   variable is modified.  If 0 < size < STATE_CHECK_BUFF_MAX_SIZE and
 *   offset < size, the buffer is obtained and bytes offset..size-1 are
 *   zeroed.  In every other case the buffer is null and the size 0.
 *
 * NOTE(review): CHECK_NULL_RETURN_MEMERR is defined elsewhere; presumably
 * it returns from the enclosing function when xmalloc fails - confirm.
 * The xalloca'd buffer lives in the frame of the function that expands
 * this macro.
 */
1124 | | # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ |
1125 | | if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ |
1126 | | unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ |
1127 | | offset = ((offset) * (state_num)) >> 3;\ |
1128 | | if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ |
1129 | | if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ |
1130 | | (msa).state_check_buff = (void* )xmalloc(size);\ |
1131 | | CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ |
1132 | | }\ |
1133 | | else \ |
1134 | | (msa).state_check_buff = (void* )xalloca(size);\ |
1135 | | xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ |
1136 | | (size_t )(size - (offset))); \ |
1137 | | (msa).state_check_buff_size = size;\ |
1138 | | }\ |
1139 | | else {\ |
1140 | | (msa).state_check_buff = (void* )0;\ |
1141 | | (msa).state_check_buff_size = 0;\ |
1142 | | }\ |
1143 | | }\ |
1144 | | else {\ |
1145 | | (msa).state_check_buff = (void* )0;\ |
1146 | | (msa).state_check_buff_size = 0;\ |
1147 | | }\ |
1148 | | } while(0) |
1149 | | |
/* Release what a match argument owns: the saved stack, the state-check
   buffer (only when its size says it came from xmalloc) and the match
   cache buffers. */
1150 | | # define MATCH_ARG_FREE(msa) do {\ |
1151 | | xfree((msa).stack_p);\ |
1152 | | if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ |
1153 | | xfree((msa).state_check_buff);\ |
1154 | | }\ |
1155 | | MATCH_ARG_FREE_MATCH_CACHE(msa);\ |
1156 | | } while(0) |
1157 | | #else /* USE_COMBINATION_EXPLOSION_CHECK */ |
/* Without the state-check buffer only the saved stack and the match
   cache buffers are released. */
1158 | 1.17M | # define MATCH_ARG_FREE(msa) do {\ |
1159 | 1.17M | xfree((msa).stack_p);\ |
1160 | 1.17M | MATCH_ARG_FREE_MATCH_CACHE(msa);\ |
1161 | 1.17M | } while (0) |
1162 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
1163 | | |
1164 | | |
1165 | | |
/* Largest ptr_num for which STACK_INIT keeps the index array on the
   C stack (xalloca) instead of the heap (xmalloc). */
1166 | 15.3M | #define MAX_PTR_NUM 100 |
1167 | | |
/*
 * Set up the index array (alloc_addr) and the backtrack stack
 * (stk_alloc, stk_base, stk, stk_end).  Expects `msa`, `stk_alloc`,
 * `stk_base`, `stk` and `stk_end` to be variables of the expanding scope.
 *
 *   ptr_num > MAX_PTR_NUM : index array from xmalloc, heap_addr records
 *                           it; the stack is msa->stack_p (msa->stack_n
 *                           entries) when present, else a fresh xalloca
 *                           block of stack_num entries.
 *   msa->stack_p present  : index array from xalloca, heap_addr = NULL,
 *                           stack reused from msa.
 *   otherwise             : a single xalloca block holds the index array
 *                           followed by stack_num stack entries.
 *
 * NOTE(review): the xmalloc result is not checked inside this macro.
 */
1168 | 7.67M | #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\ |
1169 | 7.67M | if (ptr_num > MAX_PTR_NUM) {\ |
1170 | 124k | alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\ |
1171 | 124k | heap_addr = alloc_addr;\ |
1172 | 124k | if (msa->stack_p) {\ |
1173 | 8.43k | stk_alloc = (OnigStackType* )(msa->stack_p);\ |
1174 | 8.43k | stk_base = stk_alloc;\ |
1175 | 8.43k | stk = stk_base;\ |
1176 | 8.43k | stk_end = stk_base + msa->stack_n;\ |
1177 | 8.43k | }\ |
1178 | 124k | else {\ |
1179 | 115k | stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\ |
1180 | 115k | stk_base = stk_alloc;\ |
1181 | 115k | stk = stk_base;\ |
1182 | 115k | stk_end = stk_base + (stack_num);\ |
1183 | 115k | }\ |
1184 | 124k | }\ |
1185 | 7.67M | else if (msa->stack_p) {\ |
1186 | 3.49M | alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\ |
1187 | 3.49M | heap_addr = NULL;\ |
1188 | 3.49M | stk_alloc = (OnigStackType* )(msa->stack_p);\ |
1189 | 3.49M | stk_base = stk_alloc;\ |
1190 | 3.49M | stk = stk_base;\ |
1191 | 3.49M | stk_end = stk_base + msa->stack_n;\ |
1192 | 3.49M | }\ |
1193 | 7.54M | else {\ |
1194 | 4.05M | alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\ |
1195 | 4.05M | + sizeof(OnigStackType) * (stack_num));\ |
1196 | 4.05M | heap_addr = NULL;\ |
1197 | 4.05M | stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\ |
1198 | 4.05M | stk_base = stk_alloc;\ |
1199 | 4.05M | stk = stk_base;\ |
1200 | 4.05M | stk_end = stk_base + (stack_num);\ |
1201 | 4.05M | }\ |
1202 | 7.67M | } while(0) |
1203 | | |
/* If the stack no longer sits at its initial location (stk_base differs
   from stk_alloc), remember its base and entry count in msa. */
1204 | 7.67M | #define STACK_SAVE do{\ |
1205 | 7.67M | if (stk_base != stk_alloc) {\ |
1206 | 175k | msa->stack_p = stk_base;\ |
1207 | 175k | msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ |
1208 | 175k | };\ |
1209 | 7.67M | } while(0) |
1210 | | |
1211 | | static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; |
1212 | | |
1213 | | extern unsigned int |
1214 | | onig_get_match_stack_limit_size(void) |
1215 | 0 | { |
1216 | 0 | return MatchStackLimitSize; |
1217 | 0 | } |
1218 | | |
1219 | | extern int |
1220 | | onig_set_match_stack_limit_size(unsigned int size) |
1221 | 0 | { |
1222 | 0 | MatchStackLimitSize = size; |
1223 | 0 | return 0; |
1224 | 0 | } |
1225 | | |
1226 | | static int |
1227 | | stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, |
1228 | | OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) |
1229 | 465k | { |
1230 | 465k | size_t n; |
1231 | 465k | OnigStackType *x, *stk_base, *stk_end, *stk; |
1232 | | |
1233 | 465k | stk_base = *arg_stk_base; |
1234 | 465k | stk_end = *arg_stk_end; |
1235 | 465k | stk = *arg_stk; |
1236 | | |
1237 | 465k | n = stk_end - stk_base; |
1238 | 465k | if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { |
1239 | 170k | x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); |
1240 | 170k | if (IS_NULL(x)) { |
1241 | 0 | STACK_SAVE; |
1242 | 0 | return ONIGERR_MEMORY; |
1243 | 0 | } |
1244 | 170k | xmemcpy(x, stk_base, n * sizeof(OnigStackType)); |
1245 | 170k | n *= 2; |
1246 | 170k | } |
1247 | 294k | else { |
1248 | 294k | unsigned int limit_size = MatchStackLimitSize; |
1249 | 294k | n *= 2; |
1250 | 294k | if (limit_size != 0 && n > limit_size) { |
1251 | 0 | if ((unsigned int )(stk_end - stk_base) == limit_size) |
1252 | 0 | return ONIGERR_MATCH_STACK_LIMIT_OVER; |
1253 | 0 | else |
1254 | 0 | n = limit_size; |
1255 | 0 | } |
1256 | 294k | x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); |
1257 | 294k | if (IS_NULL(x)) { |
1258 | 0 | STACK_SAVE; |
1259 | 0 | return ONIGERR_MEMORY; |
1260 | 0 | } |
1261 | 294k | } |
1262 | 465k | *arg_stk = x + (stk - stk_base); |
1263 | 465k | *arg_stk_base = x; |
1264 | 465k | *arg_stk_end = x + n; |
1265 | 465k | return 0; |
1266 | 465k | } |
1267 | | |
/*
 * Make sure at least n free entries remain above `stk`.  If not,
 * stack_double() is called once; on failure the stack is saved into msa,
 * xmalloc_base is freed and the ENCLOSING FUNCTION returns the error
 * code.  Expects stk, stk_base, stk_end, stk_alloc, msa and xmalloc_base
 * in the expanding scope.
 */
1268 | 3.15G | #define STACK_ENSURE(n) do {\ |
1269 | 3.15G | if (stk_end - stk < (n)) {\ |
1270 | 465k | int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ |
1271 | 465k | if (r != 0) {\ |
1272 | 0 | STACK_SAVE;\ |
1273 | 0 | xfree(xmalloc_base);\ |
1274 | 0 | return r;\ |
1275 | 0 | }\ |
1276 | 465k | }\ |
1277 | 3.15G | } while(0) |
1278 | | |
/* Conversions between a stack entry pointer and its index from stk_base. */
1279 | 774M | #define STACK_AT(index) (stk_base + (index)) |
1280 | 385M | #define GET_STACK_INDEX(stk) ((stk) - stk_base) |
1281 | | |
/* Push an entry that carries only a type.  null_check is copied from the
   entry below (0 for the first entry).  STACK_INC is defined elsewhere. */
1282 | 369M | #define STACK_PUSH_TYPE(stack_type) do {\ |
1283 | 369M | STACK_ENSURE(1);\ |
1284 | 369M | stk->type = (stack_type);\ |
1285 | 369M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1286 | 369M | STACK_INC;\ |
1287 | 369M | } while(0) |
1288 | | |
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
1289 | 873M | #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) |
1290 | | |
/*
 * State-check helpers and the generic STACK_PUSH macros.
 * NOTE(review): the USE_COMBINATION_EXPLOSION_CHECK variants below do not
 * set stk->null_check, while the #else variants do - confirm whether
 * that configuration is still expected to build and behave correctly.
 */
1291 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
/* Bit index of (string position s, state number snum) in the state-check
   buffer; uses `str` and `num_comb_exp_check` from the expanding scope. */
1292 | | # define STATE_CHECK_POS(s,snum) \ |
1293 | | (((s) - str) * num_comb_exp_check + ((snum) - 1)) |
/* Read that bit into v (non-zero when set); v is 0 when there is no
   buffer.  The position is taken from the scope variable `s`, which is
   not a macro parameter. */
1294 | | # define STATE_CHECK_VAL(v,snum) do {\ |
1295 | | if (state_check_buff != NULL) {\ |
1296 | | ptrdiff_t x = STATE_CHECK_POS(s,snum);\ |
1297 | | (v) = state_check_buff[x/8] & (1<<(x%8));\ |
1298 | | }\ |
1299 | | else (v) = 0;\ |
1300 | | } while(0) |
1301 | | |
1302 | | |
/* An extra "else if" arm for the pop loops: a STK_STATE_CHECK_MARK entry
   sets its bit in the state-check buffer. */
1303 | | # define ELSE_IF_STATE_CHECK_MARK(stk) \ |
1304 | | else if ((stk)->type == STK_STATE_CHECK_MARK) { \ |
1305 | | ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ |
1306 | | state_check_buff[x/8] |= (1<<(x%8)); \ |
1307 | | } |
1308 | | |
/* Push a state entry (code pointer, string position, previous position,
   keep position) with state_check cleared. */
1309 | | # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ |
1310 | | STACK_ENSURE(1);\ |
1311 | | stk->type = (stack_type);\ |
1312 | | stk->u.state.pcode = (pat);\ |
1313 | | stk->u.state.pstr = (s);\ |
1314 | | stk->u.state.pstr_prev = (sprev);\ |
1315 | | stk->u.state.state_check = 0;\ |
1316 | | stk->u.state.pkeep = (keep);\ |
1317 | | STACK_INC;\ |
1318 | | } while(0) |
1319 | | |
/* Like STACK_PUSH but without STACK_ENSURE: the caller must already have
   room.  Only type, pcode and state_check are written. */
1320 | | # define STACK_PUSH_ENSURED(stack_type,pat) do {\ |
1321 | | stk->type = (stack_type);\ |
1322 | | stk->u.state.pcode = (pat);\ |
1323 | | stk->u.state.state_check = 0;\ |
1324 | | STACK_INC;\ |
1325 | | } while(0) |
1326 | | |
/* Push a STK_ALT entry that records snum as its state_check number
   (0 when no state-check buffer is in use). */
1327 | | # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ |
1328 | | STACK_ENSURE(1);\ |
1329 | | stk->type = STK_ALT;\ |
1330 | | stk->u.state.pcode = (pat);\ |
1331 | | stk->u.state.pstr = (s);\ |
1332 | | stk->u.state.pstr_prev = (sprev);\ |
1333 | | stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ |
1334 | | stk->u.state.pkeep = (keep);\ |
1335 | | STACK_INC;\ |
1336 | | } while(0) |
1337 | | |
/* Push a STK_STATE_CHECK_MARK entry for (s, snum); does nothing when no
   state-check buffer is in use. */
1338 | | # define STACK_PUSH_STATE_CHECK(s,snum) do {\ |
1339 | | if (state_check_buff != NULL) {\ |
1340 | | STACK_ENSURE(1);\ |
1341 | | stk->type = STK_STATE_CHECK_MARK;\ |
1342 | | stk->u.state.pstr = (s);\ |
1343 | | stk->u.state.state_check = (snum);\ |
1344 | | STACK_INC;\ |
1345 | | }\ |
1346 | | } while(0) |
1347 | | |
1348 | | #else /* USE_COMBINATION_EXPLOSION_CHECK */ |
1349 | | |
/* No state-check buffer in this configuration: the extra arm is empty. */
1350 | | # define ELSE_IF_STATE_CHECK_MARK(stk) |
1351 | | |
/* Push a state entry (code pointer, string position, previous position,
   keep position).  null_check is copied from the entry below (0 for the
   first entry). */
1352 | 1.80G | # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ |
1353 | 1.80G | STACK_ENSURE(1);\ |
1354 | 1.80G | stk->type = (stack_type);\ |
1355 | 1.80G | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1356 | 1.80G | stk->u.state.pcode = (pat);\ |
1357 | 1.80G | stk->u.state.pstr = (s);\ |
1358 | 1.80G | stk->u.state.pstr_prev = (sprev);\ |
1359 | 1.80G | stk->u.state.pkeep = (keep);\ |
1360 | 1.80G | STACK_INC;\ |
1361 | 1.80G | } while(0) |
1362 | | |
/* Like STACK_PUSH but without STACK_ENSURE: the caller must already have
   room.  Only type, null_check and pcode are written. */
1363 | 7.67M | # define STACK_PUSH_ENSURED(stack_type,pat) do {\ |
1364 | 7.67M | stk->type = (stack_type);\ |
1365 | 7.67M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1366 | 7.67M | stk->u.state.pcode = (pat);\ |
1367 | 7.67M | STACK_INC;\ |
1368 | 7.67M | } while(0) |
1369 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
1370 | | |
/* Typed shorthands over STACK_PUSH / STACK_PUSH_TYPE.  STACK_PUSH_POS
   stores NULL_UCHARP as its code pointer. */
1371 | 1.78G | #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep) |
1372 | 7.19M | #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep) |
1373 | 10.4M | #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep) |
1374 | 74.5M | #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT) |
1375 | 295M | #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) |
1376 | | #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ |
1377 | 9.59M | STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) |
1378 | | |
/* Push a STK_REPEAT entry for repeat `id` with code pointer `pat` and a
   count of 0. */
1379 | 102M | #define STACK_PUSH_REPEAT(id, pat) do {\ |
1380 | 102M | STACK_ENSURE(1);\ |
1381 | 102M | stk->type = STK_REPEAT;\ |
1382 | 102M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1383 | 102M | stk->u.repeat.num = (id);\ |
1384 | 102M | stk->u.repeat.pcode = (pat);\ |
1385 | 102M | stk->u.repeat.count = 0;\ |
1386 | 102M | STACK_INC;\ |
1387 | 102M | } while(0) |
1388 | | |
/* Push a STK_REPEAT_INC entry that stores sindex, the stack index of a
   STK_REPEAT entry; the pop macros decrement that entry's count when
   they discard this one. */
1389 | 176M | #define STACK_PUSH_REPEAT_INC(sindex) do {\ |
1390 | 176M | STACK_ENSURE(1);\ |
1391 | 176M | stk->type = STK_REPEAT_INC;\ |
1392 | 176M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1393 | 176M | stk->u.repeat_inc.si = (sindex);\ |
1394 | 176M | STACK_INC;\ |
1395 | 176M | } while(0) |
1396 | | |
/* Push a STK_MEM_START entry for group mnum at position s.  The previous
   mem_start_stk/mem_end_stk values are saved in the entry; then
   mem_start_stk[mnum] becomes this entry's index and mem_end_stk[mnum]
   becomes INVALID_STACK_INDEX. */
1397 | 151M | #define STACK_PUSH_MEM_START(mnum, s) do {\ |
1398 | 151M | STACK_ENSURE(1);\ |
1399 | 151M | stk->type = STK_MEM_START;\ |
1400 | 151M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1401 | 151M | stk->u.mem.num = (mnum);\ |
1402 | 151M | stk->u.mem.pstr = (s);\ |
1403 | 151M | stk->u.mem.start = mem_start_stk[mnum];\ |
1404 | 151M | stk->u.mem.end = mem_end_stk[mnum];\ |
1405 | 151M | mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ |
1406 | 151M | mem_end_stk[mnum] = INVALID_STACK_INDEX;\ |
1407 | 151M | STACK_INC;\ |
1408 | 151M | } while(0) |
1409 | | |
/* Push a STK_MEM_END entry for group mnum at position s.  The previous
   mem_start_stk/mem_end_stk values are saved in the entry; then
   mem_end_stk[mnum] becomes this entry's index. */
1410 | 35.7M | #define STACK_PUSH_MEM_END(mnum, s) do {\ |
1411 | 35.7M | STACK_ENSURE(1);\ |
1412 | 35.7M | stk->type = STK_MEM_END;\ |
1413 | 35.7M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1414 | 35.7M | stk->u.mem.num = (mnum);\ |
1415 | 35.7M | stk->u.mem.pstr = (s);\ |
1416 | 35.7M | stk->u.mem.start = mem_start_stk[mnum];\ |
1417 | 35.7M | stk->u.mem.end = mem_end_stk[mnum];\ |
1418 | 35.7M | mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ |
1419 | 35.7M | STACK_INC;\ |
1420 | 35.7M | } while(0) |
1421 | | |
/* Push a STK_MEM_END_MARK entry that records only the group number. */
1422 | 29.4M | #define STACK_PUSH_MEM_END_MARK(mnum) do {\ |
1423 | 29.4M | STACK_ENSURE(1);\ |
1424 | 29.4M | stk->type = STK_MEM_END_MARK;\ |
1425 | 29.4M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1426 | 29.4M | stk->u.mem.num = (mnum);\ |
1427 | 29.4M | STACK_INC;\ |
1428 | 29.4M | } while(0) |
1429 | | |
/*
 * Scan downwards from the stack top for the STK_MEM_START entry of group
 * mnum that is not paired with a later end entry.  Each MEM_END or
 * MEM_END_MARK of the group raises `level`, each MEM_START lowers it; the
 * scan stops at a MEM_START seen while level is 0 and leaves k on it.
 * If none is found k ends up at stk_base.  stk itself is not moved.
 */
1430 | 33.8M | #define STACK_GET_MEM_START(mnum, k) do {\ |
1431 | 33.8M | int level = 0;\ |
1432 | 33.8M | k = stk;\ |
1433 | 735M | while (k > stk_base) {\ |
1434 | 735M | k--;\ |
1435 | 735M | if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ |
1436 | 735M | && k->u.mem.num == (mnum)) {\ |
1437 | 64.6M | level++;\ |
1438 | 64.6M | }\ |
1439 | 735M | else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ |
1440 | 98.5M | if (level == 0) break;\ |
1441 | 98.5M | level--;\ |
1442 | 64.6M | }\ |
1443 | 735M | }\ |
1444 | 33.8M | } while(0) |
1445 | | |
/*
 * Scan upwards from k towards the stack top.  `start` receives the
 * position of the first STK_MEM_START of group mnum met at level 0 and
 * `end` the position of the STK_MEM_END that brings the level back to 0,
 * where the scan stops.  Whatever is not found is left unassigned; k is
 * advanced by the scan.
 */
1446 | | #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ |
1447 | | int level = 0;\ |
1448 | | while (k < stk) {\ |
1449 | | if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ |
1450 | | if (level == 0) (start) = k->u.mem.pstr;\ |
1451 | | level++;\ |
1452 | | }\ |
1453 | | else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ |
1454 | | level--;\ |
1455 | | if (level == 0) {\ |
1456 | | (end) = k->u.mem.pstr;\ |
1457 | | break;\ |
1458 | | }\ |
1459 | | }\ |
1460 | | k++;\ |
1461 | | }\ |
1462 | | } while(0) |
1463 | | |
/* Push a STK_NULL_CHECK_START entry for check number cnum at position s.
   Unlike the other push macros, null_check is set to this entry's own
   stack index rather than inherited from the entry below. */
1464 | 158M | #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ |
1465 | 158M | STACK_ENSURE(1);\ |
1466 | 158M | stk->type = STK_NULL_CHECK_START;\ |
1467 | 158M | stk->null_check = (OnigStackIndex)(stk - stk_base);\ |
1468 | 158M | stk->u.null_check.num = (cnum);\ |
1469 | 158M | stk->u.null_check.pstr = (s);\ |
1470 | 158M | STACK_INC;\ |
1471 | 158M | } while(0) |
1472 | | |
/* Push a STK_NULL_CHECK_END entry for check number cnum; null_check is
   again this entry's own stack index. */
1473 | 5.53M | #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ |
1474 | 5.53M | STACK_ENSURE(1);\ |
1475 | 5.53M | stk->type = STK_NULL_CHECK_END;\ |
1476 | 5.53M | stk->null_check = (OnigStackIndex)(stk - stk_base);\ |
1477 | 5.53M | stk->u.null_check.num = (cnum);\ |
1478 | 5.53M | STACK_INC;\ |
1479 | 5.53M | } while(0) |
1480 | | |
/* Push a STK_CALL_FRAME entry holding the return address `pat`. */
1481 | 32.2M | #define STACK_PUSH_CALL_FRAME(pat) do {\ |
1482 | 32.2M | STACK_ENSURE(1);\ |
1483 | 32.2M | stk->type = STK_CALL_FRAME;\ |
1484 | 32.2M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1485 | 32.2M | stk->u.call_frame.ret_addr = (pat);\ |
1486 | 32.2M | STACK_INC;\ |
1487 | 32.2M | } while(0) |
1488 | | |
/* Push a STK_RETURN entry (type only). */
1489 | 12.0M | #define STACK_PUSH_RETURN do {\ |
1490 | 12.0M | STACK_ENSURE(1);\ |
1491 | 12.0M | stk->type = STK_RETURN;\ |
1492 | 12.0M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1493 | 12.0M | STACK_INC;\ |
1494 | 12.0M | } while(0) |
1495 | | |
/* Push a STK_ABSENT_POS entry holding the two positions `start` and
   `end`; STACK_POP_ABSENT_POS reads them back. */
1496 | 80.8M | #define STACK_PUSH_ABSENT_POS(start, end) do {\ |
1497 | 80.8M | STACK_ENSURE(1);\ |
1498 | 80.8M | stk->type = STK_ABSENT_POS;\ |
1499 | 80.8M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1500 | 80.8M | stk->u.absent_pos.abs_pstr = (start);\ |
1501 | 80.8M | stk->u.absent_pos.end_pstr = (end);\ |
1502 | 80.8M | STACK_INC;\ |
1503 | 80.8M | } while(0) |
1504 | | |
/* Push a STK_MATCH_CACHE_POINT entry holding the index and bit mask that
   the MEMOIZE_* macros later apply to msa->match_cache_buf. */
1505 | 85.0M | #define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\ |
1506 | 85.0M | STACK_ENSURE(1);\ |
1507 | 85.0M | stk->type = STK_MATCH_CACHE_POINT;\ |
1508 | 85.0M | stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ |
1509 | 85.0M | stk->u.match_cache_point.index = (match_cache_point_index);\ |
1510 | 85.0M | stk->u.match_cache_point.mask = (match_cache_point_mask);\ |
1511 | 85.0M | STACK_INC;\ |
1512 | 85.0M | } while(0) |
1513 | | |
1514 | | |
/*
 * Debug-only underflow guard for the pop macros: with ONIG_DEBUG, a
 * pointer below stk_base prints the call-site tag `at` to stderr and
 * jumps to the `stack_error` label of the enclosing function.  Without
 * ONIG_DEBUG it expands to nothing.
 */
1515 | | #ifdef ONIG_DEBUG |
1516 | | # define STACK_BASE_CHECK(p, at) \ |
1517 | | if ((p) < stk_base) {\ |
1518 | | fprintf(stderr, "at %s\n", at);\ |
1519 | | goto stack_error;\ |
1520 | | } |
1521 | | #else |
1522 | | # define STACK_BASE_CHECK(p, at) |
1523 | | #endif |
1524 | | |
/*
 * Debug trace emitted whenever a match cache point is memoized.
 * Expands to a no-op unless ONIG_DEBUG_MATCH_CACHE is defined.
 */
#ifdef ONIG_DEBUG_MATCH_CACHE
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask);
#else
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
#endif

/*
 * Match-cache bookkeeping used by the pop macros.  The macros without a
 * parameter act on the scope variable `stk`; all of them use `msa`.
 *
 *   INC_NUM_FAILS
 *       count one more failure in msa->num_fails.
 *   MEMOIZE_MATCH_CACHE_POINT
 *       a STK_MATCH_CACHE_POINT entry ORs its mask into
 *       msa->match_cache_buf[index]; a STK_ATOMIC_MATCH_CACHE_POINT entry
 *       is recorded through memoize_extended_match_cache_point().
 *   MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp)
 *       a STK_MATCH_CACHE_POINT entry is turned into STK_VOID and
 *       recorded through memoize_extended_match_cache_point().
 *   MEMOIZE_ATOMIC_MATCH_CACHE_POINT
 *       a STK_MATCH_CACHE_POINT entry is recorded through
 *       memoize_extended_match_cache_point(); its type is not changed.
 *
 * The debug hook is always given the entry that was just memoized (the
 * parameterless macros previously passed the undefined name `stkp`,
 * which broke ONIG_DEBUG_MATCH_CACHE builds).  Every macro also has a
 * no-op definition when USE_MATCH_CACHE is off.
 */
#ifdef USE_MATCH_CACHE
# define INC_NUM_FAILS msa->num_fails++
# define MEMOIZE_MATCH_CACHE_POINT do {\
  if (stk->type == STK_MATCH_CACHE_POINT) {\
    msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  }\
  else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
    memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  }\
} while(0)
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
  if ((stkp)->type == STK_MATCH_CACHE_POINT) {\
    (stkp)->type = STK_VOID;\
    memoize_extended_match_cache_point(msa->match_cache_buf, (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
  }\
} while(0)
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
  if (stk->type == STK_MATCH_CACHE_POINT) {\
    memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
    MATCH_CACHE_DEBUG_MEMOIZE(stk);\
  }\
} while(0)
#else
# define INC_NUM_FAILS ((void) 0)
# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT ((void) 0)
#endif
1561 | | |
/* Drop exactly one entry from the top of the stack. */
1562 | 0 | #define STACK_POP_ONE do {\ |
1563 | 0 | stk--;\ |
1564 | 0 | STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ |
1565 | 0 | } while(0) |
1566 | | |
/*
 * Pop entries until one whose type has a bit of STK_MASK_POP_USED set;
 * stk is left pointing AT that entry.  How much state is undone for the
 * entries skipped on the way depends on the scope variable `pop_level`:
 *
 *   STACK_POP_LEVEL_FREE       nothing is restored
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START entries restore
 *                              mem_start_stk / mem_end_stk
 *   any other level            additionally STK_REPEAT_INC entries
 *                              decrement the referenced repeat count and
 *                              STK_MEM_END entries restore
 *                              mem_start_stk / mem_end_stk
 *
 * At every level each skipped entry also goes through
 * ELSE_IF_STATE_CHECK_MARK and MEMOIZE_MATCH_CACHE_POINT.
 */
1567 | 1.51G | #define STACK_POP do {\ |
1568 | 1.51G | switch (pop_level) {\ |
1569 | 519M | case STACK_POP_LEVEL_FREE:\ |
1570 | 709M | while (1) {\ |
1571 | 709M | stk--;\ |
1572 | 709M | STACK_BASE_CHECK(stk, "STACK_POP"); \ |
1573 | 709M | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
1574 | 709M | ELSE_IF_STATE_CHECK_MARK(stk);\ |
1575 | 189M | MEMOIZE_MATCH_CACHE_POINT;\ |
1576 | 189M | }\ |
1577 | 519M | break;\ |
1578 | 331M | case STACK_POP_LEVEL_MEM_START:\ |
1579 | 740M | while (1) {\ |
1580 | 740M | stk--;\ |
1581 | 740M | STACK_BASE_CHECK(stk, "STACK_POP 2"); \ |
1582 | 740M | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
1583 | 740M | else if (stk->type == STK_MEM_START) {\ |
1584 | 85.4M | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1585 | 85.4M | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1586 | 85.4M | }\ |
1587 | 740M | ELSE_IF_STATE_CHECK_MARK(stk);\ |
1588 | 409M | MEMOIZE_MATCH_CACHE_POINT;\ |
1589 | 409M | }\ |
1590 | 331M | break;\ |
1591 | 662M | default:\ |
1592 | 1.37G | while (1) {\ |
1593 | 1.37G | stk--;\ |
1594 | 1.37G | STACK_BASE_CHECK(stk, "STACK_POP 3"); \ |
1595 | 1.37G | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
1596 | 1.37G | else if (stk->type == STK_MEM_START) {\ |
1597 | 63.5M | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1598 | 63.5M | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1599 | 63.5M | }\ |
1600 | 709M | else if (stk->type == STK_REPEAT_INC) {\ |
1601 | 169M | STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ |
1602 | 169M | }\ |
1603 | 646M | else if (stk->type == STK_MEM_END) {\ |
1604 | 34.8M | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1605 | 34.8M | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1606 | 34.8M | }\ |
1607 | 1.37G | ELSE_IF_STATE_CHECK_MARK(stk);\ |
1608 | 709M | MEMOIZE_MATCH_CACHE_POINT;\ |
1609 | 709M | }\ |
1610 | 662M | break;\ |
1611 | 1.51G | }\ |
1612 | 1.51G | } while(0) |
1613 | | |
1614 | 700k | #define STACK_POP_TIL_POS_NOT do {\ |
1615 | 3.25M | while (1) {\ |
1616 | 3.25M | stk--;\ |
1617 | 3.25M | STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ |
1618 | 3.25M | if (stk->type == STK_POS_NOT) break;\ |
1619 | 3.25M | else if (stk->type == STK_MEM_START) {\ |
1620 | 927 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1621 | 927 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1622 | 927 | }\ |
1623 | 2.55M | else if (stk->type == STK_REPEAT_INC) {\ |
1624 | 7.97k | STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ |
1625 | 7.97k | }\ |
1626 | 2.55M | else if (stk->type == STK_MEM_END) {\ |
1627 | 848 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1628 | 848 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1629 | 848 | }\ |
1630 | 2.54M | else if (IS_TO_VOID_TARGET(stk)) {\ |
1631 | 2.21M | INC_NUM_FAILS;\ |
1632 | 2.21M | }\ |
1633 | 3.25M | ELSE_IF_STATE_CHECK_MARK(stk);\ |
1634 | 2.55M | MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\ |
1635 | 2.55M | }\ |
1636 | 700k | } while(0) |
1637 | | |
1638 | 4.65M | #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ |
1639 | 4.65M | while (1) {\ |
1640 | 4.65M | stk--;\ |
1641 | 4.65M | STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ |
1642 | 4.65M | if (stk->type == STK_LOOK_BEHIND_NOT) break;\ |
1643 | 4.65M | else if (stk->type == STK_MEM_START) {\ |
1644 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1645 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1646 | 0 | }\ |
1647 | 267 | else if (stk->type == STK_REPEAT_INC) {\ |
1648 | 0 | STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ |
1649 | 0 | }\ |
1650 | 267 | else if (stk->type == STK_MEM_END) {\ |
1651 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
1652 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
1653 | 0 | }\ |
1654 | 4.65M | ELSE_IF_STATE_CHECK_MARK(stk);\ |
1655 | 267 | }\ |
1656 | 4.65M | } while(0) |
1657 | | |
/*
 * Unwind the match stack until a STK_ABSENT entry is reached; stk is left
 * pointing at that entry.
 * Capture indexes saved in STK_MEM_START / STK_MEM_END entries are restored
 * into mem_start_stk / mem_end_stk, and the repeat counter referenced by each
 * STK_REPEAT_INC entry is decremented on the way down.
 * ELSE_IF_STATE_CHECK_MARK must stay attached to the end of the if/else chain.
 */
#define STACK_POP_TIL_ABSENT do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
1677 | | |
/*
 * Pop one entry and read its absent_pos payload: abs_pstr is stored into
 * `start` and end_pstr into `end`.  The entry type is not checked here.
 */
#define STACK_POP_ABSENT_POS(start, end) do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end)   = stk->u.absent_pos.end_pstr;\
} while(0)
1684 | | |
/*
 * Walk downwards from the stack top (without moving stk) until the nearest
 * STK_POS entry is found.  That entry, and every entry on the way for which
 * IS_TO_VOID_TARGET holds, is retyped to STK_VOID; INC_NUM_FAILS is invoked
 * for each voided target.  On exit `k` points at the former STK_POS entry.
 */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
  }\
} while(0)
1701 | | |
/*
 * Walk downwards from the stack top (without moving stk) until the nearest
 * STK_STOP_BT entry, which is retyped to STK_VOID.  On the way, entries for
 * which IS_TO_VOID_TARGET holds are voided (INC_NUM_FAILS is invoked for
 * each) and STK_MATCH_CACHE_POINT entries are retyped to
 * STK_ATOMIC_MATCH_CACHE_POINT.
 */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
    else if (k->type == STK_MATCH_CACHE_POINT) {\
      k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
    }\
  }\
} while(0)
1720 | | |
/*
 * Pop entries (moving stk) until a STK_STOP_BT entry is reached; that entry
 * is retyped to STK_VOID and stk is left pointing at it.
 * MEMOIZE_ATOMIC_MATCH_CACHE_POINT is invoked for every other popped entry.
 * The STACK_BASE_CHECK label names this macro so that an underflow
 * diagnostic is not attributed to STACK_STOP_BT_END.
 */
#define STACK_STOP_BT_FAIL do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_STOP_BT_FAIL"); \
    if (stk->type == STK_STOP_BT) {\
      stk->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
  }\
} while(0)
1732 | | |
/*
 * Search downwards, starting at the entry indexed by (stk-1)->null_check,
 * for the STK_NULL_CHECK_START entry whose number equals `id`.
 * `isnull` is set to 1 when the position recorded there equals `s`,
 * otherwise to 0.
 */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while(0)
1746 | | |
/*
 * Variant of STACK_NULL_CHECK that skips balanced START/END pairs.
 * Searches downwards, starting at the entry indexed by (stk-1)->null_check,
 * for the STK_NULL_CHECK_START entry with number `id` that is not paired
 * with a STK_NULL_CHECK_END of the same number seen earlier in the walk,
 * then sets `isnull` to 1 when its recorded position equals `s`, else 0.
 *
 * `level` is only decremented by START entries carrying `id`, so END entries
 * must be filtered by `id` as well; counting ENDs of other null-checks would
 * leave `level` permanently above zero and skip the real START entry.  This
 * matches the bookkeeping in STACK_NULL_CHECK_MEMST_REC.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
1767 | | |
/*
 * Null-loop check that also inspects capture state.
 * Locates the STK_NULL_CHECK_START entry numbered `id` (searching downwards
 * from the entry indexed by (stk-1)->null_check).
 *   - recorded position != s            -> isnull = 0
 *   - otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that point and stk is examined:
 *       * end index is INVALID_STACK_INDEX        -> isnull = 0, stop
 *       * capture start position != end position  -> isnull = 0, stop
 *       * capture is empty but its end != s       -> isnull = -1, keep going
 * The capture end is read through the stack when the group's bit is set in
 * reg->bt_mem_end, and taken directly from u.mem.end otherwise.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      if (k->u.null_check.pstr != (s)) {\
        (isnull) = 0;\
      }\
      else {\
        UChar* endp;\
        (isnull) = 1;\
        for (; k < stk; k++) {\
          if (k->type != STK_MEM_START) continue;\
          if (k->u.mem.end == INVALID_STACK_INDEX) {\
            (isnull) = 0;\
            break;\
          }\
          endp = (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
            ? STACK_AT(k->u.mem.end)->u.mem.pstr\
            : (UChar* )k->u.mem.end;\
          if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
            (isnull) = 0;\
            break;\
          }\
          if (endp != s) {\
            (isnull) = -1; /* empty, but position changed */ \
          }\
        }\
      }\
      break;\
    }\
  }\
} while(0)
1806 | | |
/*
 * Variant of STACK_NULL_CHECK_MEMST that skips balanced START/END pairs.
 * While searching downwards, each STK_NULL_CHECK_END numbered `id` raises
 * `level`, and each STK_NULL_CHECK_START numbered `id` seen with level != 0
 * lowers it again; the START entry seen at level 0 is the one examined.
 *   - recorded position != s            -> isnull = 0
 *   - otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that point and stk is examined:
 *       * end index is INVALID_STACK_INDEX        -> isnull = 0, stop
 *       * capture start position != end position  -> isnull = 0, stop
 *       * capture is empty but its end != s       -> isnull = -1, keep going
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
    else if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      if (level != 0) {\
        level--;\
      }\
      else {\
        if (k->u.null_check.pstr != (s)) {\
          (isnull) = 0;\
        }\
        else {\
          UChar* endp;\
          (isnull) = 1;\
          for (; k < stk; k++) {\
            if (k->type != STK_MEM_START) continue;\
            if (k->u.mem.end == INVALID_STACK_INDEX) {\
              (isnull) = 0;\
              break;\
            }\
            endp = (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
              ? STACK_AT(k->u.mem.end)->u.mem.pstr\
              : (UChar* )k->u.mem.end;\
            if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
              (isnull) = 0;\
              break;\
            }\
            if (endp != s) {\
              (isnull) = -1; /* empty, but position changed */ \
            }\
          }\
        }\
        break;\
      }\
    }\
  }\
} while(0)
1854 | | |
/*
 * Point `k` at the nearest STK_REPEAT entry below stk whose number equals
 * `id` and which is seen at call level 0.  Walking downwards, a
 * STK_CALL_FRAME entry lowers the level and a STK_RETURN entry raises it.
 */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT && level == 0 && k->u.repeat.num == (id)) {\
      break;\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while(0)
1872 | | |
/*
 * Fetch the return address of the innermost pending call into `addr`.
 * Walking downwards from stk, each STK_RETURN entry raises `level` and each
 * STK_CALL_FRAME entry seen with level != 0 lowers it; the first
 * STK_CALL_FRAME seen at level 0 supplies u.call_frame.ret_addr.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
1890 | | |
1891 | | |
/*
 * Compare `len` bytes at s1 and s2, advancing both pointers and counting
 * `len` down as it goes.  Jumps to the enclosing `fail` label on the first
 * mismatch.  All three arguments must be modifiable lvalues.
 */
#define STRING_CMP(s1,s2,len) do {\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
1897 | | |
/*
 * Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the expanding scope.  Jumps to the enclosing `fail` label when
 * string_cmp_ic() reports a mismatch (returns 0).
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (!string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end)) {\
    goto fail; \
  }\
} while(0)
1902 | | |
1903 | | static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, |
1904 | | UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) |
1905 | 8.86M | { |
1906 | 8.86M | UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1907 | 8.86M | UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1908 | 8.86M | UChar *p1, *p2, *end1, *s2; |
1909 | 8.86M | int len1, len2; |
1910 | | |
1911 | 8.86M | s2 = *ps2; |
1912 | 8.86M | end1 = s1 + mblen; |
1913 | 12.1M | while (s1 < end1) { |
1914 | 5.89M | len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); |
1915 | 5.89M | len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); |
1916 | 5.89M | if (len1 != len2) return 0; |
1917 | 5.89M | p1 = buf1; |
1918 | 5.89M | p2 = buf2; |
1919 | 9.14M | while (len1-- > 0) { |
1920 | 5.89M | if (*p1 != *p2) return 0; |
1921 | 3.25M | p1++; |
1922 | 3.25M | p2++; |
1923 | 3.25M | } |
1924 | 5.89M | } |
1925 | | |
1926 | 6.22M | *ps2 = s2; |
1927 | 6.22M | return 1; |
1928 | 8.86M | } |
1929 | | |
/*
 * Like STRING_CMP, but reports the outcome in `is_fail` (1 on the first
 * mismatch, 0 when all `len` bytes matched) instead of jumping to `fail`.
 * s1, s2 and len are advanced/consumed in place.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) {\
      is_fail = 1;\
      break;\
    }\
  }\
} while(0)
1938 | | |
/*
 * Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the expanding scope.  Sets `is_fail` to 1 when
 * string_cmp_ic() returns 0 (mismatch) and to 0 otherwise.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
    ? 1 \
    : 0; \
} while(0)
1945 | | |
1946 | | |
/*
 * Position helpers.  They rely on variables of the expanding scope:
 * `str` / `end` (subject bounds), `s` (current position) and, in the
 * range-restricted build, `right_range`.
 */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/*
 * DATA_ENSURE_CHECK1 / DATA_ENSURE_CHECK(n): true when at least 1 / n bytes
 * remain before the limit.
 * DATA_ENSURE(n): `goto fail` when fewer than n bytes remain.
 * DATA_ENSURE_CONTINUE(n): `continue` when fewer than n bytes remain.
 * ABSENT_END_POS: the limit itself.
 * The limit is right_range when
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined, else end.
 */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_CHECK1        (s < right_range)
# define DATA_ENSURE_CHECK(n)      (s + (n) <= right_range)
# define DATA_ENSURE(n)            if (s + (n) > right_range) goto fail
# define DATA_ENSURE_CONTINUE(n)   if (s + (n) > right_range) continue
# define ABSENT_END_POS            right_range
#else
# define DATA_ENSURE_CHECK1        (s < end)
# define DATA_ENSURE_CHECK(n)      (s + (n) <= end)
# define DATA_ENSURE(n)            if (s + (n) > end) goto fail
# define DATA_ENSURE_CONTINUE(n)   if (s + (n) > end) continue
# define ABSENT_END_POS            end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1963 | | |
1964 | | int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); |
1965 | | |
1966 | | static inline int |
1967 | | enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e) |
1968 | 1.16G | { |
1969 | 1.16G | if (enc->max_enc_len == enc->min_enc_len) { |
1970 | 1.14G | return (p < e ? enc->min_enc_len : 0); |
1971 | 1.14G | } |
1972 | 23.6M | else { |
1973 | 23.6M | return onigenc_mbclen_approximate(p, e, enc); |
1974 | 23.6M | } |
1975 | 1.16G | } |
1976 | | |
1977 | | |
1978 | | #ifdef USE_CAPTURE_HISTORY |
1979 | | static int |
1980 | | make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, |
1981 | | OnigStackType* stk_top, UChar* str, regex_t* reg) |
1982 | | { |
1983 | | int n, r; |
1984 | | OnigCaptureTreeNode* child; |
1985 | | OnigStackType* k = *kp; |
1986 | | |
1987 | | while (k < stk_top) { |
1988 | | if (k->type == STK_MEM_START) { |
1989 | | n = k->u.mem.num; |
1990 | | if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && |
1991 | | BIT_STATUS_AT(reg->capture_history, n) != 0) { |
1992 | | child = history_node_new(); |
1993 | | CHECK_NULL_RETURN_MEMERR(child); |
1994 | | child->group = n; |
1995 | | child->beg = k->u.mem.pstr - str; |
1996 | | r = history_tree_add_child(node, child); |
1997 | | if (r != 0) { |
1998 | | history_tree_free(child); |
1999 | | return r; |
2000 | | } |
2001 | | *kp = (k + 1); |
2002 | | r = make_capture_history_tree(child, kp, stk_top, str, reg); |
2003 | | if (r != 0) return r; |
2004 | | |
2005 | | k = *kp; |
2006 | | child->end = k->u.mem.pstr - str; |
2007 | | } |
2008 | | } |
2009 | | else if (k->type == STK_MEM_END) { |
2010 | | if (k->u.mem.num == node->group) { |
2011 | | node->end = k->u.mem.pstr - str; |
2012 | | *kp = k; |
2013 | | return 0; |
2014 | | } |
2015 | | } |
2016 | | k++; |
2017 | | } |
2018 | | |
2019 | | return 1; /* 1: root node ending. */ |
2020 | | } |
2021 | | #endif /* USE_CAPTURE_HISTORY */ |
2022 | | |
2023 | | #ifdef USE_BACKREF_WITH_LEVEL |
2024 | | static int |
2025 | | mem_is_in_memp(int mem, int num, UChar* memp) |
2026 | 40.3M | { |
2027 | 40.3M | int i; |
2028 | 40.3M | MemNumType m; |
2029 | | |
2030 | 40.4M | for (i = 0; i < num; i++) { |
2031 | 40.3M | GET_MEMNUM_INC(m, memp); |
2032 | 40.3M | if (mem == (int )m) return 1; |
2033 | 40.3M | } |
2034 | 66.7k | return 0; |
2035 | 40.3M | } |
2036 | | |
2037 | | static int backref_match_at_nested_level(regex_t* reg, |
2038 | | OnigStackType* top, OnigStackType* stk_base, |
2039 | | int ignore_case, int case_fold_flag, |
2040 | | int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) |
2041 | 20.4M | { |
2042 | 20.4M | UChar *ss, *p, *pstart, *pend = NULL_UCHARP; |
2043 | 20.4M | int level; |
2044 | 20.4M | OnigStackType* k; |
2045 | | |
2046 | 20.4M | level = 0; |
2047 | 20.4M | k = top; |
2048 | 20.4M | k--; |
2049 | 189M | while (k >= stk_base) { |
2050 | 189M | if (k->type == STK_CALL_FRAME) { |
2051 | 11.6M | level--; |
2052 | 11.6M | } |
2053 | 177M | else if (k->type == STK_RETURN) { |
2054 | 1.96M | level++; |
2055 | 1.96M | } |
2056 | 175M | else if (level == nest) { |
2057 | 90.9M | if (k->type == STK_MEM_START) { |
2058 | 20.3M | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { |
2059 | 20.2M | pstart = k->u.mem.pstr; |
2060 | 20.2M | if (pend != NULL_UCHARP) { |
2061 | 20.0M | if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ |
2062 | 20.0M | p = pstart; |
2063 | 20.0M | ss = *s; |
2064 | | |
2065 | 20.0M | if (ignore_case != 0) { |
2066 | 483k | if (string_cmp_ic(reg->enc, case_fold_flag, |
2067 | 483k | pstart, &ss, pend - pstart, send) == 0) |
2068 | 11.0k | return 0; /* or goto next_mem; */ |
2069 | 483k | } |
2070 | 19.5M | else { |
2071 | 19.5M | while (p < pend) { |
2072 | 277k | if (*p++ != *ss++) return 0; /* or goto next_mem; */ |
2073 | 277k | } |
2074 | 19.5M | } |
2075 | | |
2076 | 19.7M | *s = ss; |
2077 | 19.7M | return 1; |
2078 | 20.0M | } |
2079 | 20.2M | } |
2080 | 20.3M | } |
2081 | 70.5M | else if (k->type == STK_MEM_END) { |
2082 | 20.0M | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { |
2083 | 20.0M | pend = k->u.mem.pstr; |
2084 | 20.0M | } |
2085 | 20.0M | } |
2086 | 90.9M | } |
2087 | 169M | k--; |
2088 | 169M | } |
2089 | | |
2090 | 422k | return 0; |
2091 | 20.4M | } |
2092 | | #endif /* USE_BACKREF_WITH_LEVEL */ |
2093 | | |
2094 | | |
2095 | | #ifdef ONIG_DEBUG_STATISTICS |
2096 | | |
2097 | | # ifdef _WIN32 |
2098 | | # include <windows.h> |
2099 | | static LARGE_INTEGER ts, te, freq; |
2100 | | # define GETTIME(t) QueryPerformanceCounter(&(t)) |
2101 | | # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \ |
2102 | | * 1000000 / freq.QuadPart) |
2103 | | # else /* _WIN32 */ |
2104 | | |
2105 | | # define USE_TIMEOFDAY |
2106 | | |
2107 | | # ifdef USE_TIMEOFDAY |
2108 | | # ifdef HAVE_SYS_TIME_H |
2109 | | # include <sys/time.h> |
2110 | | # endif |
2111 | | # ifdef HAVE_UNISTD_H |
2112 | | # include <unistd.h> |
2113 | | # endif |
2114 | | static struct timeval ts, te; |
2115 | | # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) |
2116 | | # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ |
2117 | | (((te).tv_sec - (ts).tv_sec)*1000000)) |
2118 | | # else /* USE_TIMEOFDAY */ |
2119 | | # ifdef HAVE_SYS_TIMES_H |
2120 | | # include <sys/times.h> |
2121 | | # endif |
2122 | | static struct tms ts, te; |
2123 | | # define GETTIME(t) times(&(t)) |
2124 | | # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) |
2125 | | # endif /* USE_TIMEOFDAY */ |
2126 | | |
2127 | | # endif /* _WIN32 */ |
2128 | | |
2129 | | static int OpCounter[256]; |
2130 | | static int OpPrevCounter[256]; |
2131 | | static unsigned long OpTime[256]; |
2132 | | static int OpCurr = OP_FINISH; |
2133 | | static int OpPrevTarget = OP_FAIL; |
2134 | | static int MaxStackDepth = 0; |
2135 | | |
/*
 * MOP_IN(opcode): called on entry to an opcode handler.  When `opcode` is
 * OpPrevTarget, the counter of the previously executed opcode (OpCurr) is
 * bumped in OpPrevCounter; then OpCurr and OpCounter are updated and the
 * start time is sampled into ts.
 */
# define MOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) {\
    OpPrevCounter[OpCurr]++;\
  }\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)

/*
 * MOP_OUT: called on exit from an opcode handler.  Samples the end time
 * into te and adds the elapsed time to OpTime[OpCurr].
 */
# define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2147 | | |
2148 | | extern void |
2149 | | onig_statistics_init(void) |
2150 | | { |
2151 | | int i; |
2152 | | for (i = 0; i < 256; i++) { |
2153 | | OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; |
2154 | | } |
2155 | | MaxStackDepth = 0; |
2156 | | # ifdef _WIN32 |
2157 | | QueryPerformanceFrequency(&freq); |
2158 | | # endif |
2159 | | } |
2160 | | |
2161 | | extern void |
2162 | | onig_print_statistics(FILE* f) |
2163 | | { |
2164 | | int i; |
2165 | | fprintf(f, " count prev time\n"); |
2166 | | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { |
2167 | | fprintf(f, "%8d: %8d: %10lu: %s\n", |
2168 | | OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); |
2169 | | } |
2170 | | fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); |
2171 | | } |
2172 | | |
2173 | | # define STACK_INC do {\ |
2174 | | stk++;\ |
2175 | | if (stk - stk_base > MaxStackDepth) \ |
2176 | | MaxStackDepth = stk - stk_base;\ |
2177 | | } while(0) |
2178 | | |
2179 | | #else /* ONIG_DEBUG_STATISTICS */ |
2180 | 3.05G | # define STACK_INC stk++ |
2181 | | |
2182 | | # define MOP_IN(opcode) |
2183 | | # define MOP_OUT |
2184 | | #endif /* ONIG_DEBUG_STATISTICS */ |
2185 | | |
2186 | | |
2187 | | #ifdef ONIG_DEBUG_MATCH |
2188 | | static const char * |
2189 | | stack_type_str(int stack_type) |
2190 | | { |
2191 | | switch (stack_type) { |
2192 | | case STK_ALT: return "Alt "; |
2193 | | case STK_LOOK_BEHIND_NOT: return "LBNot "; |
2194 | | case STK_POS_NOT: return "PosNot"; |
2195 | | case STK_MEM_START: return "MemS "; |
2196 | | case STK_MEM_END: return "MemE "; |
2197 | | case STK_REPEAT_INC: return "RepInc"; |
2198 | | case STK_STATE_CHECK_MARK: return "StChMk"; |
2199 | | case STK_NULL_CHECK_START: return "NulChS"; |
2200 | | case STK_NULL_CHECK_END: return "NulChE"; |
2201 | | case STK_MEM_END_MARK: return "MemEMk"; |
2202 | | case STK_POS: return "Pos "; |
2203 | | case STK_STOP_BT: return "StopBt"; |
2204 | | case STK_REPEAT: return "Rep "; |
2205 | | case STK_CALL_FRAME: return "Call "; |
2206 | | case STK_RETURN: return "Ret "; |
2207 | | case STK_VOID: return "Void "; |
2208 | | case STK_ABSENT_POS: return "AbsPos"; |
2209 | | case STK_ABSENT: return "Absent"; |
2210 | | case STK_MATCH_CACHE_POINT: return "MCache"; |
2211 | | default: return " "; |
2212 | | } |
2213 | | } |
2214 | | #endif |
2215 | | #ifdef USE_MATCH_CACHE |
2216 | | |
2217 | | static long |
2218 | | bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p) |
2219 | 249M | { |
2220 | 249M | long l = 0, r = num_cache_opcodes - 1, m = 0; |
2221 | | |
2222 | 942M | while (l <= r) { |
2223 | 799M | m = (l + r) / 2; |
2224 | 799M | if (cache_opcodes[m].addr == p) break; |
2225 | 693M | if (cache_opcodes[m].addr < p) l = m + 1; |
2226 | 363M | else r = m - 1; |
2227 | 693M | } |
2228 | 249M | return m; |
2229 | 249M | } |
2230 | | |
2231 | | static long |
2232 | | find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr) |
2233 | 249M | { |
2234 | 249M | long m; |
2235 | 249M | const OnigCacheOpcode* cache_opcode; |
2236 | 249M | const OnigRepeatRange* range; |
2237 | 249M | const OnigStackType *stkp; |
2238 | 249M | int count = 0; |
2239 | 249M | int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG; |
2240 | 249M | long cache_point; |
2241 | 249M | long num_cache_points_at_outer_repeat; |
2242 | 249M | long num_cache_points_in_outer_repeat; |
2243 | | |
2244 | 249M | m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p); |
2245 | | |
2246 | 249M | if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) { |
2247 | 143M | return -1; |
2248 | 143M | } |
2249 | | |
2250 | 106M | cache_opcode = &cache_opcodes[m]; |
2251 | 106M | *cache_opcode_ptr = &cache_opcodes[m]; |
2252 | 106M | cache_point = cache_opcode->cache_point; |
2253 | 106M | if (cache_opcode->outer_repeat_mem == -1) { |
2254 | 97.2M | return cache_point; |
2255 | 97.2M | } |
2256 | | |
2257 | 8.87M | num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat; |
2258 | 8.87M | num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat; |
2259 | | |
2260 | 8.87M | range = ®->repeat_range[cache_opcode->outer_repeat_mem]; |
2261 | | |
2262 | 8.87M | stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]]; |
2263 | 8.87M | count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count; |
2264 | | |
2265 | 8.87M | if (count < range->lower) { |
2266 | 2.99M | return num_cache_points_at_outer_repeat + |
2267 | 2.99M | num_cache_points_in_outer_repeat * count + |
2268 | 2.99M | cache_point; |
2269 | 2.99M | } |
2270 | | |
2271 | 5.88M | if (range->upper == 0x7fffffff) { |
2272 | 1.20M | return num_cache_points_at_outer_repeat + |
2273 | 1.20M | num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) + |
2274 | 1.20M | cache_point; |
2275 | 1.20M | } |
2276 | | |
2277 | 4.68M | return num_cache_points_at_outer_repeat + |
2278 | 4.68M | num_cache_points_in_outer_repeat * (range->lower - 1) + |
2279 | 4.68M | (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) + |
2280 | 4.68M | cache_point; |
2281 | 5.88M | } |
2282 | | |
/*
 * Test the bit that immediately follows the cache-point bit selected by
 * match_cache_point_mask in match_cache_buf[match_cache_point_index].
 * When the mask has bit 0x80 set, the following bit is bit 0 of the next
 * byte; otherwise it is the mask shifted left by one within the same byte.
 * Returns 1 when that bit is set, 0 otherwise.
 */
static int
check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
{
  const int spills = (match_cache_point_mask & 0x80) != 0;
  const long byte_index = spills ? match_cache_point_index + 1
                                 : match_cache_point_index;
  const int bit = spills ? 0x01 : (match_cache_point_mask << 1);

  return (match_cache_buf[byte_index] & bit) != 0;
}
2293 | | |
/*
 * Set both the cache-point bit selected by match_cache_point_mask in
 * match_cache_buf[match_cache_point_index] and the bit that immediately
 * follows it.  When the mask has bit 0x80 set, the following bit is bit 0
 * of the next byte; otherwise it is the mask shifted left by one within
 * the same byte.
 */
static void
memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
{
  uint8_t *slot = &match_cache_buf[match_cache_point_index];

  *slot |= match_cache_point_mask;
  if ((match_cache_point_mask & 0x80) == 0) {
    *slot |= match_cache_point_mask << 1;
    return;
  }
  slot[1] |= 0x01;
}
2305 | | |
2306 | | #endif /* USE_MATCH_CACHE */ |
2307 | | |
2308 | | /* match data(str - end) from position (sstart). */ |
2309 | | /* if sstart == str then set sprev to NULL. */ |
2310 | | static OnigPosition |
2311 | | match_at(regex_t* reg, const UChar* str, const UChar* end, |
2312 | | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
2313 | | const UChar* right_range, |
2314 | | #endif |
2315 | | const UChar* sstart, UChar* sprev, OnigMatchArg* msa) |
2316 | 7.67M | { |
2317 | 7.67M | static const UChar FinishCode[] = { OP_FINISH }; |
2318 | | |
2319 | 7.67M | int i, num_mem, pop_level; |
2320 | 7.67M | ptrdiff_t n, best_len; |
2321 | 7.67M | LengthType tlen, tlen2; |
2322 | 7.67M | MemNumType mem; |
2323 | 7.67M | RelAddrType addr; |
2324 | 7.67M | OnigOptionType option = reg->options; |
2325 | 7.67M | OnigEncoding encode = reg->enc; |
2326 | 7.67M | OnigCaseFoldType case_fold_flag = reg->case_fold_flag; |
2327 | 7.67M | UChar *s, *q, *sbegin; |
2328 | 7.67M | UChar *p = reg->p; |
2329 | 7.67M | UChar *pbegin = p; |
2330 | 7.67M | UChar *pkeep; |
2331 | 7.67M | char *alloca_base; |
2332 | 7.67M | char *xmalloc_base = NULL; |
2333 | 7.67M | OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end; |
2334 | 7.67M | OnigStackType *stkp; /* used as any purpose. */ |
2335 | 7.67M | OnigStackIndex si; |
2336 | 7.67M | OnigStackIndex *repeat_stk; |
2337 | 7.67M | OnigStackIndex *mem_start_stk, *mem_end_stk; |
2338 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
2339 | | int scv; |
2340 | | unsigned char* state_check_buff = msa->state_check_buff; |
2341 | | int num_comb_exp_check = reg->num_comb_exp_check; |
2342 | | #endif |
2343 | | |
2344 | 7.67M | #if USE_TOKEN_THREADED_VM |
2345 | 7.67M | # define OP_OFFSET 1 |
2346 | 7.67M | # define VM_LOOP JUMP; |
2347 | 7.67M | # define VM_LOOP_END |
2348 | 5.92G | # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; |
2349 | 7.67M | # define DEFAULT L_DEFAULT: |
2350 | 545M | # define NEXT sprev = sbegin; JUMP |
2351 | 5.95G | # define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++]) |
2352 | | |
2353 | 7.67M | RB_GNUC_EXTENSION static const void *oplabels[] = { |
2354 | 7.67M | &&L_OP_FINISH, /* matching process terminator (no more alternative) */ |
2355 | 7.67M | &&L_OP_END, /* pattern code terminator (success end) */ |
2356 | | |
2357 | 7.67M | &&L_OP_EXACT1, /* single byte, N = 1 */ |
2358 | 7.67M | &&L_OP_EXACT2, /* single byte, N = 2 */ |
2359 | 7.67M | &&L_OP_EXACT3, /* single byte, N = 3 */ |
2360 | 7.67M | &&L_OP_EXACT4, /* single byte, N = 4 */ |
2361 | 7.67M | &&L_OP_EXACT5, /* single byte, N = 5 */ |
2362 | 7.67M | &&L_OP_EXACTN, /* single byte */ |
2363 | 7.67M | &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ |
2364 | 7.67M | &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ |
2365 | 7.67M | &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ |
2366 | 7.67M | &&L_OP_EXACTMB2N, /* mb-length = 2 */ |
2367 | 7.67M | &&L_OP_EXACTMB3N, /* mb-length = 3 */ |
2368 | 7.67M | &&L_OP_EXACTMBN, /* other length */ |
2369 | | |
2370 | 7.67M | &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */ |
2371 | 7.67M | &&L_OP_EXACTN_IC, /* single byte, ignore case */ |
2372 | | |
2373 | 7.67M | &&L_OP_CCLASS, |
2374 | 7.67M | &&L_OP_CCLASS_MB, |
2375 | 7.67M | &&L_OP_CCLASS_MIX, |
2376 | 7.67M | &&L_OP_CCLASS_NOT, |
2377 | 7.67M | &&L_OP_CCLASS_MB_NOT, |
2378 | 7.67M | &&L_OP_CCLASS_MIX_NOT, |
2379 | | |
2380 | 7.67M | &&L_OP_ANYCHAR, /* "." */ |
2381 | 7.67M | &&L_OP_ANYCHAR_ML, /* "." multi-line */ |
2382 | 7.67M | &&L_OP_ANYCHAR_STAR, /* ".*" */ |
2383 | 7.67M | &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ |
2384 | 7.67M | &&L_OP_ANYCHAR_STAR_PEEK_NEXT, |
2385 | 7.67M | &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT, |
2386 | | |
2387 | 7.67M | &&L_OP_WORD, |
2388 | 7.67M | &&L_OP_NOT_WORD, |
2389 | 7.67M | &&L_OP_WORD_BOUND, |
2390 | 7.67M | &&L_OP_NOT_WORD_BOUND, |
2391 | 7.67M | # ifdef USE_WORD_BEGIN_END |
2392 | 7.67M | &&L_OP_WORD_BEGIN, |
2393 | 7.67M | &&L_OP_WORD_END, |
2394 | | # else |
2395 | | &&L_DEFAULT, |
2396 | | &&L_DEFAULT, |
2397 | | # endif |
2398 | 7.67M | &&L_OP_ASCII_WORD, |
2399 | 7.67M | &&L_OP_NOT_ASCII_WORD, |
2400 | 7.67M | &&L_OP_ASCII_WORD_BOUND, |
2401 | 7.67M | &&L_OP_NOT_ASCII_WORD_BOUND, |
2402 | 7.67M | # ifdef USE_WORD_BEGIN_END |
2403 | 7.67M | &&L_OP_ASCII_WORD_BEGIN, |
2404 | 7.67M | &&L_OP_ASCII_WORD_END, |
2405 | | # else |
2406 | | &&L_DEFAULT, |
2407 | | &&L_DEFAULT, |
2408 | | # endif |
2409 | | |
2410 | 7.67M | &&L_OP_BEGIN_BUF, |
2411 | 7.67M | &&L_OP_END_BUF, |
2412 | 7.67M | &&L_OP_BEGIN_LINE, |
2413 | 7.67M | &&L_OP_END_LINE, |
2414 | 7.67M | &&L_OP_SEMI_END_BUF, |
2415 | 7.67M | &&L_OP_BEGIN_POSITION, |
2416 | | |
2417 | 7.67M | &&L_OP_BACKREF1, |
2418 | 7.67M | &&L_OP_BACKREF2, |
2419 | 7.67M | &&L_OP_BACKREFN, |
2420 | 7.67M | &&L_OP_BACKREFN_IC, |
2421 | 7.67M | &&L_OP_BACKREF_MULTI, |
2422 | 7.67M | &&L_OP_BACKREF_MULTI_IC, |
2423 | 7.67M | # ifdef USE_BACKREF_WITH_LEVEL |
2424 | 7.67M | &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ |
2425 | | # else |
2426 | | &&L_DEFAULT, |
2427 | | # endif |
2428 | 7.67M | &&L_OP_MEMORY_START, |
2429 | 7.67M | &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */ |
2430 | 7.67M | &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */ |
2431 | 7.67M | # ifdef USE_SUBEXP_CALL |
2432 | 7.67M | &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ |
2433 | | # else |
2434 | | &&L_DEFAULT, |
2435 | | # endif |
2436 | 7.67M | &&L_OP_MEMORY_END, |
2437 | 7.67M | # ifdef USE_SUBEXP_CALL |
2438 | 7.67M | &&L_OP_MEMORY_END_REC, /* push marker to stack */ |
2439 | | # else |
2440 | | &&L_DEFAULT, |
2441 | | # endif |
2442 | | |
2443 | 7.67M | &&L_OP_KEEP, |
2444 | | |
2445 | 7.67M | &&L_OP_FAIL, /* pop stack and move */ |
2446 | 7.67M | &&L_OP_JUMP, |
2447 | 7.67M | &&L_OP_PUSH, |
2448 | 7.67M | &&L_OP_POP, |
2449 | | # ifdef USE_OP_PUSH_OR_JUMP_EXACT |
2450 | | &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ |
2451 | | # else |
2452 | 7.67M | &&L_DEFAULT, |
2453 | 7.67M | # endif |
2454 | 7.67M | &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ |
2455 | 7.67M | &&L_OP_REPEAT, /* {n,m} */ |
2456 | 7.67M | &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */ |
2457 | 7.67M | &&L_OP_REPEAT_INC, |
2458 | 7.67M | &&L_OP_REPEAT_INC_NG, /* non greedy */ |
2459 | 7.67M | &&L_OP_REPEAT_INC_SG, /* search and get in stack */ |
2460 | 7.67M | &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ |
2461 | 7.67M | &&L_OP_NULL_CHECK_START, /* null loop checker start */ |
2462 | 7.67M | &&L_OP_NULL_CHECK_END, /* null loop checker end */ |
2463 | 7.67M | # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
2464 | 7.67M | &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ |
2465 | | # else |
2466 | | &&L_DEFAULT, |
2467 | | # endif |
2468 | 7.67M | # ifdef USE_SUBEXP_CALL |
2469 | 7.67M | &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ |
2470 | | # else |
2471 | | &&L_DEFAULT, |
2472 | | # endif |
2473 | | |
2474 | 7.67M | &&L_OP_PUSH_POS, /* (?=...) start */ |
2475 | 7.67M | &&L_OP_POP_POS, /* (?=...) end */ |
2476 | 7.67M | &&L_OP_PUSH_POS_NOT, /* (?!...) start */ |
2477 | 7.67M | &&L_OP_FAIL_POS, /* (?!...) end */ |
2478 | 7.67M | &&L_OP_PUSH_STOP_BT, /* (?>...) start */ |
2479 | 7.67M | &&L_OP_POP_STOP_BT, /* (?>...) end */ |
2480 | 7.67M | &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ |
2481 | 7.67M | &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ |
2482 | 7.67M | &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ |
2483 | 7.67M | &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */ |
2484 | 7.67M | &&L_OP_ABSENT, /* (?~...) start of inner loop */ |
2485 | 7.67M | &&L_OP_ABSENT_END, /* (?~...) end */ |
2486 | | |
2487 | 7.67M | # ifdef USE_SUBEXP_CALL |
2488 | 7.67M | &&L_OP_CALL, /* \g<name> */ |
2489 | 7.67M | &&L_OP_RETURN, |
2490 | | # else |
2491 | | &&L_DEFAULT, |
2492 | | &&L_DEFAULT, |
2493 | | # endif |
2494 | 7.67M | &&L_OP_CONDITION, |
2495 | | |
2496 | | # ifdef USE_COMBINATION_EXPLOSION_CHECK |
2497 | | &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */ |
2498 | | &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ |
2499 | | &&L_OP_STATE_CHECK, /* check only */ |
2500 | | # else |
2501 | 7.67M | &&L_DEFAULT, |
2502 | 7.67M | &&L_DEFAULT, |
2503 | 7.67M | &&L_DEFAULT, |
2504 | 7.67M | # endif |
2505 | | # ifdef USE_COMBINATION_EXPLOSION_CHECK |
2506 | | &&L_OP_STATE_CHECK_ANYCHAR_STAR, |
2507 | | &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR, |
2508 | | # else |
2509 | 7.67M | &&L_DEFAULT, |
2510 | 7.67M | &&L_DEFAULT, |
2511 | 7.67M | # endif |
2512 | | /* no need: IS_DYNAMIC_OPTION() == 0 */ |
2513 | | # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ |
2514 | | &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */ |
2515 | | &&L_OP_SET_OPTION /* set option */ |
2516 | | # else |
2517 | 7.67M | &&L_DEFAULT, |
2518 | 7.67M | &&L_DEFAULT |
2519 | 7.67M | # endif |
2520 | 7.67M | }; |
2521 | | #else /* USE_TOKEN_THREADED_VM */ |
2522 | | |
2523 | | # define OP_OFFSET 0 |
2524 | | # define VM_LOOP \ |
2525 | | while (1) { \ |
2526 | | OPCODE_EXEC_HOOK; \ |
2527 | | pbegin = p; \ |
2528 | | sbegin = s; \ |
2529 | | switch (*p++) { |
2530 | | # define VM_LOOP_END } sprev = sbegin; } |
2531 | | # define CASE(x) case x: |
2532 | | # define DEFAULT default: |
2533 | | # define NEXT break |
2534 | | # define JUMP continue; break |
2535 | | #endif /* USE_TOKEN_THREADED_VM */ |
2536 | | |
2537 | | |
2538 | 7.67M | #ifdef USE_SUBEXP_CALL |
2539 | | /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, |
2540 | | etc. Additional space is required. */ |
2541 | 15.3M | # define ADD_NUMMEM 1 |
2542 | | #else |
2543 | | /* Stack #0 is not used. */ |
2544 | | # define ADD_NUMMEM 0 |
2545 | | #endif |
2546 | | |
2547 | 7.67M | n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2; |
2548 | | |
2549 | 7.67M | STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE); |
2550 | 7.67M | pop_level = reg->stack_pop_level; |
2551 | 7.67M | num_mem = reg->num_mem; |
2552 | 7.67M | repeat_stk = (OnigStackIndex* )alloca_base; |
2553 | | |
2554 | 7.67M | mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); |
2555 | 7.67M | mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM); |
2556 | 7.67M | { |
2557 | 7.67M | OnigStackIndex *pp = mem_start_stk; |
2558 | 33.5M | for (; pp < repeat_stk + n; pp += 2) { |
2559 | 25.8M | pp[0] = INVALID_STACK_INDEX; |
2560 | 25.8M | pp[1] = INVALID_STACK_INDEX; |
2561 | 25.8M | } |
2562 | 7.67M | } |
2563 | | #ifndef USE_SUBEXP_CALL |
2564 | | mem_start_stk--; /* for index start from 1, |
2565 | | mem_start_stk[1]..mem_start_stk[num_mem] */ |
2566 | | mem_end_stk--; /* for index start from 1, |
2567 | | mem_end_stk[1]..mem_end_stk[num_mem] */ |
2568 | | #endif |
2569 | | |
2570 | | #ifdef ONIG_DEBUG_MATCH |
2571 | | fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n", |
2572 | | (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev); |
2573 | | fprintf(stderr, "size: %d, start offset: %d\n", |
2574 | | (int )(end - str), (int )(sstart - str)); |
2575 | | fprintf(stderr, "\n ofs> str stk:type addr:opcode\n"); |
2576 | | #endif |
2577 | | |
2578 | 7.67M | STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */ |
2579 | 7.67M | best_len = ONIG_MISMATCH; |
2580 | 7.67M | s = (UChar* )sstart; |
2581 | 7.67M | pkeep = (UChar* )sstart; |
2582 | | |
2583 | | /* OPCODE_EXEC_HOOK: per-opcode trace hook. With ONIG_DEBUG_MATCH it prints one line to stderr per executed opcode (subject offset, a short preview of the upcoming subject text, top stack entry, opcode offset and disassembly); otherwise it expands to a no-op. */ |
2584 | | #ifdef ONIG_DEBUG_MATCH |
2585 | | # define OPCODE_EXEC_HOOK \ |
2586 | | if (s) { \ |
2587 | | UChar *op, *q, *bp, buf[50]; \ |
2588 | | int len; \ |
2589 | | op = p - OP_OFFSET; \ |
2590 | | fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \ |
2591 | | bp = buf; \ |
2592 | | q = s; \ |
2593 | | if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \ |
2594 | | for (i = 0; i < 7 && q < end; i++) { /* copy at most 7 characters of the remaining subject into buf */ \ |
2595 | | len = enclen(encode, q, end); \ |
2596 | | while (len-- > 0) *bp++ = *q++; \ |
2597 | | } \ |
2598 | | if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } /* mark that the preview was truncated */ \ |
2599 | | } \ |
2600 | | xmemcpy(bp, "\"", 1); bp += 1; \ |
2601 | | *bp = 0; \ |
2602 | | fputs((char* )buf, stderr); \ |
2603 | | for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); /* pad the preview column to 20 bytes */ \ |
2604 | | fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \ |
2605 | | stk - stk_base - 1, \ |
2606 | | (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ |
2607 | | (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ |
2608 | | onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \ |
2609 | | fprintf(stderr, "\n"); \ |
2610 | | } |
2611 | | #else |
2612 | 4.55G | # define OPCODE_EXEC_HOOK ((void) 0) |
2613 | 7.67M | #endif |
2614 | | |
2615 | 7.67M | #ifdef USE_MATCH_CACHE |
2616 | | #ifdef ONIG_DEBUG_MATCH_CACHE |
2617 | | #define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask) |
2618 | | #define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n") |
2619 | | #else |
2620 | 106M | #define MATCH_CACHE_DEBUG ((void) 0) |
2621 | 23.3M | #define MATCH_CACHE_DEBUG_HIT ((void) 0) |
2622 | 7.67M | #endif |
2623 | | |
2624 | 23.3M | #define MATCH_CACHE_HIT ((void) 0) |
2625 | | /* CHECK_MATCH_CACHE: when the match cache is enabled, look up the bit for (cache point of the opcode at pbegin, subject offset s - str). If the bit is already set, fail or jump ahead depending on cache_opcode->lookaround_nesting; otherwise push a cache-point entry on the stack. Expands to a no-op without USE_MATCH_CACHE. */ |
2626 | 2.50G | # define CHECK_MATCH_CACHE do {\ |
2627 | 2.50G | if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\ |
2628 | 249M | const OnigCacheOpcode *cache_opcode;\ |
2629 | 249M | long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\ |
2630 | 249M | if (cache_point >= 0) { /* a negative result skips the cache entirely (presumably: no cache point for this opcode -- confirm in find_cache_point) */\ |
2631 | 106M | long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point; /* one bit per (subject offset, cache point) pair */\ |
2632 | 106M | long match_cache_point_index = match_cache_point >> 3; /* byte index into match_cache_buf */\ |
2633 | 106M | uint8_t match_cache_point_mask = 1 << (match_cache_point & 7); /* bit within that byte */\ |
2634 | 106M | MATCH_CACHE_DEBUG;\ |
2635 | 106M | if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\ |
2636 | 23.3M | MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\ |
2637 | 23.3M | if (cache_opcode->lookaround_nesting == 0) goto fail; /* cache hit with nesting 0: fail immediately */\ |
2638 | 23.3M | else if (cache_opcode->lookaround_nesting < 0) { /* NOTE(review): meaning of a negative nesting value is defined outside this chunk */\ |
2639 | 3.62M | if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ |
2640 | 1.73M | STACK_STOP_BT_FAIL;\ |
2641 | 1.73M | goto fail;\ |
2642 | 1.73M | }\ |
2643 | 3.62M | else goto fail;\ |
2644 | 3.62M | }\ |
2645 | 12.4M | else {\ |
2646 | 8.87M | if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ |
2647 | 2.34M | p = cache_opcode->match_addr; /* resume execution at the address recorded for this cache opcode */\ |
2648 | 2.34M | MOP_OUT;\ |
2649 | 2.34M | JUMP;\ |
2650 | 2.34M | }\ |
2651 | 8.87M | else goto fail;\ |
2652 | 8.87M | }\ |
2653 | 23.3M | }\ |
2654 | 106M | STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask); /* bit not set yet: push an entry for this point (how it is consumed is outside this chunk) */\ |
2655 | 85.0M | }\ |
2656 | 249M | }\ |
2657 | 2.50G | } while (0) |
2658 | | #else |
2659 | | # define CHECK_MATCH_CACHE ((void) 0) |
2660 | | #endif |
2661 | | |
2662 | 7.67M | VM_LOOP { |
2663 | 7.67M | CASE(OP_END) MOP_IN(OP_END); |
2664 | 1.16M | n = s - sstart; |
2665 | 1.16M | if (n > best_len) { |
2666 | 1.16M | OnigRegion* region; |
2667 | 1.16M | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
2668 | 1.16M | if (IS_FIND_LONGEST(option)) { |
2669 | 0 | if (n > msa->best_len) { |
2670 | 0 | msa->best_len = n; |
2671 | 0 | msa->best_s = (UChar* )sstart; |
2672 | 0 | } |
2673 | 0 | else |
2674 | 0 | goto end_best_len; |
2675 | 0 | } |
2676 | 1.16M | #endif |
2677 | 1.16M | best_len = n; |
2678 | 1.16M | region = msa->region; |
2679 | 1.16M | if (region) { |
2680 | 1.16M | region->beg[0] = ((pkeep > s) ? s : pkeep) - str; |
2681 | 1.16M | region->end[0] = s - str; |
2682 | 11.8M | for (i = 1; i <= num_mem; i++) { |
2683 | 10.6M | if (mem_end_stk[i] != INVALID_STACK_INDEX) { |
2684 | 1.02M | if (BIT_STATUS_AT(reg->bt_mem_start, i)) |
2685 | 423k | region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; |
2686 | 597k | else |
2687 | 597k | region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; |
2688 | | |
2689 | 1.02M | region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) |
2690 | 1.02M | ? STACK_AT(mem_end_stk[i])->u.mem.pstr |
2691 | 1.02M | : (UChar* )((void* )mem_end_stk[i])) - str; |
2692 | 1.02M | } |
2693 | 9.65M | else { |
2694 | 9.65M | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; |
2695 | 9.65M | } |
2696 | 10.6M | } |
2697 | | |
2698 | | #ifdef USE_CAPTURE_HISTORY |
2699 | | if (reg->capture_history != 0) { |
2700 | | int r; |
2701 | | OnigCaptureTreeNode* node; |
2702 | | |
2703 | | if (IS_NULL(region->history_root)) { |
2704 | | region->history_root = node = history_node_new(); |
2705 | | CHECK_NULL_RETURN_MEMERR(node); |
2706 | | } |
2707 | | else { |
2708 | | node = region->history_root; |
2709 | | history_tree_clear(node); |
2710 | | } |
2711 | | |
2712 | | node->group = 0; |
2713 | | node->beg = ((pkeep > s) ? s : pkeep) - str; |
2714 | | node->end = s - str; |
2715 | | |
2716 | | stkp = stk_base; |
2717 | | r = make_capture_history_tree(region->history_root, &stkp, |
2718 | | stk, (UChar* )str, reg); |
2719 | | if (r < 0) { |
2720 | | best_len = r; /* error code */ |
2721 | | goto finish; |
2722 | | } |
2723 | | } |
2724 | | #endif /* USE_CAPTURE_HISTORY */ |
2725 | 1.16M | } /* if (region) */ |
2726 | 1.16M | } /* n > best_len */ |
2727 | | |
2728 | 1.16M | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
2729 | 1.16M | end_best_len: |
2730 | 1.16M | #endif |
2731 | 1.16M | MOP_OUT; |
2732 | | |
2733 | 1.16M | if (IS_FIND_CONDITION(option)) { |
2734 | 0 | if (IS_FIND_NOT_EMPTY(option) && s == sstart) { |
2735 | 0 | best_len = ONIG_MISMATCH; |
2736 | 0 | goto fail; /* for retry */ |
2737 | 0 | } |
2738 | 0 | if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { |
2739 | 0 | goto fail; /* for retry */ |
2740 | 0 | } |
2741 | 0 | } |
2742 | | |
2743 | | /* default behavior: return first-matching result. */ |
2744 | 1.16M | goto finish; |
2745 | | |
2746 | 412M | CASE(OP_EXACT1) MOP_IN(OP_EXACT1); |
2747 | 412M | DATA_ENSURE(1); |
2748 | 408M | if (*p != *s) goto fail; |
2749 | 29.4M | p++; s++; |
2750 | 29.4M | MOP_OUT; |
2751 | 29.4M | NEXT; |
2752 | | |
2753 | 36.5M | CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC); |
2754 | 36.5M | { |
2755 | 36.5M | int len; |
2756 | 36.5M | UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
2757 | | |
2758 | 36.5M | DATA_ENSURE(1); |
2759 | 34.4M | len = ONIGENC_MBC_CASE_FOLD(encode, |
2760 | | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ |
2761 | 34.4M | case_fold_flag, |
2762 | 34.4M | &s, end, lowbuf); |
2763 | 34.4M | DATA_ENSURE(0); |
2764 | 34.4M | q = lowbuf; |
2765 | 35.5M | while (len-- > 0) { |
2766 | 34.4M | if (*p != *q) { |
2767 | 33.4M | goto fail; |
2768 | 33.4M | } |
2769 | 1.05M | p++; q++; |
2770 | 1.05M | } |
2771 | 34.4M | } |
2772 | 1.05M | MOP_OUT; |
2773 | 1.05M | NEXT; |
2774 | | |
2775 | 210M | CASE(OP_EXACT2) MOP_IN(OP_EXACT2); |
2776 | 210M | DATA_ENSURE(2); |
2777 | 201M | if (*p != *s) goto fail; |
2778 | 18.5M | p++; s++; |
2779 | 18.5M | if (*p != *s) goto fail; |
2780 | 224k | sprev = s; |
2781 | 224k | p++; s++; |
2782 | 224k | MOP_OUT; |
2783 | 224k | JUMP; |
2784 | | |
2785 | 45.4M | CASE(OP_EXACT3) MOP_IN(OP_EXACT3); |
2786 | 45.4M | DATA_ENSURE(3); |
2787 | 44.6M | if (*p != *s) goto fail; |
2788 | 852k | p++; s++; |
2789 | 852k | if (*p != *s) goto fail; |
2790 | 590k | p++; s++; |
2791 | 590k | if (*p != *s) goto fail; |
2792 | 361k | sprev = s; |
2793 | 361k | p++; s++; |
2794 | 361k | MOP_OUT; |
2795 | 361k | JUMP; |
2796 | | |
2797 | 29.4M | CASE(OP_EXACT4) MOP_IN(OP_EXACT4); |
2798 | 29.4M | DATA_ENSURE(4); |
2799 | 28.3M | if (*p != *s) goto fail; |
2800 | 1.69M | p++; s++; |
2801 | 1.69M | if (*p != *s) goto fail; |
2802 | 300k | p++; s++; |
2803 | 300k | if (*p != *s) goto fail; |
2804 | 198k | p++; s++; |
2805 | 198k | if (*p != *s) goto fail; |
2806 | 143k | sprev = s; |
2807 | 143k | p++; s++; |
2808 | 143k | MOP_OUT; |
2809 | 143k | JUMP; |
2810 | | |
2811 | 44.5M | CASE(OP_EXACT5) MOP_IN(OP_EXACT5); |
2812 | 44.5M | DATA_ENSURE(5); |
2813 | 44.0M | if (*p != *s) goto fail; |
2814 | 835k | p++; s++; |
2815 | 835k | if (*p != *s) goto fail; |
2816 | 120k | p++; s++; |
2817 | 120k | if (*p != *s) goto fail; |
2818 | 79.4k | p++; s++; |
2819 | 79.4k | if (*p != *s) goto fail; |
2820 | 47.8k | p++; s++; |
2821 | 47.8k | if (*p != *s) goto fail; |
2822 | 45.0k | sprev = s; |
2823 | 45.0k | p++; s++; |
2824 | 45.0k | MOP_OUT; |
2825 | 45.0k | JUMP; |
2826 | | |
2827 | 238M | CASE(OP_EXACTN) MOP_IN(OP_EXACTN); |
2828 | 238M | GET_LENGTH_INC(tlen, p); |
2829 | 238M | DATA_ENSURE(tlen); |
2830 | 268M | while (tlen-- > 0) { |
2831 | 268M | if (*p++ != *s++) goto fail; |
2832 | 268M | } |
2833 | 260k | sprev = s - 1; |
2834 | 260k | MOP_OUT; |
2835 | 260k | JUMP; |
2836 | | |
2837 | 72.3M | CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC); |
2838 | 72.3M | { |
2839 | 72.3M | int len; |
2840 | 72.3M | UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
2841 | | |
2842 | 72.3M | GET_LENGTH_INC(tlen, p); |
2843 | 72.3M | endp = p + tlen; |
2844 | | |
2845 | 76.3M | while (p < endp) { |
2846 | 76.1M | sprev = s; |
2847 | 76.1M | DATA_ENSURE(1); |
2848 | 74.0M | len = ONIGENC_MBC_CASE_FOLD(encode, |
2849 | | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ |
2850 | 74.0M | case_fold_flag, |
2851 | 74.0M | &s, end, lowbuf); |
2852 | 74.0M | DATA_ENSURE(0); |
2853 | 74.0M | q = lowbuf; |
2854 | 78.0M | while (len-- > 0) { |
2855 | 74.0M | if (*p != *q) goto fail; |
2856 | 4.00M | p++; q++; |
2857 | 4.00M | } |
2858 | 74.0M | } |
2859 | 72.3M | } |
2860 | | |
2861 | 112k | MOP_OUT; |
2862 | 112k | JUMP; |
2863 | | |
2864 | 112k | CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1); |
2865 | 106k | DATA_ENSURE(2); |
2866 | 106k | if (*p != *s) goto fail; |
2867 | 0 | p++; s++; |
2868 | 0 | if (*p != *s) goto fail; |
2869 | 0 | p++; s++; |
2870 | 0 | MOP_OUT; |
2871 | 0 | NEXT; |
2872 | |
|
2873 | 13.9k | CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2); |
2874 | 13.9k | DATA_ENSURE(4); |
2875 | 13.6k | if (*p != *s) goto fail; |
2876 | 0 | p++; s++; |
2877 | 0 | if (*p != *s) goto fail; |
2878 | 0 | p++; s++; |
2879 | 0 | sprev = s; |
2880 | 0 | if (*p != *s) goto fail; |
2881 | 0 | p++; s++; |
2882 | 0 | if (*p != *s) goto fail; |
2883 | 0 | p++; s++; |
2884 | 0 | MOP_OUT; |
2885 | 0 | JUMP; |
2886 | |
|
2887 | 105k | CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3); |
2888 | 105k | DATA_ENSURE(6); |
2889 | 105k | if (*p != *s) goto fail; |
2890 | 0 | p++; s++; |
2891 | 0 | if (*p != *s) goto fail; |
2892 | 0 | p++; s++; |
2893 | 0 | if (*p != *s) goto fail; |
2894 | 0 | p++; s++; |
2895 | 0 | if (*p != *s) goto fail; |
2896 | 0 | p++; s++; |
2897 | 0 | sprev = s; |
2898 | 0 | if (*p != *s) goto fail; |
2899 | 0 | p++; s++; |
2900 | 0 | if (*p != *s) goto fail; |
2901 | 0 | p++; s++; |
2902 | 0 | MOP_OUT; |
2903 | 0 | JUMP; |
2904 | |
|
2905 | 16.9k | CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N); |
2906 | 16.9k | GET_LENGTH_INC(tlen, p); |
2907 | 16.9k | DATA_ENSURE(tlen * 2); |
2908 | 16.5k | while (tlen-- > 0) { |
2909 | 16.5k | if (*p != *s) goto fail; |
2910 | 0 | p++; s++; |
2911 | 0 | if (*p != *s) goto fail; |
2912 | 0 | p++; s++; |
2913 | 0 | } |
2914 | 0 | sprev = s - 2; |
2915 | 0 | MOP_OUT; |
2916 | 0 | JUMP; |
2917 | |
|
2918 | 65.7k | CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N); |
2919 | 65.7k | GET_LENGTH_INC(tlen, p); |
2920 | 65.7k | DATA_ENSURE(tlen * 3); |
2921 | 65.1k | while (tlen-- > 0) { |
2922 | 65.1k | if (*p != *s) goto fail; |
2923 | 0 | p++; s++; |
2924 | 0 | if (*p != *s) goto fail; |
2925 | 0 | p++; s++; |
2926 | 0 | if (*p != *s) goto fail; |
2927 | 0 | p++; s++; |
2928 | 0 | } |
2929 | 0 | sprev = s - 3; |
2930 | 0 | MOP_OUT; |
2931 | 0 | JUMP; |
2932 | |
|
2933 | 32.9k | CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN); |
2934 | 32.9k | GET_LENGTH_INC(tlen, p); /* mb-len */ |
2935 | 32.9k | GET_LENGTH_INC(tlen2, p); /* string len */ |
2936 | 32.9k | tlen2 *= tlen; |
2937 | 32.9k | DATA_ENSURE(tlen2); |
2938 | 32.8k | while (tlen2-- > 0) { |
2939 | 32.8k | if (*p != *s) goto fail; |
2940 | 0 | p++; s++; |
2941 | 0 | } |
2942 | 0 | sprev = s - tlen; |
2943 | 0 | MOP_OUT; |
2944 | 0 | JUMP; |
2945 | |
|
2946 | 10.0M | CASE(OP_CCLASS) MOP_IN(OP_CCLASS); |
2947 | 10.0M | DATA_ENSURE(1); |
2948 | 9.65M | if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; |
2949 | 593k | p += SIZE_BITSET; |
2950 | 593k | s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ |
2951 | 593k | MOP_OUT; |
2952 | 593k | NEXT; |
2953 | | |
2954 | 2.14M | CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB); |
2955 | 2.14M | if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; |
2956 | | |
2957 | 126k | cclass_mb: |
2958 | 126k | GET_LENGTH_INC(tlen, p); |
2959 | 126k | { |
2960 | 126k | OnigCodePoint code; |
2961 | 126k | UChar *ss; |
2962 | 126k | int mb_len; |
2963 | | |
2964 | 126k | DATA_ENSURE(1); |
2965 | 0 | mb_len = enclen_approx(encode, s, end); |
2966 | 0 | DATA_ENSURE(mb_len); |
2967 | 0 | ss = s; |
2968 | 0 | s += mb_len; |
2969 | 0 | code = ONIGENC_MBC_TO_CODE(encode, ss, s); |
2970 | |
|
2971 | 0 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS |
2972 | 0 | if (! onig_is_in_code_range(p, code)) goto fail; |
2973 | | #else |
2974 | | q = p; |
2975 | | ALIGNMENT_RIGHT(q); |
2976 | | if (! onig_is_in_code_range(q, code)) goto fail; |
2977 | | #endif |
2978 | 0 | } |
2979 | 0 | p += tlen; |
2980 | 0 | MOP_OUT; |
2981 | 0 | NEXT; |
2982 | |
|
2983 | 431k | CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX); |
2984 | 431k | DATA_ENSURE(1); |
2985 | 406k | if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
2986 | 0 | p += SIZE_BITSET; |
2987 | 0 | goto cclass_mb; |
2988 | 0 | } |
2989 | 406k | else { |
2990 | 406k | if (BITSET_AT(((BitSetRef )p), *s) == 0) |
2991 | 179k | goto fail; |
2992 | | |
2993 | 226k | p += SIZE_BITSET; |
2994 | 226k | GET_LENGTH_INC(tlen, p); |
2995 | 226k | p += tlen; |
2996 | 226k | s++; |
2997 | 226k | } |
2998 | 226k | MOP_OUT; |
2999 | 226k | NEXT; |
3000 | | |
3001 | 86.6M | CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT); |
3002 | 86.6M | DATA_ENSURE(1); |
3003 | 64.2M | if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; |
3004 | 57.8M | p += SIZE_BITSET; |
3005 | 57.8M | s += enclen(encode, s, end); |
3006 | 57.8M | MOP_OUT; |
3007 | 57.8M | NEXT; |
3008 | | |
3009 | 57.8M | CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT); |
3010 | 1.34M | DATA_ENSURE(1); |
3011 | 1.33M | if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
3012 | 1.33M | s++; |
3013 | 1.33M | GET_LENGTH_INC(tlen, p); |
3014 | 1.33M | p += tlen; |
3015 | 1.33M | goto cc_mb_not_success; |
3016 | 1.33M | } |
3017 | | |
3018 | 0 | cclass_mb_not: |
3019 | 0 | GET_LENGTH_INC(tlen, p); |
3020 | 0 | { |
3021 | 0 | OnigCodePoint code; |
3022 | 0 | UChar *ss; |
3023 | 0 | int mb_len = enclen(encode, s, end); |
3024 | |
|
3025 | 0 | if (! DATA_ENSURE_CHECK(mb_len)) { |
3026 | 0 | DATA_ENSURE(1); |
3027 | 0 | s = (UChar* )end; |
3028 | 0 | p += tlen; |
3029 | 0 | goto cc_mb_not_success; |
3030 | 0 | } |
3031 | | |
3032 | 0 | ss = s; |
3033 | 0 | s += mb_len; |
3034 | 0 | code = ONIGENC_MBC_TO_CODE(encode, ss, s); |
3035 | |
|
3036 | 0 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS |
3037 | 0 | if (onig_is_in_code_range(p, code)) goto fail; |
3038 | | #else |
3039 | | q = p; |
3040 | | ALIGNMENT_RIGHT(q); |
3041 | | if (onig_is_in_code_range(q, code)) goto fail; |
3042 | | #endif |
3043 | 0 | } |
3044 | 0 | p += tlen; |
3045 | |
|
3046 | 1.33M | cc_mb_not_success: |
3047 | 1.33M | MOP_OUT; |
3048 | 1.33M | NEXT; |
3049 | | |
3050 | 486M | CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT); |
3051 | 486M | DATA_ENSURE(1); |
3052 | 243M | if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
3053 | 0 | p += SIZE_BITSET; |
3054 | 0 | goto cclass_mb_not; |
3055 | 0 | } |
3056 | 243M | else { |
3057 | 243M | if (BITSET_AT(((BitSetRef )p), *s) != 0) |
3058 | 51.7k | goto fail; |
3059 | | |
3060 | 243M | p += SIZE_BITSET; |
3061 | 243M | GET_LENGTH_INC(tlen, p); |
3062 | 243M | p += tlen; |
3063 | 243M | s++; |
3064 | 243M | } |
3065 | 243M | MOP_OUT; |
3066 | 243M | NEXT; |
3067 | | |
3068 | 243M | CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR); |
3069 | 57.6M | DATA_ENSURE(1); |
3070 | 57.4M | n = enclen_approx(encode, s, end); |
3071 | 57.4M | DATA_ENSURE(n); |
3072 | 57.4M | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
3073 | 52.9M | s += n; |
3074 | 52.9M | MOP_OUT; |
3075 | 52.9M | NEXT; |
3076 | | |
3077 | 136M | CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML); |
3078 | 136M | DATA_ENSURE(1); |
3079 | 130M | n = enclen_approx(encode, s, end); |
3080 | 130M | DATA_ENSURE(n); |
3081 | 130M | s += n; |
3082 | 130M | MOP_OUT; |
3083 | 130M | NEXT; |
3084 | | |
3085 | 130M | CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); |
3086 | 300M | while (DATA_ENSURE_CHECK1) { |
3087 | 300M | CHECK_MATCH_CACHE; |
3088 | 295M | STACK_PUSH_ALT(p, s, sprev, pkeep); |
3089 | 295M | n = enclen_approx(encode, s, end); |
3090 | 295M | DATA_ENSURE(n); |
3091 | 295M | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
3092 | 290M | sprev = s; |
3093 | 290M | s += n; |
3094 | 290M | } |
3095 | 867k | MOP_OUT; |
3096 | 867k | JUMP; |
3097 | | |
3098 | 2.28M | CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); |
3099 | 147M | while (DATA_ENSURE_CHECK1) { |
3100 | 146M | CHECK_MATCH_CACHE; |
3101 | 145M | STACK_PUSH_ALT(p, s, sprev, pkeep); |
3102 | 145M | n = enclen_approx(encode, s, end); |
3103 | 145M | if (n > 1) { |
3104 | 0 | DATA_ENSURE(n); |
3105 | 0 | sprev = s; |
3106 | 0 | s += n; |
3107 | 0 | } |
3108 | 145M | else { |
3109 | 145M | sprev = s; |
3110 | 145M | s++; |
3111 | 145M | } |
3112 | 145M | } |
3113 | 573k | MOP_OUT; |
3114 | 573k | JUMP; |
3115 | | |
3116 | 8.77M | CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); |
3117 | 265M | while (DATA_ENSURE_CHECK1) { |
3118 | 261M | CHECK_MATCH_CACHE; |
3119 | 257M | if (*p == *s) { |
3120 | 8.90M | STACK_PUSH_ALT(p + 1, s, sprev, pkeep); |
3121 | 248M | } else { |
3122 | 248M | #ifdef USE_MATCH_CACHE |
3123 | | /* We need to increment num_fails here so that the cache optimization is invoked correctly. */ |
3124 | | /* With a plain `OP_ANYCHAR_STAR`, the match would actually fail in this case. */ |
3125 | 248M | msa->num_fails++; |
3126 | 248M | #endif |
3127 | 248M | } |
3128 | 257M | n = enclen_approx(encode, s, end); |
3129 | 257M | DATA_ENSURE(n); |
3130 | 257M | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
3131 | 256M | sprev = s; |
3132 | 256M | s += n; |
3133 | 256M | } |
3134 | 4.41M | p++; |
3135 | 4.41M | MOP_OUT; |
3136 | 4.41M | NEXT; |
3137 | | |
3138 | 5.33M | CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); |
3139 | 264M | while (DATA_ENSURE_CHECK1) { |
3140 | 262M | CHECK_MATCH_CACHE; |
3141 | 258M | if (*p == *s) { |
3142 | 2.60M | STACK_PUSH_ALT(p + 1, s, sprev, pkeep); |
3143 | 256M | } else { |
3144 | 256M | #ifdef USE_MATCH_CACHE |
3145 | | /* We need to increment num_fails here so that the cache optimization is invoked correctly. */ |
3146 | | /* With a plain `OP_ANYCHAR_ML_STAR`, the match would actually fail in this case. */ |
3147 | 256M | msa->num_fails++; |
3148 | 256M | #endif |
3149 | 256M | } |
3150 | 258M | n = enclen_approx(encode, s, end); |
3151 | 258M | if (n > 1) { |
3152 | 0 | DATA_ENSURE(n); |
3153 | 0 | sprev = s; |
3154 | 0 | s += n; |
3155 | 0 | } |
3156 | 258M | else { |
3157 | 258M | sprev = s; |
3158 | 258M | s++; |
3159 | 258M | } |
3160 | 258M | } |
3161 | 1.70M | p++; |
3162 | 1.70M | MOP_OUT; |
3163 | 1.70M | NEXT; |
3164 | | |
3165 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
3166 | | CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); |
3167 | | GET_STATE_CHECK_NUM_INC(mem, p); |
3168 | | while (DATA_ENSURE_CHECK1) { |
3169 | | STATE_CHECK_VAL(scv, mem); |
3170 | | if (scv) goto fail; |
3171 | | |
3172 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); |
3173 | | n = enclen_approx(encode, s, end); |
3174 | | DATA_ENSURE(n); |
3175 | | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
3176 | | sprev = s; |
3177 | | s += n; |
3178 | | } |
3179 | | MOP_OUT; |
3180 | | NEXT; |
3181 | | |
3182 | | CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR) |
3183 | | MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); |
3184 | | |
3185 | | GET_STATE_CHECK_NUM_INC(mem, p); |
3186 | | while (DATA_ENSURE_CHECK1) { |
3187 | | STATE_CHECK_VAL(scv, mem); |
3188 | | if (scv) goto fail; |
3189 | | |
3190 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); |
3191 | | n = enclen_approx(encode, s, end); |
3192 | | if (n > 1) { |
3193 | | DATA_ENSURE(n); |
3194 | | sprev = s; |
3195 | | s += n; |
3196 | | } |
3197 | | else { |
3198 | | sprev = s; |
3199 | | s++; |
3200 | | } |
3201 | | } |
3202 | | MOP_OUT; |
3203 | | NEXT; |
3204 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
3205 | | |
3206 | 4.10M | CASE(OP_WORD) MOP_IN(OP_WORD); |
3207 | 4.10M | DATA_ENSURE(1); |
3208 | 4.10M | if (! ONIGENC_IS_MBC_WORD(encode, s, end)) |
3209 | 101k | goto fail; |
3210 | | |
3211 | 4.00M | s += enclen(encode, s, end); |
3212 | 4.00M | MOP_OUT; |
3213 | 4.00M | NEXT; |
3214 | | |
3215 | 13.1M | CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD); |
3216 | 13.1M | DATA_ENSURE(1); |
3217 | 13.1M | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
3218 | 809k | goto fail; |
3219 | | |
3220 | 12.3M | s += enclen(encode, s, end); |
3221 | 12.3M | MOP_OUT; |
3222 | 12.3M | NEXT; |
3223 | | |
3224 | 12.3M | CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD); |
3225 | 39.7k | DATA_ENSURE(1); |
3226 | 39.6k | if (ONIGENC_IS_MBC_WORD(encode, s, end)) |
3227 | 3.58k | goto fail; |
3228 | | |
3229 | 36.0k | s += enclen(encode, s, end); |
3230 | 36.0k | MOP_OUT; |
3231 | 36.0k | NEXT; |
3232 | | |
3233 | 2.94M | CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD); |
3234 | 2.94M | DATA_ENSURE(1); |
3235 | 2.93M | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
3236 | 512k | goto fail; |
3237 | | |
3238 | 2.42M | s += enclen(encode, s, end); |
3239 | 2.42M | MOP_OUT; |
3240 | 2.42M | NEXT; |
3241 | | |
3242 | 3.73M | CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND); |
3243 | 3.73M | if (ON_STR_BEGIN(s)) { |
3244 | 4.82k | DATA_ENSURE(1); |
3245 | 2.78k | if (! ONIGENC_IS_MBC_WORD(encode, s, end)) |
3246 | 1.45k | goto fail; |
3247 | 2.78k | } |
3248 | 3.72M | else if (ON_STR_END(s)) { |
3249 | 97.6k | if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
3250 | 94.0k | goto fail; |
3251 | 97.6k | } |
3252 | 3.62M | else { |
3253 | 3.62M | if (ONIGENC_IS_MBC_WORD(encode, s, end) |
3254 | 3.62M | == ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
3255 | 1.21M | goto fail; |
3256 | 3.62M | } |
3257 | 2.42M | MOP_OUT; |
3258 | 2.42M | JUMP; |
3259 | | |
3260 | 8.96M | CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND); |
3261 | 8.96M | if (ON_STR_BEGIN(s)) { |
3262 | 4.88k | DATA_ENSURE(1); |
3263 | 3.84k | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
3264 | 2.23k | goto fail; |
3265 | 3.84k | } |
3266 | 8.95M | else if (ON_STR_END(s)) { |
3267 | 3.17k | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
3268 | 2.19k | goto fail; |
3269 | 3.17k | } |
3270 | 8.95M | else { |
3271 | 8.95M | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) |
3272 | 8.95M | == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
3273 | 1.86M | goto fail; |
3274 | 8.95M | } |
3275 | 7.09M | MOP_OUT; |
3276 | 7.09M | JUMP; |
3277 | | |
3278 | 7.09M | CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND); |
3279 | 4.03M | if (ON_STR_BEGIN(s)) { |
3280 | 9.91k | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) |
3281 | 1.00k | goto fail; |
3282 | 9.91k | } |
3283 | 4.02M | else if (ON_STR_END(s)) { |
3284 | 6.47k | if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
3285 | 4.30k | goto fail; |
3286 | 6.47k | } |
3287 | 4.02M | else { |
3288 | 4.02M | if (ONIGENC_IS_MBC_WORD(encode, s, end) |
3289 | 4.02M | != ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
3290 | 693k | goto fail; |
3291 | 4.02M | } |
3292 | 3.34M | MOP_OUT; |
3293 | 3.34M | JUMP; |
3294 | | |
3295 | 7.77M | CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND); |
3296 | 7.77M | if (ON_STR_BEGIN(s)) { |
3297 | 11.3k | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
3298 | 1.20k | goto fail; |
3299 | 11.3k | } |
3300 | 7.76M | else if (ON_STR_END(s)) { |
3301 | 550k | if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
3302 | 539k | goto fail; |
3303 | 550k | } |
3304 | 7.21M | else { |
3305 | 7.21M | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) |
3306 | 7.21M | != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
3307 | 1.52M | goto fail; |
3308 | 7.21M | } |
3309 | 5.71M | MOP_OUT; |
3310 | 5.71M | JUMP; |
3311 | | |
3312 | 5.71M | #ifdef USE_WORD_BEGIN_END |
3313 | 5.71M | CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN); |
3314 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { |
3315 | 0 | if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { |
3316 | 0 | MOP_OUT; |
3317 | 0 | JUMP; |
3318 | 0 | } |
3319 | 0 | } |
3320 | 0 | goto fail; |
3321 | | |
3322 | 0 | CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); |
3323 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { |
3324 | 0 | if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { |
3325 | 0 | MOP_OUT; |
3326 | 0 | JUMP; |
3327 | 0 | } |
3328 | 0 | } |
3329 | 0 | goto fail; |
3330 | | |
3331 | 0 | CASE(OP_WORD_END) MOP_IN(OP_WORD_END); |
3332 | 0 | if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { |
3333 | 0 | if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { |
3334 | 0 | MOP_OUT; |
3335 | 0 | JUMP; |
3336 | 0 | } |
3337 | 0 | } |
3338 | 0 | goto fail; |
3339 | | |
3340 | 0 | CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); |
3341 | 0 | if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { |
3342 | 0 | if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { |
3343 | 0 | MOP_OUT; |
3344 | 0 | JUMP; |
3345 | 0 | } |
3346 | 0 | } |
3347 | 0 | goto fail; |
3348 | 0 | #endif |
3349 | | |
3350 | 7.57M | CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF); |
3351 | 7.57M | if (! ON_STR_BEGIN(s)) goto fail; |
3352 | 2.35k | if (IS_NOTBOS(msa->options)) goto fail; |
3353 | | |
3354 | 2.35k | MOP_OUT; |
3355 | 2.35k | JUMP; |
3356 | | |
3357 | 74.3k | CASE(OP_END_BUF) MOP_IN(OP_END_BUF); |
3358 | 74.3k | if (! ON_STR_END(s)) goto fail; |
3359 | 369 | if (IS_NOTEOS(msa->options)) goto fail; |
3360 | | |
3361 | 369 | MOP_OUT; |
3362 | 369 | JUMP; |
3363 | | |
3364 | 11.2M | CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE); |
3365 | 11.2M | if (ON_STR_BEGIN(s)) { |
3366 | 2.65k | if (IS_NOTBOL(msa->options)) goto fail; |
3367 | 2.65k | MOP_OUT; |
3368 | 2.65k | JUMP; |
3369 | 2.65k | } |
3370 | 11.2M | else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) |
3371 | 35.3k | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
3372 | 35.3k | && !(IS_NEWLINE_CRLF(option) |
3373 | 0 | && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) |
3374 | 35.3k | #endif |
3375 | 35.3k | && !ON_STR_END(s)) { |
3376 | 35.2k | MOP_OUT; |
3377 | 35.2k | JUMP; |
3378 | 35.2k | } |
3379 | 11.2M | goto fail; |
3380 | | |
3381 | 11.2M | CASE(OP_END_LINE) MOP_IN(OP_END_LINE); |
3382 | 427k | if (ON_STR_END(s)) { |
3383 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
3384 | | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { |
3385 | | #endif |
3386 | 5.09k | if (IS_NOTEOL(msa->options)) goto fail; |
3387 | 5.09k | MOP_OUT; |
3388 | 5.09k | JUMP; |
3389 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
3390 | | } |
3391 | | #endif |
3392 | 5.09k | } |
3393 | 422k | else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { |
3394 | 247 | MOP_OUT; |
3395 | 247 | JUMP; |
3396 | 247 | } |
3397 | 427k | goto fail; |
3398 | | |
3399 | 427k | CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); |
3400 | 390k | if (ON_STR_END(s)) { |
3401 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
3402 | | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { |
3403 | | #endif |
3404 | 3.00k | if (IS_NOTEOL(msa->options)) goto fail; |
3405 | 3.00k | MOP_OUT; |
3406 | 3.00k | JUMP; |
3407 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
3408 | | } |
3409 | | #endif |
3410 | 3.00k | } |
3411 | 387k | else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { |
3412 | 125k | UChar* ss = s + enclen(encode, s, end); |
3413 | 125k | if (ON_STR_END(ss)) { |
3414 | 294 | MOP_OUT; |
3415 | 294 | JUMP; |
3416 | 294 | } |
3417 | 125k | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
3418 | 125k | else if (IS_NEWLINE_CRLF(option) |
3419 | 0 | && ONIGENC_IS_MBC_CRNL(encode, s, end)) { |
3420 | 0 | ss += enclen(encode, ss, end); |
3421 | 0 | if (ON_STR_END(ss)) { |
3422 | 0 | MOP_OUT; |
3423 | 0 | JUMP; |
3424 | 0 | } |
3425 | 0 | } |
3426 | 125k | #endif |
3427 | 125k | } |
3428 | 390k | goto fail; |
3429 | | |
3430 | 390k | CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); |
3431 | 346k | if (s != msa->gpos) |
3432 | 313k | goto fail; |
3433 | | |
3434 | 32.8k | MOP_OUT; |
3435 | 32.8k | JUMP; |
3436 | | |
3437 | 151M | CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH); |
3438 | 151M | GET_MEMNUM_INC(mem, p); |
3439 | 151M | STACK_PUSH_MEM_START(mem, s); |
3440 | 151M | MOP_OUT; |
3441 | 151M | JUMP; |
3442 | | |
3443 | 151M | CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START); |
3444 | 1.10M | GET_MEMNUM_INC(mem, p); |
3445 | 1.10M | mem_start_stk[mem] = (OnigStackIndex )((void* )s); |
3446 | 1.10M | mem_end_stk[mem] = INVALID_STACK_INDEX; |
3447 | 1.10M | MOP_OUT; |
3448 | 1.10M | JUMP; |
3449 | | |
3450 | 31.3M | CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH); |
3451 | 31.3M | GET_MEMNUM_INC(mem, p); |
3452 | 31.3M | STACK_PUSH_MEM_END(mem, s); |
3453 | 31.3M | MOP_OUT; |
3454 | 31.3M | JUMP; |
3455 | | |
3456 | 125M | CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END); |
3457 | 125M | GET_MEMNUM_INC(mem, p); |
3458 | 125M | mem_end_stk[mem] = (OnigStackIndex )((void* )s); |
3459 | 125M | MOP_OUT; |
3460 | 125M | JUMP; |
3461 | | |
3462 | 125M | CASE(OP_KEEP) MOP_IN(OP_KEEP); |
3463 | 183k | pkeep = s; |
3464 | 183k | MOP_OUT; |
3465 | 183k | JUMP; |
3466 | | |
3467 | 183k | #ifdef USE_SUBEXP_CALL |
3468 | 4.37M | CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC); |
3469 | 4.37M | GET_MEMNUM_INC(mem, p); |
3470 | 4.37M | STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ |
3471 | 4.37M | mem_start_stk[mem] = GET_STACK_INDEX(stkp); |
3472 | 4.37M | STACK_PUSH_MEM_END(mem, s); |
3473 | 4.37M | MOP_OUT; |
3474 | 4.37M | JUMP; |
3475 | | |
3476 | 29.4M | CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC); |
3477 | 29.4M | GET_MEMNUM_INC(mem, p); |
3478 | 29.4M | mem_end_stk[mem] = (OnigStackIndex )((void* )s); |
3479 | 29.4M | STACK_GET_MEM_START(mem, stkp); |
3480 | | |
3481 | 29.4M | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
3482 | 29.4M | mem_start_stk[mem] = GET_STACK_INDEX(stkp); |
3483 | 0 | else |
3484 | 0 | mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); |
3485 | | |
3486 | 29.4M | STACK_PUSH_MEM_END_MARK(mem); |
3487 | 29.4M | MOP_OUT; |
3488 | 29.4M | JUMP; |
3489 | 29.4M | #endif |
3490 | | |
3491 | 31.0M | CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1); |
3492 | 31.0M | mem = 1; |
3493 | 31.0M | goto backref; |
3494 | | |
3495 | 9.74M | CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2); |
3496 | 9.74M | mem = 2; |
3497 | 9.74M | goto backref; |
3498 | | |
3499 | 50.3M | CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN); |
3500 | 50.3M | GET_MEMNUM_INC(mem, p); |
3501 | 91.1M | backref: |
3502 | 91.1M | { |
3503 | 91.1M | int len; |
3504 | 91.1M | UChar *pstart, *pend; |
3505 | | |
3506 | | /* if you want to remove following line, |
3507 | | you should check in parse and compile time. */ |
3508 | 91.1M | if (mem > num_mem) goto fail; |
3509 | 91.1M | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; |
3510 | 30.5M | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; |
3511 | | |
3512 | 30.5M | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
3513 | 30.5M | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
3514 | 0 | else |
3515 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
3516 | | |
3517 | 30.5M | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
3518 | 30.5M | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
3519 | 30.5M | : (UChar* )((void* )mem_end_stk[mem])); |
3520 | 30.5M | n = pend - pstart; |
3521 | 30.5M | DATA_ENSURE(n); |
3522 | 30.4M | sprev = s; |
3523 | 30.4M | STRING_CMP(pstart, s, n); |
3524 | 15.8M | while (sprev + (len = enclen_approx(encode, sprev, end)) < s) |
3525 | 569 | sprev += len; |
3526 | | |
3527 | 15.8M | MOP_OUT; |
3528 | 15.8M | JUMP; |
3529 | 15.8M | } |
3530 | | |
3531 | 4.22M | CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC); |
3532 | 4.22M | GET_MEMNUM_INC(mem, p); |
3533 | 4.22M | { |
3534 | 4.22M | int len; |
3535 | 4.22M | UChar *pstart, *pend; |
3536 | | |
3537 | | /* if you want to remove following line, |
3538 | | you should check in parse and compile time. */ |
3539 | 4.22M | if (mem > num_mem) goto fail; |
3540 | 4.22M | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; |
3541 | 4.19M | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; |
3542 | | |
3543 | 4.19M | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
3544 | 4.19M | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
3545 | 0 | else |
3546 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
3547 | | |
3548 | 4.19M | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
3549 | 4.19M | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
3550 | 4.19M | : (UChar* )((void* )mem_end_stk[mem])); |
3551 | 4.19M | n = pend - pstart; |
3552 | 4.19M | DATA_ENSURE(n); |
3553 | 4.19M | sprev = s; |
3554 | 4.19M | STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); |
3555 | 2.29M | while (sprev + (len = enclen_approx(encode, sprev, end)) < s) |
3556 | 16.6k | sprev += len; |
3557 | | |
3558 | 2.28M | MOP_OUT; |
3559 | 2.28M | JUMP; |
3560 | 2.28M | } |
3561 | 2.28M | NEXT; |
3562 | | |
3563 | 4.75M | CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI); |
3564 | 4.75M | { |
3565 | 4.75M | int len, is_fail; |
3566 | 4.75M | UChar *pstart, *pend, *swork; |
3567 | | |
3568 | 4.75M | GET_LENGTH_INC(tlen, p); |
3569 | 12.0M | for (i = 0; i < tlen; i++) { |
3570 | 8.68M | GET_MEMNUM_INC(mem, p); |
3571 | | |
3572 | 8.68M | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; |
3573 | 1.56M | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; |
3574 | | |
3575 | 1.56M | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
3576 | 1.56M | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
3577 | 0 | else |
3578 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
3579 | | |
3580 | 1.56M | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
3581 | 1.56M | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
3582 | 1.56M | : (UChar* )((void* )mem_end_stk[mem])); |
3583 | 1.56M | n = pend - pstart; |
3584 | 1.56M | DATA_ENSURE_CONTINUE(n); |
3585 | 1.55M | sprev = s; |
3586 | 1.55M | swork = s; |
3587 | 1.55M | STRING_CMP_VALUE(pstart, swork, n, is_fail); |
3588 | 1.55M | if (is_fail) continue; |
3589 | 1.35M | s = swork; |
3590 | 1.35M | while (sprev + (len = enclen_approx(encode, sprev, end)) < s) |
3591 | 3.43k | sprev += len; |
3592 | | |
3593 | 1.35M | p += (SIZE_MEMNUM * (tlen - i - 1)); |
3594 | 1.35M | break; /* success */ |
3595 | 1.55M | } |
3596 | 4.75M | if (i == tlen) goto fail; |
3597 | 1.35M | MOP_OUT; |
3598 | 1.35M | JUMP; |
3599 | 1.35M | } |
3600 | 1.35M | NEXT; |
3601 | | |
3602 | 6.61M | CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC); |
3603 | 6.61M | { |
3604 | 6.61M | int len, is_fail; |
3605 | 6.61M | UChar *pstart, *pend, *swork; |
3606 | | |
3607 | 6.61M | GET_LENGTH_INC(tlen, p); |
3608 | 19.4M | for (i = 0; i < tlen; i++) { |
3609 | 16.3M | GET_MEMNUM_INC(mem, p); |
3610 | | |
3611 | 16.3M | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; |
3612 | 4.18M | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; |
3613 | | |
3614 | 4.18M | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
3615 | 4.18M | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
3616 | 0 | else |
3617 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
3618 | | |
3619 | 4.18M | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
3620 | 4.18M | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
3621 | 4.18M | : (UChar* )((void* )mem_end_stk[mem])); |
3622 | 4.18M | n = pend - pstart; |
3623 | 4.18M | DATA_ENSURE_CONTINUE(n); |
3624 | 4.18M | sprev = s; |
3625 | 4.18M | swork = s; |
3626 | 4.18M | STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); |
3627 | 4.18M | if (is_fail) continue; |
3628 | 3.46M | s = swork; |
3629 | 3.60M | while (sprev + (len = enclen(encode, sprev, end)) < s) |
3630 | 139k | sprev += len; |
3631 | | |
3632 | 3.46M | p += (SIZE_MEMNUM * (tlen - i - 1)); |
3633 | 3.46M | break; /* success */ |
3634 | 4.18M | } |
3635 | 6.61M | if (i == tlen) goto fail; |
3636 | 3.46M | MOP_OUT; |
3637 | 3.46M | JUMP; |
3638 | 3.46M | } |
3639 | | |
3640 | 0 | #ifdef USE_BACKREF_WITH_LEVEL |
3641 | 20.4M | CASE(OP_BACKREF_WITH_LEVEL) |
3642 | 20.4M | { |
3643 | 20.4M | int len; |
3644 | 20.4M | OnigOptionType ic; |
3645 | 20.4M | LengthType level; |
3646 | | |
3647 | 20.4M | GET_OPTION_INC(ic, p); |
3648 | 20.4M | GET_LENGTH_INC(level, p); |
3649 | 20.4M | GET_LENGTH_INC(tlen, p); |
3650 | | |
3651 | 20.4M | sprev = s; |
3652 | 20.4M | if (backref_match_at_nested_level(reg, stk, stk_base, ic, |
3653 | 20.4M | case_fold_flag, (int )level, (int )tlen, p, &s, end)) { |
3654 | 19.7M | while (sprev + (len = enclen(encode, sprev, end)) < s) |
3655 | 6.98k | sprev += len; |
3656 | | |
3657 | 19.7M | p += (SIZE_MEMNUM * tlen); |
3658 | 19.7M | } |
3659 | 706k | else |
3660 | 706k | goto fail; |
3661 | | |
3662 | 19.7M | MOP_OUT; |
3663 | 19.7M | JUMP; |
3664 | 19.7M | } |
3665 | | |
3666 | 0 | #endif |
3667 | | |
3668 | | #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ |
3669 | | CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH); |
3670 | | GET_OPTION_INC(option, p); |
3671 | | STACK_PUSH_ALT(p, s, sprev, pkeep); |
3672 | | p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; |
3673 | | MOP_OUT; |
3674 | | JUMP; |
3675 | | |
3676 | | CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION); |
3677 | | GET_OPTION_INC(option, p); |
3678 | | MOP_OUT; |
3679 | | JUMP; |
3680 | | #endif |
3681 | | |
3682 | 158M | CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START); |
3683 | 158M | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
3684 | 158M | STACK_PUSH_NULL_CHECK_START(mem, s); |
3685 | 158M | MOP_OUT; |
3686 | 158M | JUMP; |
3687 | | |
3688 | 158M | CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END); |
3689 | 89.5M | { |
3690 | 89.5M | int isnull; |
3691 | | |
3692 | 89.5M | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
3693 | 89.5M | STACK_NULL_CHECK(isnull, mem, s); |
3694 | 89.5M | if (isnull) { |
3695 | | #ifdef ONIG_DEBUG_MATCH |
3696 | | fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n", |
3697 | | (int )mem, (uintptr_t )s, s); |
3698 | | #endif |
3699 | 130M | null_check_found: |
3700 | | /* empty loop founded, skip next instruction */ |
3701 | 130M | switch (*p++) { |
3702 | 105M | case OP_JUMP: |
3703 | 110M | case OP_PUSH: |
3704 | 110M | p += SIZE_RELADDR; |
3705 | 110M | break; |
3706 | 5.58M | case OP_REPEAT_INC: |
3707 | 8.54M | case OP_REPEAT_INC_NG: |
3708 | 14.2M | case OP_REPEAT_INC_SG: |
3709 | 19.9M | case OP_REPEAT_INC_NG_SG: |
3710 | 19.9M | p += SIZE_MEMNUM; |
3711 | 19.9M | break; |
3712 | 0 | default: |
3713 | 0 | goto unexpected_bytecode_error; |
3714 | 0 | break; |
3715 | 130M | } |
3716 | 130M | } |
3717 | 89.5M | } |
3718 | 159M | MOP_OUT; |
3719 | 159M | JUMP; |
3720 | | |
3721 | 159M | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
3722 | 159M | CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); |
3723 | 130M | { |
3724 | 130M | int isnull; |
3725 | | |
3726 | 130M | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
3727 | 130M | STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); |
3728 | 130M | if (isnull) { |
3729 | | # ifdef ONIG_DEBUG_MATCH |
3730 | | fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", |
3731 | | (int )mem, (uintptr_t )s, s); |
3732 | | # endif |
3733 | 67.6M | if (isnull == -1) goto fail; |
3734 | 65.3M | goto null_check_found; |
3735 | 67.6M | } |
3736 | 130M | } |
3737 | 63.0M | MOP_OUT; |
3738 | 63.0M | JUMP; |
3739 | 63.0M | #endif |
3740 | | |
3741 | 63.0M | #ifdef USE_SUBEXP_CALL |
3742 | 63.0M | CASE(OP_NULL_CHECK_END_MEMST_PUSH) |
3743 | 9.67M | MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); |
3744 | 9.67M | { |
3745 | 9.67M | int isnull; |
3746 | | |
3747 | 9.67M | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
3748 | 9.67M | # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
3749 | 9.67M | STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); |
3750 | | # else |
3751 | | STACK_NULL_CHECK_REC(isnull, mem, s); |
3752 | | # endif |
3753 | 9.67M | if (isnull) { |
3754 | | # ifdef ONIG_DEBUG_MATCH |
3755 | | fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n", |
3756 | | (int )mem, (uintptr_t )s, s); |
3757 | | # endif |
3758 | 4.14M | if (isnull == -1) goto fail; |
3759 | 4.14M | goto null_check_found; |
3760 | 4.14M | } |
3761 | 5.53M | else { |
3762 | 5.53M | STACK_PUSH_NULL_CHECK_END(mem); |
3763 | 5.53M | } |
3764 | 9.67M | } |
3765 | 5.53M | MOP_OUT; |
3766 | 5.53M | JUMP; |
3767 | 5.53M | #endif |
3768 | | |
3769 | 771M | CASE(OP_JUMP) MOP_IN(OP_JUMP); |
3770 | 771M | GET_RELADDR_INC(addr, p); |
3771 | 771M | p += addr; |
3772 | 771M | MOP_OUT; |
3773 | 771M | CHECK_INTERRUPT_IN_MATCH_AT; |
3774 | 771M | JUMP; |
3775 | | |
3776 | 936M | CASE(OP_PUSH) MOP_IN(OP_PUSH); |
3777 | 936M | GET_RELADDR_INC(addr, p); |
3778 | 936M | CHECK_MATCH_CACHE; |
3779 | 932M | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
3780 | 932M | MOP_OUT; |
3781 | 932M | JUMP; |
3782 | | |
3783 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
3784 | | CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH); |
3785 | | GET_STATE_CHECK_NUM_INC(mem, p); |
3786 | | STATE_CHECK_VAL(scv, mem); |
3787 | | if (scv) goto fail; |
3788 | | |
3789 | | GET_RELADDR_INC(addr, p); |
3790 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); |
3791 | | MOP_OUT; |
3792 | | JUMP; |
3793 | | |
3794 | | CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); |
3795 | | GET_STATE_CHECK_NUM_INC(mem, p); |
3796 | | GET_RELADDR_INC(addr, p); |
3797 | | STATE_CHECK_VAL(scv, mem); |
3798 | | if (scv) { |
3799 | | p += addr; |
3800 | | } |
3801 | | else { |
3802 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); |
3803 | | } |
3804 | | MOP_OUT; |
3805 | | JUMP; |
3806 | | |
3807 | | CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK); |
3808 | | GET_STATE_CHECK_NUM_INC(mem, p); |
3809 | | STATE_CHECK_VAL(scv, mem); |
3810 | | if (scv) goto fail; |
3811 | | |
3812 | | STACK_PUSH_STATE_CHECK(s, mem); |
3813 | | MOP_OUT; |
3814 | | JUMP; |
3815 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
3816 | | |
3817 | 932M | CASE(OP_POP) MOP_IN(OP_POP); |
3818 | 0 | STACK_POP_ONE; |
3819 | 0 | #ifdef USE_MATCH_CACHE |
3820 | | /* We need to increment num_fails here, for invoking a cache optimization correctly, */ |
3821 | | /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */ |
3822 | 0 | msa->num_fails++; |
3823 | 0 | #endif |
3824 | 0 | MOP_OUT; |
3825 | 0 | JUMP; |
3826 | |
|
3827 | | #ifdef USE_OP_PUSH_OR_JUMP_EXACT |
3828 | | CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1); |
3829 | | GET_RELADDR_INC(addr, p); |
3830 | | if (*p == *s && DATA_ENSURE_CHECK1) { |
3831 | | p++; |
3832 | | CHECK_MATCH_CACHE; |
3833 | | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
3834 | | MOP_OUT; |
3835 | | JUMP; |
3836 | | } |
3837 | | p += (addr + 1); |
3838 | | MOP_OUT; |
3839 | | JUMP; |
3840 | | #endif |
3841 | |
|
3842 | 403M | CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT); |
3843 | 403M | GET_RELADDR_INC(addr, p); |
3844 | 403M | CHECK_MATCH_CACHE; |
3845 | 402M | if (*p == *s) { |
3846 | 21.2M | p++; |
3847 | 21.2M | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
3848 | 21.2M | MOP_OUT; |
3849 | 21.2M | JUMP; |
3850 | 21.2M | } |
3851 | 402M | p++; |
3852 | 402M | INC_NUM_FAILS; |
3853 | 402M | MOP_OUT; |
3854 | 402M | JUMP; |
3855 | | |
3856 | 402M | CASE(OP_REPEAT) MOP_IN(OP_REPEAT); |
3857 | 24.9M | { |
3858 | 24.9M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3859 | 24.9M | GET_RELADDR_INC(addr, p); |
3860 | | |
3861 | 24.9M | STACK_ENSURE(1); |
3862 | 24.9M | repeat_stk[mem] = GET_STACK_INDEX(stk); |
3863 | 24.9M | STACK_PUSH_REPEAT(mem, p); |
3864 | | |
3865 | 24.9M | if (reg->repeat_range[mem].lower == 0) { |
3866 | 24.1M | CHECK_MATCH_CACHE; |
3867 | 24.0M | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
3868 | 24.0M | } |
3869 | 24.9M | } |
3870 | 24.8M | MOP_OUT; |
3871 | 24.8M | JUMP; |
3872 | | |
3873 | 77.4M | CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG); |
3874 | 77.4M | { |
3875 | 77.4M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3876 | 77.4M | GET_RELADDR_INC(addr, p); |
3877 | | |
3878 | 77.4M | STACK_ENSURE(1); |
3879 | 77.4M | repeat_stk[mem] = GET_STACK_INDEX(stk); |
3880 | 77.4M | STACK_PUSH_REPEAT(mem, p); |
3881 | | |
3882 | 77.4M | if (reg->repeat_range[mem].lower == 0) { |
3883 | 77.2M | CHECK_MATCH_CACHE; |
3884 | 75.6M | STACK_PUSH_ALT(p, s, sprev, pkeep); |
3885 | 75.6M | p += addr; |
3886 | 75.6M | } |
3887 | 77.4M | } |
3888 | 75.8M | MOP_OUT; |
3889 | 75.8M | JUMP; |
3890 | | |
3891 | 75.8M | CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC); |
3892 | 27.8M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3893 | 27.8M | si = repeat_stk[mem]; |
3894 | 27.8M | stkp = STACK_AT(si); |
3895 | | |
3896 | 75.3M | repeat_inc: |
3897 | 75.3M | stkp->u.repeat.count++; |
3898 | 75.3M | if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { |
3899 | | /* end of repeat. Nothing to do. */ |
3900 | 6.16M | } |
3901 | 69.1M | else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { |
3902 | 61.8M | #ifdef USE_MATCH_CACHE |
3903 | 61.8M | if (*pbegin == OP_REPEAT_INC) { |
3904 | 26.1M | #undef MATCH_CACHE_HIT |
3905 | 26.1M | #define MATCH_CACHE_HIT stkp->u.repeat.count--; |
3906 | 26.1M | CHECK_MATCH_CACHE; |
3907 | 26.1M | #undef MATCH_CACHE_HIT |
3908 | 26.1M | #define MATCH_CACHE_HIT ((void) 0) |
3909 | 26.1M | } |
3910 | 61.8M | #endif |
3911 | 61.8M | STACK_PUSH_ALT(p, s, sprev, pkeep); |
3912 | 61.8M | p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ |
3913 | 61.8M | } |
3914 | 7.30M | else { |
3915 | 7.30M | p = stkp->u.repeat.pcode; |
3916 | 7.30M | } |
3917 | 75.3M | STACK_PUSH_REPEAT_INC(si); |
3918 | 75.3M | MOP_OUT; |
3919 | 75.3M | CHECK_INTERRUPT_IN_MATCH_AT; |
3920 | 75.3M | JUMP; |
3921 | | |
3922 | 75.3M | CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG); |
3923 | 47.4M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3924 | 47.4M | STACK_GET_REPEAT(mem, stkp); |
3925 | 47.4M | si = GET_STACK_INDEX(stkp); |
3926 | 47.4M | goto repeat_inc; |
3927 | | |
3928 | 101M | CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG); |
3929 | 101M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3930 | 101M | si = repeat_stk[mem]; |
3931 | 101M | stkp = STACK_AT(si); |
3932 | | |
3933 | 115M | repeat_inc_ng: |
3934 | 115M | stkp->u.repeat.count++; |
3935 | 115M | if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { |
3936 | 72.2M | if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { |
3937 | 65.7M | UChar* pcode = stkp->u.repeat.pcode; |
3938 | | |
3939 | 65.7M | STACK_PUSH_REPEAT_INC(si); |
3940 | 65.7M | if (*pbegin == OP_REPEAT_INC_NG) { |
3941 | 63.1M | CHECK_MATCH_CACHE; |
3942 | 63.1M | } |
3943 | 65.7M | STACK_PUSH_ALT(pcode, s, sprev, pkeep); |
3944 | 65.7M | } |
3945 | 6.46M | else { |
3946 | 6.46M | p = stkp->u.repeat.pcode; |
3947 | 6.46M | STACK_PUSH_REPEAT_INC(si); |
3948 | 6.46M | } |
3949 | 72.2M | } |
3950 | 43.6M | else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { |
3951 | 28.8M | STACK_PUSH_REPEAT_INC(si); |
3952 | 28.8M | } |
3953 | 115M | MOP_OUT; |
3954 | 115M | CHECK_INTERRUPT_IN_MATCH_AT; |
3955 | 115M | JUMP; |
3956 | | |
3957 | 115M | CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG); |
3958 | 14.6M | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
3959 | 14.6M | STACK_GET_REPEAT(mem, stkp); |
3960 | 14.6M | si = GET_STACK_INDEX(stkp); |
3961 | 14.6M | goto repeat_inc_ng; |
3962 | | |
3963 | 7.19M | CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS); |
3964 | 7.19M | STACK_PUSH_POS(s, sprev, pkeep); |
3965 | 7.19M | MOP_OUT; |
3966 | 7.19M | JUMP; |
3967 | | |
3968 | 7.19M | CASE(OP_POP_POS) MOP_IN(OP_POP_POS); |
3969 | 6.71M | { |
3970 | 6.71M | STACK_POS_END(stkp); |
3971 | 6.71M | s = stkp->u.state.pstr; |
3972 | 6.71M | sprev = stkp->u.state.pstr_prev; |
3973 | 6.71M | } |
3974 | 6.71M | MOP_OUT; |
3975 | 6.71M | JUMP; |
3976 | | |
3977 | 10.4M | CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT); |
3978 | 10.4M | GET_RELADDR_INC(addr, p); |
3979 | 10.4M | STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep); |
3980 | 10.4M | MOP_OUT; |
3981 | 10.4M | JUMP; |
3982 | | |
3983 | 10.4M | CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS); |
3984 | 700k | STACK_POP_TIL_POS_NOT; |
3985 | 700k | goto fail; |
3986 | | |
3987 | 295M | CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT); |
3988 | 295M | STACK_PUSH_STOP_BT; |
3989 | 295M | MOP_OUT; |
3990 | 295M | JUMP; |
3991 | | |
3992 | 295M | CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT); |
3993 | 210M | STACK_STOP_BT_END; |
3994 | 210M | MOP_OUT; |
3995 | 210M | JUMP; |
3996 | | |
3997 | 210M | CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND); |
3998 | 84.2k | GET_LENGTH_INC(tlen, p); |
3999 | 84.2k | s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); |
4000 | 84.2k | if (IS_NULL(s)) goto fail; |
4001 | 83.6k | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); |
4002 | 83.6k | MOP_OUT; |
4003 | 83.6k | JUMP; |
4004 | | |
4005 | 9.60M | CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); |
4006 | 9.60M | GET_RELADDR_INC(addr, p); |
4007 | 9.60M | GET_LENGTH_INC(tlen, p); |
4008 | 9.60M | q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); |
4009 | 9.60M | if (IS_NULL(q)) { |
4010 | | /* too short case -> success. ex. /(?<!XXX)a/.match("a") |
4011 | | If you want to change to fail, replace following line. */ |
4012 | 3.98k | p += addr; |
4013 | | /* goto fail; */ |
4014 | 3.98k | } |
4015 | 9.59M | else { |
4016 | 9.59M | STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep); |
4017 | 9.59M | s = q; |
4018 | 9.59M | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); |
4019 | 9.59M | } |
4020 | 9.60M | MOP_OUT; |
4021 | 9.60M | JUMP; |
4022 | | |
4023 | 9.60M | CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); |
4024 | 4.65M | STACK_POP_TIL_LOOK_BEHIND_NOT; |
4025 | 4.65M | goto fail; |
4026 | | |
4027 | 6.26M | CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS); |
4028 | | /* Save the absent-start-pos and the original end-pos. */ |
4029 | 6.26M | STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS); |
4030 | 6.26M | MOP_OUT; |
4031 | 6.26M | JUMP; |
4032 | | |
4033 | 80.8M | CASE(OP_ABSENT) MOP_IN(OP_ABSENT); |
4034 | 80.8M | { |
4035 | 80.8M | const UChar* aend = ABSENT_END_POS; |
4036 | 80.8M | UChar* absent; |
4037 | 80.8M | UChar* selfp = p - 1; |
4038 | | |
4039 | 80.8M | STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */ |
4040 | 80.8M | GET_RELADDR_INC(addr, p); |
4041 | | #ifdef ONIG_DEBUG_MATCH |
4042 | | fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend); |
4043 | | #endif |
4044 | 80.8M | if ((absent > aend) && (s > absent)) { |
4045 | | /* An empty match occurred in (?~...) at the start point. |
4046 | | * Never match. */ |
4047 | 1.57M | STACK_POP; |
4048 | 1.57M | goto fail; |
4049 | 1.57M | } |
4050 | 79.2M | else if ((s >= aend) && (s > absent)) { |
4051 | 4.51M | if (s > aend) { |
4052 | | /* Only one (or less) character matched in the last iteration. |
4053 | | * This is not a possible point. */ |
4054 | 3.19M | goto fail; |
4055 | 3.19M | } |
4056 | | /* All possible points were found. Try matching after (?~...). */ |
4057 | 1.32M | DATA_ENSURE(0); |
4058 | 1.32M | p += addr; |
4059 | 1.32M | } |
4060 | 74.7M | else if (s == end) { |
4061 | | /* At the end of the string, just match with it */ |
4062 | 162k | DATA_ENSURE(0); |
4063 | 162k | p += addr; |
4064 | 162k | } |
4065 | 74.5M | else { |
4066 | 74.5M | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */ |
4067 | 74.5M | n = enclen(encode, s, end); |
4068 | 74.5M | STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */ |
4069 | 74.5M | STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */ |
4070 | 74.5M | STACK_PUSH_ABSENT; |
4071 | 74.5M | ABSENT_END_POS = aend; |
4072 | 74.5M | } |
4073 | 80.8M | } |
4074 | 76.0M | MOP_OUT; |
4075 | 76.0M | JUMP; |
4076 | | |
4077 | 76.0M | CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END); |
4078 | | /* The pattern inside (?~...) was matched. |
4079 | | * Set the end-pos temporary and go to next iteration. */ |
4080 | 7.95M | if (sprev < ABSENT_END_POS) |
4081 | 7.95M | ABSENT_END_POS = sprev; |
4082 | | #ifdef ONIG_DEBUG_MATCH |
4083 | | fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS); |
4084 | | #endif |
4085 | 7.95M | STACK_POP_TIL_ABSENT; |
4086 | 7.95M | goto fail; |
4087 | | |
4088 | 0 | #ifdef USE_SUBEXP_CALL |
4089 | 32.2M | CASE(OP_CALL) MOP_IN(OP_CALL); |
4090 | 32.2M | GET_ABSADDR_INC(addr, p); |
4091 | 32.2M | STACK_PUSH_CALL_FRAME(p); |
4092 | 32.2M | p = reg->p + addr; |
4093 | 32.2M | MOP_OUT; |
4094 | 32.2M | JUMP; |
4095 | | |
4096 | 32.2M | CASE(OP_RETURN) MOP_IN(OP_RETURN); |
4097 | 12.0M | STACK_RETURN(p); |
4098 | 12.0M | STACK_PUSH_RETURN; |
4099 | 12.0M | MOP_OUT; |
4100 | 12.0M | JUMP; |
4101 | 12.0M | #endif |
4102 | | |
4103 | 12.0M | CASE(OP_CONDITION) MOP_IN(OP_CONDITION); |
4104 | 6.72M | GET_MEMNUM_INC(mem, p); |
4105 | 6.72M | GET_RELADDR_INC(addr, p); |
4106 | 6.72M | if ((mem > num_mem) || |
4107 | 6.72M | (mem_end_stk[mem] == INVALID_STACK_INDEX) || |
4108 | 6.72M | (mem_start_stk[mem] == INVALID_STACK_INDEX)) { |
4109 | 936 | p += addr; |
4110 | 936 | } |
4111 | 6.72M | MOP_OUT; |
4112 | 6.72M | JUMP; |
4113 | | |
4114 | 6.72M | CASE(OP_FINISH) |
4115 | 6.50M | goto finish; |
4116 | | |
4117 | 0 | CASE(OP_FAIL) |
4118 | 0 | if (0) { |
4119 | | /* fall */ |
4120 | 1.51G | fail: |
4121 | 1.51G | MOP_OUT; |
4122 | 1.51G | } |
4123 | 1.51G | MOP_IN(OP_FAIL); |
4124 | 1.51G | STACK_POP; |
4125 | 1.51G | p = stk->u.state.pcode; |
4126 | 1.51G | s = stk->u.state.pstr; |
4127 | 1.51G | sprev = stk->u.state.pstr_prev; |
4128 | 1.51G | pkeep = stk->u.state.pkeep; |
4129 | | |
4130 | 1.51G | #ifdef USE_MATCH_CACHE |
4131 | 1.51G | if ( |
4132 | 1.51G | msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED && |
4133 | 574M | ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes |
4134 | 1.51G | ) { |
4135 | 199M | if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) { |
4136 | 119k | msa->match_cache_status = MATCH_CACHE_STATUS_INIT; |
4137 | 119k | OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes); |
4138 | 119k | if (r < 0) goto bytecode_error; |
4139 | 119k | } |
4140 | 199M | if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) { |
4141 | 41.5k | msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED; |
4142 | 41.5k | goto fail_match_cache; |
4143 | 41.5k | } |
4144 | 199M | if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) { |
4145 | 75.8k | goto fail_match_cache; |
4146 | 75.8k | } |
4147 | 199M | if (msa->cache_opcodes == NULL) { |
4148 | 42.4k | msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED; |
4149 | 42.4k | OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode)); |
4150 | 42.4k | if (cache_opcodes == NULL) { |
4151 | 0 | return ONIGERR_MEMORY; |
4152 | 0 | } |
4153 | 42.4k | OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points); |
4154 | 42.4k | if (r < 0) { |
4155 | 0 | if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error; |
4156 | 0 | else goto bytecode_error; |
4157 | 0 | } |
4158 | 42.4k | msa->cache_opcodes = cache_opcodes; |
4159 | | #ifdef ONIG_DEBUG_MATCH_CACHE |
4160 | | fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes); |
4161 | | fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points); |
4162 | | fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes); |
4163 | | for (int i = 0; i < msa->num_cache_opcodes; i++) { |
4164 | | fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr); |
4165 | | } |
4166 | | #endif |
4167 | 42.4k | } |
4168 | 199M | if (msa->match_cache_buf == NULL) { |
4169 | 42.4k | size_t length = (end - str) + 1; |
4170 | 42.4k | size_t num_match_cache_points = (size_t)msa->num_cache_points * length; |
4171 | | #ifdef ONIG_DEBUG_MATCH_CACHE |
4172 | | fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length); |
4173 | | #endif |
4174 | | /* Overflow check */ |
4175 | 42.4k | if (num_match_cache_points / length != (size_t)msa->num_cache_points) { |
4176 | 0 | return ONIGERR_MEMORY; |
4177 | 0 | } |
4178 | 42.4k | if (num_match_cache_points >= LONG_MAX_LIMIT) { |
4179 | 0 | return ONIGERR_MEMORY; |
4180 | 0 | } |
4181 | 42.4k | size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1; |
4182 | 42.4k | uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t)); |
4183 | 42.4k | if (match_cache_buf == NULL) { |
4184 | 0 | return ONIGERR_MEMORY; |
4185 | 0 | } |
4186 | 42.4k | xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t)); |
4187 | 42.4k | msa->match_cache_buf = match_cache_buf; |
4188 | 42.4k | } |
4189 | 199M | } |
4190 | 1.51G | fail_match_cache: |
4191 | 1.51G | #endif |
4192 | | |
4193 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
4194 | | if (stk->u.state.state_check != 0) { |
4195 | | stk->type = STK_STATE_CHECK_MARK; |
4196 | | stk++; |
4197 | | } |
4198 | | #endif |
4199 | | |
4200 | 1.51G | MOP_OUT; |
4201 | 1.51G | CHECK_INTERRUPT_IN_MATCH_AT; |
4202 | 1.51G | JUMP; |
4203 | | |
4204 | 1.51G | DEFAULT |
4205 | 0 | goto bytecode_error; |
4206 | 1.51G | } VM_LOOP_END |
4207 | | |
4208 | 7.67M | finish: |
4209 | 7.67M | STACK_SAVE; |
4210 | 7.67M | xfree(xmalloc_base); |
4211 | 7.67M | return best_len; |
4212 | | |
4213 | | #ifdef ONIG_DEBUG |
4214 | | stack_error: |
4215 | | STACK_SAVE; |
4216 | | xfree(xmalloc_base); |
4217 | | return ONIGERR_STACK_BUG; |
4218 | | #endif |
4219 | | |
4220 | 695 | bytecode_error: |
4221 | 695 | STACK_SAVE; |
4222 | 695 | xfree(xmalloc_base); |
4223 | 695 | return ONIGERR_UNDEFINED_BYTECODE; |
4224 | | |
4225 | 0 | unexpected_bytecode_error: |
4226 | 0 | STACK_SAVE; |
4227 | 0 | xfree(xmalloc_base); |
4228 | 0 | return ONIGERR_UNEXPECTED_BYTECODE; |
4229 | | |
4230 | 0 | timeout: |
4231 | 0 | STACK_SAVE; |
4232 | 0 | xfree(xmalloc_base); |
4233 | 0 | return ONIGERR_TIMEOUT; |
4234 | 1.51G | } |
4235 | | |
4236 | | |
4237 | | static UChar* |
4238 | | slow_search(OnigEncoding enc, UChar* target, UChar* target_end, |
4239 | | const UChar* text, const UChar* text_end, UChar* text_range) |
4240 | 171k | { |
4241 | 171k | UChar *t, *p, *s, *end; |
4242 | | |
4243 | 171k | end = (UChar* )text_end; |
4244 | 171k | end -= target_end - target - 1; |
4245 | 171k | if (end > text_range) |
4246 | 249 | end = text_range; |
4247 | | |
4248 | 171k | s = (UChar* )text; |
4249 | | |
4250 | 171k | if (enc->max_enc_len == enc->min_enc_len) { |
4251 | 136k | int n = enc->max_enc_len; |
4252 | | |
4253 | 1.58M | while (s < end) { |
4254 | 1.58M | if (*s == *target) { |
4255 | 136k | p = s + 1; |
4256 | 136k | t = target + 1; |
4257 | 136k | if (target_end == t || memcmp(t, p, target_end - t) == 0) |
4258 | 136k | return s; |
4259 | 136k | } |
4260 | 1.45M | s += n; |
4261 | 1.45M | } |
4262 | 487 | return (UChar* )NULL; |
4263 | 136k | } |
4264 | 151k | while (s < end) { |
4265 | 151k | if (*s == *target) { |
4266 | 34.2k | p = s + 1; |
4267 | 34.2k | t = target + 1; |
4268 | 34.2k | if (target_end == t || memcmp(t, p, target_end - t) == 0) |
4269 | 34.2k | return s; |
4270 | 34.2k | } |
4271 | 117k | s += enclen(enc, s, text_end); |
4272 | 117k | } |
4273 | | |
4274 | 45 | return (UChar* )NULL; |
4275 | 34.3k | } |
4276 | | |
4277 | | static int |
4278 | | str_lower_case_match(OnigEncoding enc, int case_fold_flag, |
4279 | | const UChar* t, const UChar* tend, |
4280 | | const UChar* p, const UChar* end) |
4281 | 557k | { |
4282 | 557k | int lowlen; |
4283 | 557k | UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
4284 | | |
4285 | 2.06M | while (t < tend) { |
4286 | 1.96M | lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); |
4287 | 1.96M | q = lowbuf; |
4288 | 3.47M | while (lowlen > 0) { |
4289 | 1.96M | if (*t++ != *q++) return 0; |
4290 | 1.50M | lowlen--; |
4291 | 1.50M | } |
4292 | 1.96M | } |
4293 | | |
4294 | 100k | return 1; |
4295 | 557k | } |
4296 | | |
4297 | | static UChar* |
4298 | | slow_search_ic(OnigEncoding enc, int case_fold_flag, |
4299 | | UChar* target, UChar* target_end, |
4300 | | const UChar* text, const UChar* text_end, UChar* text_range) |
4301 | 40.0k | { |
4302 | 40.0k | UChar *s, *end; |
4303 | | |
4304 | 40.0k | end = (UChar* )text_end; |
4305 | 40.0k | end -= target_end - target - 1; |
4306 | 40.0k | if (end > text_range) |
4307 | 564 | end = text_range; |
4308 | | |
4309 | 40.0k | s = (UChar* )text; |
4310 | | |
4311 | 450k | while (s < end) { |
4312 | 450k | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
4313 | 450k | s, text_end)) |
4314 | 39.9k | return s; |
4315 | | |
4316 | 410k | s += enclen(enc, s, text_end); |
4317 | 410k | } |
4318 | | |
4319 | 125 | return (UChar* )NULL; |
4320 | 40.0k | } |
4321 | | |
4322 | | static UChar* |
4323 | | slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, |
4324 | | const UChar* text, const UChar* adjust_text, |
4325 | | const UChar* text_end, const UChar* text_start) |
4326 | 24 | { |
4327 | 24 | UChar *t, *p, *s; |
4328 | | |
4329 | 24 | s = (UChar* )text_end; |
4330 | 24 | s -= (target_end - target); |
4331 | 24 | if (s > text_start) |
4332 | 0 | s = (UChar* )text_start; |
4333 | 24 | else |
4334 | 24 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); |
4335 | | |
4336 | 24 | while (s >= text) { |
4337 | 24 | if (*s == *target) { |
4338 | 24 | p = s + 1; |
4339 | 24 | t = target + 1; |
4340 | 24 | while (t < target_end) { |
4341 | 0 | if (*t != *p++) |
4342 | 0 | break; |
4343 | 0 | t++; |
4344 | 0 | } |
4345 | 24 | if (t == target_end) |
4346 | 24 | return s; |
4347 | 24 | } |
4348 | 0 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
4349 | 0 | } |
4350 | | |
4351 | 0 | return (UChar* )NULL; |
4352 | 24 | } |
4353 | | |
4354 | | static UChar* |
4355 | | slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, |
4356 | | UChar* target, UChar* target_end, |
4357 | | const UChar* text, const UChar* adjust_text, |
4358 | | const UChar* text_end, const UChar* text_start) |
4359 | 10 | { |
4360 | 10 | UChar *s; |
4361 | | |
4362 | 10 | s = (UChar* )text_end; |
4363 | 10 | s -= (target_end - target); |
4364 | 10 | if (s > text_start) |
4365 | 0 | s = (UChar* )text_start; |
4366 | 10 | else |
4367 | 10 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); |
4368 | | |
4369 | 12 | while (s >= text) { |
4370 | 10 | if (str_lower_case_match(enc, case_fold_flag, |
4371 | 10 | target, target_end, s, text_end)) |
4372 | 8 | return s; |
4373 | | |
4374 | 2 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
4375 | 2 | } |
4376 | | |
4377 | 2 | return (UChar* )NULL; |
4378 | 10 | } |
4379 | | |
4380 | | /* Sunday's quick search applied to a multibyte string */ |
4381 | | static UChar* |
4382 | | bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, |
4383 | | const UChar* text, const UChar* text_end, |
4384 | | const UChar* text_range) |
4385 | 0 | { |
4386 | 0 | const UChar *s, *se, *t, *p, *end; |
4387 | 0 | const UChar *tail; |
4388 | 0 | ptrdiff_t skip, tlen1; |
4389 | 0 | OnigEncoding enc = reg->enc; |
4390 | |
|
4391 | | # ifdef ONIG_DEBUG_SEARCH |
4392 | | fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
4393 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
4394 | | # endif |
4395 | |
|
4396 | 0 | tail = target_end - 1; |
4397 | 0 | tlen1 = tail - target; |
4398 | 0 | end = text_range; |
4399 | 0 | if (end + tlen1 > text_end) |
4400 | 0 | end = text_end - tlen1; |
4401 | |
|
4402 | 0 | s = text; |
4403 | |
|
4404 | 0 | while (s < end) { |
4405 | 0 | p = se = s + tlen1; |
4406 | 0 | t = tail; |
4407 | 0 | while (*p == *t) { |
4408 | 0 | if (t == target) return (UChar* )s; |
4409 | 0 | p--; t--; |
4410 | 0 | } |
4411 | 0 | if (s + 1 >= end) break; |
4412 | 0 | skip = reg->map[se[1]]; |
4413 | 0 | t = s; |
4414 | 0 | do { |
4415 | 0 | s += enclen(enc, s, end); |
4416 | 0 | } while ((s - t) < skip && s < end); |
4417 | 0 | } |
4418 | | |
4419 | 0 | return (UChar* )NULL; |
4420 | 0 | } |
4421 | | |
4422 | | /* Sunday's quick search */ |
4423 | | static UChar* |
4424 | | bm_search(regex_t* reg, const UChar* target, const UChar* target_end, |
4425 | | const UChar* text, const UChar* text_end, const UChar* text_range) |
4426 | 65.8k | { |
4427 | 65.8k | const UChar *s, *t, *p, *end; |
4428 | 65.8k | const UChar *tail; |
4429 | 65.8k | ptrdiff_t tlen1; |
4430 | | |
4431 | | # ifdef ONIG_DEBUG_SEARCH |
4432 | | fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
4433 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
4434 | | # endif |
4435 | | |
4436 | 65.8k | tail = target_end - 1; |
4437 | 65.8k | tlen1 = tail - target; |
4438 | 65.8k | end = text_range + tlen1; |
4439 | 65.8k | if (end > text_end) |
4440 | 62.8k | end = text_end; |
4441 | | |
4442 | 65.8k | s = text + tlen1; |
4443 | 269k | while (s < end) { |
4444 | 268k | p = s; |
4445 | 268k | t = tail; |
4446 | 2.00M | while (*p == *t) { |
4447 | 1.79M | if (t == target) return (UChar* )p; |
4448 | 1.73M | p--; t--; |
4449 | 1.73M | } |
4450 | 203k | if (s + 1 >= end) break; |
4451 | 203k | s += reg->map[s[1]]; |
4452 | 203k | } |
4453 | | |
4454 | 639 | return (UChar* )NULL; |
4455 | 65.8k | } |
4456 | | |
4457 | | /* Sunday's quick search applied to a multibyte string (ignore case) */ |
4458 | | static UChar* |
4459 | | bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, |
4460 | | const UChar* text, const UChar* text_end, |
4461 | | const UChar* text_range) |
4462 | 0 | { |
4463 | 0 | const UChar *s, *se, *t, *end; |
4464 | 0 | const UChar *tail; |
4465 | 0 | ptrdiff_t skip, tlen1; |
4466 | 0 | OnigEncoding enc = reg->enc; |
4467 | 0 | int case_fold_flag = reg->case_fold_flag; |
4468 | |
|
4469 | | # ifdef ONIG_DEBUG_SEARCH |
4470 | | fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
4471 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
4472 | | # endif |
4473 | |
|
4474 | 0 | tail = target_end - 1; |
4475 | 0 | tlen1 = tail - target; |
4476 | 0 | end = text_range; |
4477 | 0 | if (end + tlen1 > text_end) |
4478 | 0 | end = text_end - tlen1; |
4479 | |
|
4480 | 0 | s = text; |
4481 | |
|
4482 | 0 | while (s < end) { |
4483 | 0 | se = s + tlen1; |
4484 | 0 | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
4485 | 0 | s, se + 1)) |
4486 | 0 | return (UChar* )s; |
4487 | 0 | if (s + 1 >= end) break; |
4488 | 0 | skip = reg->map[se[1]]; |
4489 | 0 | t = s; |
4490 | 0 | do { |
4491 | 0 | s += enclen(enc, s, end); |
4492 | 0 | } while ((s - t) < skip && s < end); |
4493 | 0 | } |
4494 | | |
4495 | 0 | return (UChar* )NULL; |
4496 | 0 | } |
4497 | | |
4498 | | /* Sunday's quick search (ignore case) */ |
4499 | | static UChar* |
4500 | | bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, |
4501 | | const UChar* text, const UChar* text_end, const UChar* text_range) |
4502 | 60.8k | { |
4503 | 60.8k | const UChar *s, *p, *end; |
4504 | 60.8k | const UChar *tail; |
4505 | 60.8k | ptrdiff_t tlen1; |
4506 | 60.8k | OnigEncoding enc = reg->enc; |
4507 | 60.8k | int case_fold_flag = reg->case_fold_flag; |
4508 | | |
4509 | | # ifdef ONIG_DEBUG_SEARCH |
4510 | | fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
4511 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
4512 | | # endif |
4513 | | |
4514 | 60.8k | tail = target_end - 1; |
4515 | 60.8k | tlen1 = tail - target; |
4516 | 60.8k | end = text_range + tlen1; |
4517 | 60.8k | if (end > text_end) |
4518 | 58.3k | end = text_end; |
4519 | | |
4520 | 60.8k | s = text + tlen1; |
4521 | 107k | while (s < end) { |
4522 | 107k | p = s - tlen1; |
4523 | 107k | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
4524 | 107k | p, s + 1)) |
4525 | 60.7k | return (UChar* )p; |
4526 | 46.6k | if (s + 1 >= end) break; |
4527 | 46.5k | s += reg->map[s[1]]; |
4528 | 46.5k | } |
4529 | | |
4530 | 122 | return (UChar* )NULL; |
4531 | 60.8k | } |
4532 | | |
4533 | | static UChar* |
4534 | | map_search(OnigEncoding enc, UChar map[], |
4535 | | const UChar* text, const UChar* text_range, const UChar* text_end) |
4536 | 222k | { |
4537 | 222k | const UChar *s = text; |
4538 | | |
4539 | 2.21M | while (s < text_range) { |
4540 | 2.21M | if (map[*s]) return (UChar* )s; |
4541 | | |
4542 | 1.99M | s += enclen(enc, s, text_end); |
4543 | 1.99M | } |
4544 | 731 | return (UChar* )NULL; |
4545 | 222k | } |
4546 | | |
4547 | | static UChar* |
4548 | | map_search_backward(OnigEncoding enc, UChar map[], |
4549 | | const UChar* text, const UChar* adjust_text, |
4550 | | const UChar* text_start, const UChar* text_end) |
4551 | 7 | { |
4552 | 7 | const UChar *s = text_start; |
4553 | | |
4554 | 13 | while (s >= text) { |
4555 | 10 | if (map[*s]) return (UChar* )s; |
4556 | | |
4557 | 6 | s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
4558 | 6 | } |
4559 | 3 | return (UChar* )NULL; |
4560 | 7 | } |
4561 | | |
4562 | | extern OnigPosition |
4563 | | onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, |
4564 | | OnigOptionType option) |
4565 | 0 | { |
4566 | 0 | ptrdiff_t r; |
4567 | 0 | UChar *prev; |
4568 | 0 | OnigMatchArg msa; |
4569 | |
|
4570 | 0 | MATCH_ARG_INIT(msa, option, region, at, at); |
4571 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
4572 | | { |
4573 | | ptrdiff_t offset = at - str; |
4574 | | STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); |
4575 | | } |
4576 | | #endif |
4577 | |
|
4578 | 0 | if (region) { |
4579 | 0 | r = onig_region_resize_clear(region, reg->num_mem + 1); |
4580 | 0 | } |
4581 | 0 | else |
4582 | 0 | r = 0; |
4583 | |
|
4584 | 0 | if (r == 0) { |
4585 | 0 | prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); |
4586 | 0 | r = match_at(reg, str, end, |
4587 | | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
4588 | | end, |
4589 | | #endif |
4590 | 0 | at, prev, &msa); |
4591 | 0 | } |
4592 | |
|
4593 | 0 | MATCH_ARG_FREE(msa); |
4594 | 0 | return r; |
4595 | 0 | } |
4596 | | |
4597 | | static int |
4598 | | forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, |
4599 | | UChar* range, UChar** low, UChar** high, UChar** low_prev) |
4600 | 467k | { |
4601 | 467k | UChar *p, *pprev = (UChar* )NULL; |
4602 | 467k | size_t input_len = end - str; |
4603 | | |
4604 | | #ifdef ONIG_DEBUG_SEARCH |
4605 | | fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", |
4606 | | (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); |
4607 | | #endif |
4608 | | |
4609 | 467k | if (reg->dmin > input_len) { |
4610 | 146 | return 0; |
4611 | 146 | } |
4612 | | |
4613 | 467k | p = s; |
4614 | 467k | if (reg->dmin != 0) { |
4615 | 17.1k | if ((OnigDistance)(end - p) <= reg->dmin) return 0; /* fail */ |
4616 | 17.1k | if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { |
4617 | 16.7k | p += reg->dmin; |
4618 | 16.7k | } |
4619 | 369 | else { |
4620 | 369 | UChar *q = p + reg->dmin; |
4621 | | |
4622 | 6.23k | while (p < q) p += enclen(reg->enc, p, end); |
4623 | 369 | } |
4624 | 17.1k | } |
4625 | | |
4626 | 560k | retry: |
4627 | 560k | switch (reg->optimize) { |
4628 | 171k | case ONIG_OPTIMIZE_EXACT: |
4629 | 171k | p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); |
4630 | 171k | break; |
4631 | 40.0k | case ONIG_OPTIMIZE_EXACT_IC: |
4632 | 40.0k | p = slow_search_ic(reg->enc, reg->case_fold_flag, |
4633 | 40.0k | reg->exact, reg->exact_end, p, end, range); |
4634 | 40.0k | break; |
4635 | | |
4636 | 65.8k | case ONIG_OPTIMIZE_EXACT_BM: |
4637 | 65.8k | p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); |
4638 | 65.8k | break; |
4639 | | |
4640 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: |
4641 | 0 | p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); |
4642 | 0 | break; |
4643 | | |
4644 | 60.8k | case ONIG_OPTIMIZE_EXACT_BM_IC: |
4645 | 60.8k | p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range); |
4646 | 60.8k | break; |
4647 | | |
4648 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: |
4649 | 0 | p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range); |
4650 | 0 | break; |
4651 | | |
4652 | 222k | case ONIG_OPTIMIZE_MAP: |
4653 | 222k | p = map_search(reg->enc, reg->map, p, range, end); |
4654 | 222k | break; |
4655 | 560k | } |
4656 | | |
4657 | 560k | if (p && p < range) { |
4658 | 558k | if ((OnigDistance)(p - s) < reg->dmin) { |
4659 | 92.7k | retry_gate: |
4660 | 92.7k | pprev = p; |
4661 | 92.7k | p += enclen(reg->enc, p, end); |
4662 | 92.7k | goto retry; |
4663 | 0 | } |
4664 | | |
4665 | 558k | if (reg->sub_anchor) { |
4666 | 93.5k | UChar* prev; |
4667 | | |
4668 | 93.5k | switch (reg->sub_anchor) { |
4669 | 93.5k | case ANCHOR_BEGIN_LINE: |
4670 | 93.5k | if (!ON_STR_BEGIN(p)) { |
4671 | 93.4k | prev = onigenc_get_prev_char_head(reg->enc, |
4672 | 93.4k | (pprev ? pprev : str), p, end); |
4673 | 93.4k | if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) |
4674 | 92.7k | goto retry_gate; |
4675 | 93.4k | } |
4676 | 791 | break; |
4677 | | |
4678 | 791 | case ANCHOR_END_LINE: |
4679 | 0 | if (ON_STR_END(p)) { |
4680 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
4681 | | prev = (UChar* )onigenc_get_prev_char_head(reg->enc, |
4682 | | (pprev ? pprev : str), p); |
4683 | | if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) |
4684 | | goto retry_gate; |
4685 | | #endif |
4686 | 0 | } |
4687 | 0 | else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) |
4688 | 0 | goto retry_gate; |
4689 | 0 | break; |
4690 | 93.5k | } |
4691 | 93.5k | } |
4692 | | |
4693 | 465k | if (reg->dmax == 0) { |
4694 | 330k | *low = p; |
4695 | 330k | if (low_prev) { |
4696 | 330k | if (*low > s) |
4697 | 40.3k | *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); |
4698 | 289k | else |
4699 | 289k | *low_prev = onigenc_get_prev_char_head(reg->enc, |
4700 | 289k | (pprev ? pprev : str), p, end); |
4701 | 330k | } |
4702 | 330k | *high = p; |
4703 | 330k | } |
4704 | 135k | else { |
4705 | 135k | if (reg->dmax != ONIG_INFINITE_DISTANCE) { |
4706 | 76.3k | if ((OnigDistance)(p - str) < reg->dmax) { |
4707 | 4.62k | *low = (UChar* )str; |
4708 | 4.62k | if (low_prev) |
4709 | 4.62k | *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); |
4710 | 4.62k | } |
4711 | 71.7k | else { |
4712 | 71.7k | *low = p - reg->dmax; |
4713 | 71.7k | if (*low > s) { |
4714 | 7.04k | *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, |
4715 | 7.04k | *low, end, (const UChar** )low_prev); |
4716 | 7.04k | if (low_prev && IS_NULL(*low_prev)) |
4717 | 7.04k | *low_prev = onigenc_get_prev_char_head(reg->enc, |
4718 | 7.04k | (pprev ? pprev : s), *low, end); |
4719 | 7.04k | } |
4720 | 64.6k | else { |
4721 | 64.6k | if (low_prev) |
4722 | 64.6k | *low_prev = onigenc_get_prev_char_head(reg->enc, |
4723 | 64.6k | (pprev ? pprev : str), *low, end); |
4724 | 64.6k | } |
4725 | 71.7k | } |
4726 | 76.3k | } |
4727 | | /* no needs to adjust *high, *high is used as range check only */ |
4728 | 135k | if ((OnigDistance)(p - str) < reg->dmin) |
4729 | 0 | *high = (UChar* )str; |
4730 | 135k | else |
4731 | 135k | *high = p - reg->dmin; |
4732 | 135k | } |
4733 | | |
4734 | | #ifdef ONIG_DEBUG_SEARCH |
4735 | | fprintf(stderr, |
4736 | | "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n", |
4737 | | *low - str, *high - str, reg->dmin, reg->dmax); |
4738 | | #endif |
4739 | 465k | return 1; /* success */ |
4740 | 558k | } |
4741 | | |
4742 | 2.14k | return 0; /* fail */ |
4743 | 560k | } |
4744 | | |
/* NOTE(review): presumably a length threshold for choosing a BM-style
   backward search; backward_search_range() below does not reference it --
   confirm whether it is still used elsewhere. */
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100
4746 | | |
4747 | | static int |
4748 | | backward_search_range(regex_t* reg, const UChar* str, const UChar* end, |
4749 | | UChar* s, const UChar* range, UChar* adjrange, |
4750 | | UChar** low, UChar** high) |
4751 | 41 | { |
4752 | 41 | UChar *p; |
4753 | 41 | size_t input_len = end - str; |
4754 | | |
4755 | 41 | if (reg->dmin > input_len) { |
4756 | 0 | return 0; |
4757 | 0 | } |
4758 | | |
4759 | 41 | p = s; |
4760 | | |
4761 | 41 | retry: |
4762 | 41 | switch (reg->optimize) { |
4763 | 24 | case ONIG_OPTIMIZE_EXACT: |
4764 | 24 | exact_method: |
4765 | 24 | p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, |
4766 | 24 | range, adjrange, end, p); |
4767 | 24 | break; |
4768 | | |
4769 | 10 | case ONIG_OPTIMIZE_EXACT_IC: |
4770 | 10 | case ONIG_OPTIMIZE_EXACT_BM_IC: |
4771 | 10 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: |
4772 | 10 | p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, |
4773 | 10 | reg->exact, reg->exact_end, |
4774 | 10 | range, adjrange, end, p); |
4775 | 10 | break; |
4776 | | |
4777 | 0 | case ONIG_OPTIMIZE_EXACT_BM: |
4778 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: |
4779 | 0 | goto exact_method; |
4780 | 0 | break; |
4781 | | |
4782 | 7 | case ONIG_OPTIMIZE_MAP: |
4783 | 7 | p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end); |
4784 | 7 | break; |
4785 | 41 | } |
4786 | | |
4787 | 41 | if (p) { |
4788 | 36 | if (reg->sub_anchor) { |
4789 | 0 | UChar* prev; |
4790 | |
|
4791 | 0 | switch (reg->sub_anchor) { |
4792 | 0 | case ANCHOR_BEGIN_LINE: |
4793 | 0 | if (!ON_STR_BEGIN(p)) { |
4794 | 0 | prev = onigenc_get_prev_char_head(reg->enc, str, p, end); |
4795 | 0 | if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) { |
4796 | 0 | p = prev; |
4797 | 0 | goto retry; |
4798 | 0 | } |
4799 | 0 | } |
4800 | 0 | break; |
4801 | | |
4802 | 0 | case ANCHOR_END_LINE: |
4803 | 0 | if (ON_STR_END(p)) { |
4804 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
4805 | | prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); |
4806 | | if (IS_NULL(prev)) goto fail; |
4807 | | if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) { |
4808 | | p = prev; |
4809 | | goto retry; |
4810 | | } |
4811 | | #endif |
4812 | 0 | } |
4813 | 0 | else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) { |
4814 | 0 | p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); |
4815 | 0 | if (IS_NULL(p)) goto fail; |
4816 | 0 | goto retry; |
4817 | 0 | } |
4818 | 0 | break; |
4819 | 0 | } |
4820 | 0 | } |
4821 | | |
4822 | 36 | if (reg->dmax != ONIG_INFINITE_DISTANCE) { |
4823 | 35 | if ((OnigDistance)(p - str) < reg->dmax) |
4824 | 0 | *low = (UChar* )str; |
4825 | 35 | else |
4826 | 35 | *low = p - reg->dmax; |
4827 | | |
4828 | 35 | if (reg->dmin != 0) { |
4829 | 0 | if ((OnigDistance)(p - str) < reg->dmin) |
4830 | 0 | *high = (UChar* )str; |
4831 | 0 | else |
4832 | 0 | *high = p - reg->dmin; |
4833 | 0 | } |
4834 | 35 | else { |
4835 | 35 | *high = p; |
4836 | 35 | } |
4837 | | |
4838 | 35 | *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); |
4839 | 35 | } |
4840 | | |
4841 | | #ifdef ONIG_DEBUG_SEARCH |
4842 | | fprintf(stderr, "backward_search_range: low: %d, high: %d\n", |
4843 | | (int )(*low - str), (int )(*high - str)); |
4844 | | #endif |
4845 | 36 | return 1; /* success */ |
4846 | 36 | } |
4847 | | |
4848 | 5 | fail: |
4849 | | #ifdef ONIG_DEBUG_SEARCH |
4850 | | fprintf(stderr, "backward_search_range: fail.\n"); |
4851 | | #endif |
4852 | 5 | return 0; /* fail */ |
4853 | 41 | } |
4854 | | |
4855 | | |
4856 | | extern OnigPosition |
4857 | | onig_search(regex_t* reg, const UChar* str, const UChar* end, |
4858 | | const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) |
4859 | 1.17M | { |
4860 | 1.17M | return onig_search_gpos(reg, str, end, start, start, range, region, option); |
4861 | 1.17M | } |
4862 | | |
4863 | | extern OnigPosition |
4864 | | onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end, |
4865 | | const UChar* global_pos, |
4866 | | const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) |
4867 | 1.17M | { |
4868 | 1.17M | ptrdiff_t r; |
4869 | 1.17M | UChar *s, *prev; |
4870 | 1.17M | OnigMatchArg msa; |
4871 | | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
4872 | | const UChar *orig_start = start; |
4873 | | const UChar *orig_range = range; |
4874 | | #endif |
4875 | | |
4876 | | #ifdef ONIG_DEBUG_SEARCH |
4877 | | fprintf(stderr, |
4878 | | "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n", |
4879 | | (uintptr_t )str, str, end - str, start - str, range - str); |
4880 | | #endif |
4881 | | |
4882 | 1.17M | if (region) { |
4883 | 1.17M | r = onig_region_resize_clear(region, reg->num_mem + 1); |
4884 | 1.17M | if (r) goto finish_no_msa; |
4885 | 1.17M | } |
4886 | | |
4887 | 1.17M | if (start > end || start < str) goto mismatch_no_msa; |
4888 | | |
4889 | | |
4890 | | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
4891 | | # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
4892 | | # define MATCH_AND_RETURN_CHECK(upper_range) \ |
4893 | | r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ |
4894 | | switch (r) { \ |
4895 | | case ONIG_MISMATCH: \ |
4896 | | break; \ |
4897 | | case ONIGERR_TIMEOUT: \ |
4898 | | goto timeout; \ |
4899 | | default: \ |
4900 | | if (r >= 0) { \ |
4901 | | if (! IS_FIND_LONGEST(reg->options)) { \ |
4902 | | goto match; \ |
4903 | | }\ |
4904 | | }\ |
4905 | | else goto finish; /* error */ \ |
4906 | | } |
4907 | | # else |
4908 | | # define MATCH_AND_RETURN_CHECK(upper_range) \ |
4909 | | r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ |
4910 | | switch (r) { \ |
4911 | | case ONIG_MISMATCH: \ |
4912 | | break; \ |
4913 | | case ONIGERR_TIMEOUT: \ |
4914 | | goto timeout; \ |
4915 | | default: \ |
4916 | | if (r >= 0) { \ |
4917 | | goto match; \ |
4918 | | }\ |
4919 | | else goto finish; /* error */ \ |
4920 | | } |
4921 | | # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ |
4922 | | #else |
4923 | 1.17M | # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
4924 | 1.17M | # define MATCH_AND_RETURN_CHECK(none) \ |
4925 | 7.67M | r = match_at(reg, str, end, s, prev, &msa);\ |
4926 | 7.67M | switch (r) { \ |
4927 | 6.50M | case ONIG_MISMATCH: \ |
4928 | 6.50M | break; \ |
4929 | 0 | case ONIGERR_TIMEOUT: \ |
4930 | 0 | goto timeout; \ |
4931 | 1.16M | default: \ |
4932 | 1.16M | if (r >= 0) { \ |
4933 | 1.16M | if (! IS_FIND_LONGEST(reg->options)) { \ |
4934 | 1.16M | goto match; \ |
4935 | 1.16M | } \ |
4936 | 1.16M | } \ |
4937 | 1.16M | else goto finish; /* error */ \ |
4938 | 7.67M | } |
4939 | | # else |
4940 | | # define MATCH_AND_RETURN_CHECK(none) \ |
4941 | | r = match_at(reg, str, end, s, prev, &msa);\ |
4942 | | switch (r) { \ |
4943 | | case ONIG_MISMATCH: \ |
4944 | | break; \ |
4945 | | case ONIGERR_TIMEOUT: \ |
4946 | | goto timeout; \ |
4947 | | default: \ |
4948 | | if (r >= 0) { \ |
4949 | | goto match; \ |
4950 | | } \ |
4951 | | else goto finish; /* error */ \ |
4952 | | } |
4953 | | # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ |
4954 | 1.17M | #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ |
4955 | | |
4956 | | |
4957 | | /* anchor optimize: resume search range */ |
4958 | 1.17M | if (reg->anchor != 0 && str < end) { |
4959 | 19.9k | UChar *min_semi_end, *max_semi_end; |
4960 | | |
4961 | 19.9k | if (reg->anchor & ANCHOR_BEGIN_POSITION) { |
4962 | | /* search start-position only */ |
4963 | 3.97k | begin_position: |
4964 | 3.97k | if (range > start) |
4965 | 3.96k | { |
4966 | 3.96k | if (global_pos > start) |
4967 | 0 | { |
4968 | 0 | if (global_pos < range) |
4969 | 0 | range = global_pos + 1; |
4970 | 0 | } |
4971 | 3.96k | else |
4972 | 3.96k | range = start + 1; |
4973 | 3.96k | } |
4974 | 13 | else |
4975 | 13 | range = start; |
4976 | 3.97k | } |
4977 | 15.9k | else if (reg->anchor & ANCHOR_BEGIN_BUF) { |
4978 | | /* search str-position only */ |
4979 | 20 | if (range > start) { |
4980 | 17 | if (start != str) goto mismatch_no_msa; |
4981 | 17 | range = str + 1; |
4982 | 17 | } |
4983 | 3 | else { |
4984 | 3 | if (range <= str) { |
4985 | 0 | start = str; |
4986 | 0 | range = str; |
4987 | 0 | } |
4988 | 3 | else |
4989 | 3 | goto mismatch_no_msa; |
4990 | 3 | } |
4991 | 20 | } |
4992 | 15.9k | else if (reg->anchor & ANCHOR_END_BUF) { |
4993 | 21 | min_semi_end = max_semi_end = (UChar* )end; |
4994 | | |
4995 | 284 | end_buf: |
4996 | 284 | if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin) |
4997 | 0 | goto mismatch_no_msa; |
4998 | | |
4999 | 284 | if (range > start) { |
5000 | 276 | if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) { |
5001 | 50 | start = min_semi_end - reg->anchor_dmax; |
5002 | 50 | if (start < end) |
5003 | 40 | start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); |
5004 | 50 | } |
5005 | 276 | if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) { |
5006 | 255 | if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin) |
5007 | 0 | goto mismatch_no_msa; |
5008 | 255 | else |
5009 | 255 | range = max_semi_end - reg->anchor_dmin + 1; |
5010 | 255 | } |
5011 | | |
5012 | 276 | if (start > range) goto mismatch_no_msa; |
5013 | | /* If start == range, match with empty at end. |
5014 | | Backward search is used. */ |
5015 | 276 | } |
5016 | 8 | else { |
5017 | 8 | if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) { |
5018 | 0 | range = min_semi_end - reg->anchor_dmax; |
5019 | 0 | } |
5020 | 8 | if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) { |
5021 | 6 | if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin) |
5022 | 0 | goto mismatch_no_msa; |
5023 | 6 | else { |
5024 | 6 | start = max_semi_end - reg->anchor_dmin; |
5025 | 6 | start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); |
5026 | 6 | } |
5027 | 6 | } |
5028 | 8 | if (range > start) goto mismatch_no_msa; |
5029 | 8 | } |
5030 | 284 | } |
5031 | 15.9k | else if (reg->anchor & ANCHOR_SEMI_END_BUF) { |
5032 | 265 | UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); |
5033 | | |
5034 | 265 | max_semi_end = (UChar* )end; |
5035 | 265 | if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { |
5036 | 200 | min_semi_end = pre_end; |
5037 | | |
5038 | 200 | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
5039 | 200 | pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); |
5040 | 200 | if (IS_NOT_NULL(pre_end) && |
5041 | 200 | IS_NEWLINE_CRLF(reg->options) && |
5042 | 0 | ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { |
5043 | 0 | min_semi_end = pre_end; |
5044 | 0 | } |
5045 | 200 | #endif |
5046 | 200 | if (min_semi_end > str && start <= min_semi_end) { |
5047 | 198 | goto end_buf; |
5048 | 198 | } |
5049 | 200 | } |
5050 | 65 | else { |
5051 | 65 | min_semi_end = (UChar* )end; |
5052 | 65 | goto end_buf; |
5053 | 65 | } |
5054 | 265 | } |
5055 | 15.6k | else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { |
5056 | 11 | goto begin_position; |
5057 | 11 | } |
5058 | 19.9k | } |
5059 | 1.15M | else if (str == end) { /* empty string */ |
5060 | 8.80k | static const UChar address_for_empty_string[] = ""; |
5061 | | |
5062 | | #ifdef ONIG_DEBUG_SEARCH |
5063 | | fprintf(stderr, "onig_search: empty string.\n"); |
5064 | | #endif |
5065 | | |
5066 | 8.80k | if (reg->threshold_len == 0) { |
5067 | 3.56k | start = end = str = address_for_empty_string; |
5068 | 3.56k | s = (UChar* )start; |
5069 | 3.56k | prev = (UChar* )NULL; |
5070 | | |
5071 | 3.56k | MATCH_ARG_INIT(msa, option, region, start, start); |
5072 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
5073 | | msa.state_check_buff = (void* )0; |
5074 | | msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ |
5075 | | #endif |
5076 | 3.56k | MATCH_AND_RETURN_CHECK(end); |
5077 | 1.43k | goto mismatch; |
5078 | 3.56k | } |
5079 | 5.23k | goto mismatch_no_msa; |
5080 | 8.80k | } |
5081 | | |
5082 | | #ifdef ONIG_DEBUG_SEARCH |
5083 | | fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", |
5084 | | (int )(end - str), (int )(start - str), (int )(range - str)); |
5085 | | #endif |
5086 | | |
5087 | 1.16M | MATCH_ARG_INIT(msa, option, region, start, global_pos); |
5088 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
5089 | | { |
5090 | | ptrdiff_t offset = (MIN(start, range) - str); |
5091 | | STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); |
5092 | | } |
5093 | | #endif |
5094 | | |
5095 | 1.16M | s = (UChar* )start; |
5096 | 1.16M | if (range > start) { /* forward search */ |
5097 | 1.16M | if (s > str) |
5098 | 1.15M | prev = onigenc_get_prev_char_head(reg->enc, str, s, end); |
5099 | 12.7k | else |
5100 | 12.7k | prev = (UChar* )NULL; |
5101 | | |
5102 | 1.16M | if (reg->optimize != ONIG_OPTIMIZE_NONE) { |
5103 | 188k | UChar *sch_range, *low, *high, *low_prev; |
5104 | | |
5105 | 188k | if (reg->dmax != 0) { |
5106 | 82.1k | if (reg->dmax == ONIG_INFINITE_DISTANCE) |
5107 | 59.5k | sch_range = (UChar* )end; |
5108 | 22.5k | else { |
5109 | 22.5k | if ((OnigDistance)(end - range) < reg->dmax) |
5110 | 19.4k | sch_range = (UChar* )end; |
5111 | 3.13k | else { |
5112 | 3.13k | sch_range = (UChar* )range + reg->dmax; |
5113 | 3.13k | } |
5114 | 22.5k | } |
5115 | 82.1k | } |
5116 | 106k | else |
5117 | 106k | sch_range = (UChar* )range; |
5118 | | |
5119 | 188k | if ((end - start) < reg->threshold_len) |
5120 | 171 | goto mismatch; |
5121 | | |
5122 | 188k | if (reg->dmax != ONIG_INFINITE_DISTANCE) { |
5123 | 408k | do { |
5124 | 408k | if (! forward_search_range(reg, str, end, s, sch_range, |
5125 | 408k | &low, &high, &low_prev)) goto mismatch; |
5126 | 406k | if (s < low) { |
5127 | 47.4k | s = low; |
5128 | 47.4k | prev = low_prev; |
5129 | 47.4k | } |
5130 | 761k | while (s <= high) { |
5131 | 481k | MATCH_AND_RETURN_CHECK(orig_range); |
5132 | 354k | prev = s; |
5133 | 354k | s += enclen(reg->enc, s, end); |
5134 | 354k | } |
5135 | 406k | } while (s < range); |
5136 | 133 | goto mismatch; |
5137 | 128k | } |
5138 | 59.5k | else { /* check only. */ |
5139 | 59.5k | if (! forward_search_range(reg, str, end, s, sch_range, |
5140 | 59.5k | &low, &high, (UChar** )NULL)) goto mismatch; |
5141 | | |
5142 | 59.1k | if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { |
5143 | 20.2k | do { |
5144 | 20.2k | MATCH_AND_RETURN_CHECK(orig_range); |
5145 | 18.5k | prev = s; |
5146 | 18.5k | s += enclen(reg->enc, s, end); |
5147 | | |
5148 | 18.5k | if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { |
5149 | 58.0k | while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0) |
5150 | 56.8k | && s < range) { |
5151 | 56.8k | prev = s; |
5152 | 56.8k | s += enclen(reg->enc, s, end); |
5153 | 56.8k | } |
5154 | 1.29k | } |
5155 | 18.5k | } while (s < range); |
5156 | 57 | goto mismatch; |
5157 | 1.73k | } |
5158 | 59.1k | } |
5159 | 188k | } |
5160 | | |
5161 | 7.16M | do { |
5162 | 7.16M | MATCH_AND_RETURN_CHECK(orig_range); |
5163 | 6.13M | prev = s; |
5164 | 6.13M | s += enclen(reg->enc, s, end); |
5165 | 6.13M | } while (s < range); |
5166 | | |
5167 | 2.64k | if (s == range) { /* because empty match with /$/. */ |
5168 | 2.64k | MATCH_AND_RETURN_CHECK(orig_range); |
5169 | 2.61k | } |
5170 | 2.64k | } |
5171 | 3.30k | else { /* backward search */ |
5172 | 3.30k | if (reg->optimize != ONIG_OPTIMIZE_NONE) { |
5173 | 226 | UChar *low, *high, *adjrange, *sch_start; |
5174 | 226 | const UChar *min_range; |
5175 | | |
5176 | 226 | if (range < end) |
5177 | 41 | adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); |
5178 | 185 | else |
5179 | 185 | adjrange = (UChar* )end; |
5180 | | |
5181 | 226 | if ((OnigDistance)(end - range) > reg->dmin) |
5182 | 41 | min_range = range + reg->dmin; |
5183 | 185 | else |
5184 | 185 | min_range = end; |
5185 | | |
5186 | 226 | if (reg->dmax != ONIG_INFINITE_DISTANCE && |
5187 | 146 | end - range >= reg->threshold_len) { |
5188 | 40 | do { |
5189 | 40 | if ((OnigDistance)(end - s) > reg->dmax) |
5190 | 14 | sch_start = s + reg->dmax; |
5191 | 26 | else |
5192 | 26 | sch_start = (UChar* )end; |
5193 | | |
5194 | 40 | if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, |
5195 | 40 | &low, &high) <= 0) |
5196 | 5 | goto mismatch; |
5197 | | |
5198 | 35 | if (s > high) |
5199 | 0 | s = high; |
5200 | | |
5201 | 106 | while (s >= low) { |
5202 | 71 | prev = onigenc_get_prev_char_head(reg->enc, str, s, end); |
5203 | 71 | MATCH_AND_RETURN_CHECK(orig_start); |
5204 | 71 | s = prev; |
5205 | 71 | } |
5206 | 35 | } while (s >= range); |
5207 | 35 | goto mismatch; |
5208 | 40 | } |
5209 | 186 | else { /* check only. */ |
5210 | 186 | if (end - range < reg->threshold_len) goto mismatch; |
5211 | | |
5212 | 1 | if (reg->dmax != 0) { |
5213 | 1 | if (reg->dmax == ONIG_INFINITE_DISTANCE) |
5214 | 1 | sch_start = (UChar* )end; |
5215 | 0 | else { |
5216 | 0 | if ((OnigDistance)(end - s) > reg->dmax) { |
5217 | 0 | sch_start = s + reg->dmax; |
5218 | 0 | sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, |
5219 | 0 | start, sch_start, end); |
5220 | 0 | } else |
5221 | 0 | sch_start = (UChar* )end; |
5222 | 0 | } |
5223 | 1 | } |
5224 | 0 | else |
5225 | 0 | sch_start = (UChar* )s; |
5226 | | |
5227 | 1 | if (backward_search_range(reg, str, end, sch_start, min_range, adjrange, |
5228 | 1 | &low, &high) <= 0) goto mismatch; |
5229 | 1 | } |
5230 | 226 | } |
5231 | | |
5232 | 3.08k | do { |
5233 | 3.08k | prev = onigenc_get_prev_char_head(reg->enc, str, s, end); |
5234 | 3.08k | MATCH_AND_RETURN_CHECK(orig_start); |
5235 | 224 | s = prev; |
5236 | 224 | } while (s >= range); |
5237 | 3.08k | } |
5238 | | |
5239 | 7.16k | mismatch: |
5240 | 7.16k | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
5241 | 7.16k | if (IS_FIND_LONGEST(reg->options)) { |
5242 | 0 | if (msa.best_len >= 0) { |
5243 | 0 | s = msa.best_s; |
5244 | 0 | goto match; |
5245 | 0 | } |
5246 | 0 | } |
5247 | 7.16k | #endif |
5248 | 7.16k | r = ONIG_MISMATCH; |
5249 | | |
5250 | 7.86k | finish: |
5251 | 7.86k | MATCH_ARG_FREE(msa); |
5252 | | |
5253 | | /* If result is mismatch and no FIND_NOT_EMPTY option, |
5254 | | then the region is not set in match_at(). */ |
5255 | 7.86k | if (IS_FIND_NOT_EMPTY(reg->options) && region) { |
5256 | 0 | onig_region_clear(region); |
5257 | 0 | } |
5258 | | |
5259 | | #ifdef ONIG_DEBUG |
5260 | | if (r != ONIG_MISMATCH) |
5261 | | fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r); |
5262 | | #endif |
5263 | 7.86k | return r; |
5264 | | |
5265 | 5.24k | mismatch_no_msa: |
5266 | 5.24k | r = ONIG_MISMATCH; |
5267 | 5.24k | finish_no_msa: |
5268 | | #ifdef ONIG_DEBUG |
5269 | | if (r != ONIG_MISMATCH) |
5270 | | fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r); |
5271 | | #endif |
5272 | 5.24k | return r; |
5273 | | |
5274 | 1.16M | match: |
5275 | 1.16M | MATCH_ARG_FREE(msa); |
5276 | 1.16M | return s - str; |
5277 | | |
5278 | 0 | timeout: |
5279 | 0 | MATCH_ARG_FREE(msa); |
5280 | 0 | return ONIGERR_TIMEOUT; |
5281 | 5.24k | } |
5282 | | /* Calls onig_search() repeatedly over str..end and invokes scan_callback once per match; returns the match count, a non-zero callback result, or a search error code. */ |
5283 | | extern OnigPosition |
5284 | | onig_scan(regex_t* reg, const UChar* str, const UChar* end, |
5285 | | OnigRegion* region, OnigOptionType option, |
5286 | | int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), |
5287 | | void* callback_arg) |
5288 | 0 | { |
5289 | 0 | OnigPosition r; /* result of the most recent onig_search() call */ |
5290 | 0 | OnigPosition n; /* number of matches handed to the callback so far */ |
5291 | 0 | int rs; /* value returned by scan_callback */ |
5292 | 0 | const UChar* start; /* position at which the next search begins */ |
5293 | 

5294 | 0 | n = 0; |
5295 | 0 | start = str; |
5296 | 0 | while (1) { |
5297 | 0 | r = onig_search(reg, str, end, start, end, region, option); /* forward search from start up to end */ |
5298 | 0 | if (r >= 0) { /* non-negative result: a match was found at offset r */ |
5299 | 0 | rs = scan_callback(n, r, region, callback_arg); /* callback receives the match index, the match offset, the region and the user argument */ |
5300 | 0 | n++; |
5301 | 0 | if (rs != 0) /* a non-zero callback result stops the scan */ |
5302 | 0 | return rs; /* ...and is returned to the caller in place of the count */ |
5303 | | |
5304 | 0 | if (region->end[0] == start - str) { /* region->end[0] equals the search start offset. NOTE(review): region is dereferenced unconditionally, so callers presumably must pass a non-NULL region - confirm */ |
5305 | 0 | if (start >= end) break; /* already at the end of the subject: nothing left to scan */ |
5306 | 0 | start += enclen(reg->enc, start, end); /* advance by enclen() so the next search starts further along */ |
5307 | 0 | } |
5308 | 0 | else |
5309 | 0 | start = str + region->end[0]; /* continue from the offset stored in region->end[0] */ |
5310 | | |
5311 | 0 | if (start > end) /* moved past the end of the subject */ |
5312 | 0 | break; |
5313 | 0 | } |
5314 | 0 | else if (r == ONIG_MISMATCH) { /* no further match: stop scanning */ |
5315 | 0 | break; |
5316 | 0 | } |
5317 | 0 | else { /* error: any other negative result is passed straight back */ |
5318 | 0 | return r; |
5319 | 0 | } |
5320 | 0 | } |
5321 | | |
5322 | 0 | return n; /* number of matches for which the callback was invoked */ |
5323 | 0 | } |
5324 | | /* Returns the encoding stored in reg->enc. */ |
5325 | | extern OnigEncoding |
5326 | | onig_get_encoding(const regex_t* reg) |
5327 | 0 | { |
5328 | 0 | return reg->enc; |
5329 | 0 | } |
5330 | | /* Returns the option flags stored in reg->options. */ |
5331 | | extern OnigOptionType |
5332 | | onig_get_options(const regex_t* reg) |
5333 | 0 | { |
5334 | 0 | return reg->options; |
5335 | 0 | } |
5336 | | /* Returns the case-fold flag stored in reg->case_fold_flag. */ |
5337 | | extern OnigCaseFoldType |
5338 | | onig_get_case_fold_flag(const regex_t* reg) |
5339 | 0 | { |
5340 | 0 | return reg->case_fold_flag; |
5341 | 0 | } |
5342 | | /* Returns the syntax pointer stored in reg->syntax; the pointer is handed back as-is, nothing is copied. */ |
5343 | | extern const OnigSyntaxType* |
5344 | | onig_get_syntax(const regex_t* reg) |
5345 | 0 | { |
5346 | 0 | return reg->syntax; |
5347 | 0 | } |
5348 | | /* Returns reg->num_mem as the number of captures of the compiled pattern. */ |
5349 | | extern int |
5350 | | onig_number_of_captures(const regex_t* reg) |
5351 | 0 | { |
5352 | 0 | return reg->num_mem; |
5353 | 0 | } |
5354 | | /* With USE_CAPTURE_HISTORY defined, counts the bits set in reg->capture_history for indexes 0..ONIG_MAX_CAPTURE_HISTORY_GROUP; otherwise always returns 0. */ |
5355 | | extern int |
5356 | | onig_number_of_capture_histories(const regex_t* reg) |
5357 | 0 | { |
5358 | | #ifdef USE_CAPTURE_HISTORY |
5359 | | int i, n; |
5360 | | |
5361 | | n = 0; |
5362 | | for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { /* upper bound is inclusive */ |
5363 | | if (BIT_STATUS_AT(reg->capture_history, i) != 0) |
5364 | | n++; |
5365 | | } |
5366 | | return n; |
5367 | | #else |
5368 | 0 | return 0; /* capture-history support not compiled in */ |
5369 | 0 | #endif |
5370 | 0 | } |
5371 | | /* Assigns *from to *to, i.e. copies the encoding object by value into caller-provided storage. */ |
5372 | | extern void |
5373 | | onig_copy_encoding(OnigEncodingType *to, OnigEncoding from) |
5374 | 0 | { |
5375 | 0 | *to = *from; |
5376 | 0 | } |