/src/fluent-bit/lib/onigmo/regexec.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************** |
2 | | regexec.c - Onigmo (Oniguruma-mod) (regular expression library) |
3 | | **********************************************************************/ |
4 | | /*- |
5 | | * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> |
6 | | * Copyright (c) 2011-2019 K.Takata <kentkt AT csc DOT jp> |
7 | | * All rights reserved. |
8 | | * |
9 | | * Redistribution and use in source and binary forms, with or without |
10 | | * modification, are permitted provided that the following conditions |
11 | | * are met: |
12 | | * 1. Redistributions of source code must retain the above copyright |
13 | | * notice, this list of conditions and the following disclaimer. |
14 | | * 2. Redistributions in binary form must reproduce the above copyright |
15 | | * notice, this list of conditions and the following disclaimer in the |
16 | | * documentation and/or other materials provided with the distribution. |
17 | | * |
18 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
22 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 | | * SUCH DAMAGE. |
29 | | */ |
30 | | |
31 | | #include "regint.h" |
32 | | |
/* Build configuration: USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is
 * undefined when RUBY is defined and defined otherwise. */
33 | | #ifdef RUBY |
34 | | # undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
35 | | #else |
36 | | # define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
37 | | #endif |
38 | | |
/* Unless the build already chose a value, USE_TOKEN_THREADED_VM defaults to
 * 1 on compilers that define __GNUC__ and to 0 everywhere else. */
39 | | #ifndef USE_TOKEN_THREADED_VM |
40 | | # ifdef __GNUC__ |
41 | | # define USE_TOKEN_THREADED_VM 1 |
42 | | # else |
43 | | # define USE_TOKEN_THREADED_VM 0 |
44 | | # endif |
45 | | #endif |
46 | | |
47 | | #ifdef RUBY |
48 | | # define ENC_DUMMY_FLAG (1<<24) |
49 | | static inline int |
50 | | rb_enc_asciicompat(OnigEncoding enc) |
51 | | { |
52 | | return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG); |
53 | | } |
/* RUBY build only: replaces ONIGENC_IS_MBC_ASCII_WORD.  When
 * rb_enc_asciicompat(enc) holds, the first byte is tested directly
 * (alphanumeric or '_'); otherwise the character is decoded and checked
 * against ONIGENC_CTYPE_WORD.  Note that `s` is evaluated more than once. */
54 | | # undef ONIGENC_IS_MBC_ASCII_WORD |
55 | | # define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \ |
56 | | (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \ |
57 | | onigenc_ascii_is_code_ctype( \ |
58 | | ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)) |
59 | | #endif /* RUBY */ |
60 | | |
61 | | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
/* True when the character at p decodes to 13 (CR) and the character that
 * follows it decodes to 10 (LF).  `p` is evaluated more than once. */
62 | | # define ONIGENC_IS_MBC_CRNL(enc,p,end) \ |
63 | 0 | (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ |
64 | 0 | ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10) |
/* With USE_CRNL_AS_LINE_TERMINATOR the extended newline test is delegated
 * to is_mbc_newline_ex(), passing every argument through unchanged. */
65 | | # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ |
66 | 816k | is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev)) |
67 | | static int |
68 | | is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start, |
69 | | const UChar *end, OnigOptionType option, int check_prev) |
70 | 816k | { |
71 | 816k | if (IS_NEWLINE_CRLF(option)) { |
72 | 0 | if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) { |
73 | 0 | if (check_prev) { |
74 | 0 | const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end); |
75 | 0 | if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d) |
76 | 0 | return 0; |
77 | 0 | else |
78 | 0 | return 1; |
79 | 0 | } |
80 | 0 | else |
81 | 0 | return 1; |
82 | 0 | } |
83 | 0 | else { |
84 | 0 | const UChar *pnext = p + enclen(enc, p, end); |
85 | 0 | if (pnext < end && |
86 | 0 | ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d && |
87 | 0 | ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a) |
88 | 0 | return 1; |
89 | 0 | if (ONIGENC_IS_MBC_NEWLINE(enc, p, end)) |
90 | 0 | return 1; |
91 | 0 | return 0; |
92 | 0 | } |
93 | 0 | } |
94 | 816k | else { |
95 | 816k | return ONIGENC_IS_MBC_NEWLINE(enc, p, end); |
96 | 816k | } |
97 | 816k | } |
98 | | #else /* USE_CRNL_AS_LINE_TERMINATOR */ |
/* Without USE_CRNL_AS_LINE_TERMINATOR the extended test collapses to the
 * encoding's own newline test; start, option and check_prev are ignored. */
99 | | # define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \ |
100 | | ONIGENC_IS_MBC_NEWLINE((enc), (p), (end)) |
101 | | #endif /* USE_CRNL_AS_LINE_TERMINATOR */ |
102 | | |
103 | | #ifdef USE_CAPTURE_HISTORY |
104 | | static void history_tree_free(OnigCaptureTreeNode* node); |
105 | | |
106 | | static void |
107 | | history_tree_clear(OnigCaptureTreeNode* node) |
108 | 0 | { |
109 | 0 | int i; |
110 | |
|
111 | 0 | if (IS_NOT_NULL(node)) { |
112 | 0 | for (i = 0; i < node->num_childs; i++) { |
113 | 0 | if (IS_NOT_NULL(node->childs[i])) { |
114 | 0 | history_tree_free(node->childs[i]); |
115 | 0 | } |
116 | 0 | } |
117 | 0 | for (i = 0; i < node->allocated; i++) { |
118 | 0 | node->childs[i] = (OnigCaptureTreeNode* )0; |
119 | 0 | } |
120 | 0 | node->num_childs = 0; |
121 | 0 | node->beg = ONIG_REGION_NOTPOS; |
122 | 0 | node->end = ONIG_REGION_NOTPOS; |
123 | 0 | node->group = -1; |
124 | 0 | xfree(node->childs); |
125 | 0 | node->childs = (OnigCaptureTreeNode** )0; |
126 | 0 | } |
127 | 0 | } |
128 | | |
129 | | static void |
130 | | history_tree_free(OnigCaptureTreeNode* node) |
131 | 0 | { |
132 | 0 | history_tree_clear(node); |
133 | 0 | xfree(node); |
134 | 0 | } |
135 | | |
136 | | static void |
137 | | history_root_free(OnigRegion* r) |
138 | 550 | { |
139 | 550 | if (IS_NOT_NULL(r->history_root)) { |
140 | 0 | history_tree_free(r->history_root); |
141 | 0 | r->history_root = (OnigCaptureTreeNode* )0; |
142 | 0 | } |
143 | 550 | } |
144 | | |
145 | | static OnigCaptureTreeNode* |
146 | | history_node_new(void) |
147 | 0 | { |
148 | 0 | OnigCaptureTreeNode* node; |
149 | |
|
150 | 0 | node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); |
151 | 0 | CHECK_NULL_RETURN(node); |
152 | 0 | node->childs = (OnigCaptureTreeNode** )0; |
153 | 0 | node->allocated = 0; |
154 | 0 | node->num_childs = 0; |
155 | 0 | node->group = -1; |
156 | 0 | node->beg = ONIG_REGION_NOTPOS; |
157 | 0 | node->end = ONIG_REGION_NOTPOS; |
158 | |
|
159 | 0 | return node; |
160 | 0 | } |
161 | | |
162 | | static int |
163 | | history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) |
164 | 0 | { |
165 | 0 | # define HISTORY_TREE_INIT_ALLOC_SIZE 8 |
166 | |
|
167 | 0 | if (parent->num_childs >= parent->allocated) { |
168 | 0 | int n, i; |
169 | |
|
170 | 0 | if (IS_NULL(parent->childs)) { |
171 | 0 | n = HISTORY_TREE_INIT_ALLOC_SIZE; |
172 | 0 | parent->childs = |
173 | 0 | (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); |
174 | 0 | CHECK_NULL_RETURN_MEMERR(parent->childs); |
175 | 0 | } |
176 | 0 | else { |
177 | 0 | OnigCaptureTreeNode** tmp; |
178 | 0 | n = parent->allocated * 2; |
179 | 0 | tmp = |
180 | 0 | (OnigCaptureTreeNode** )xrealloc(parent->childs, |
181 | 0 | sizeof(OnigCaptureTreeNode*) * n); |
182 | 0 | if (tmp == 0) { |
183 | 0 | history_tree_clear(parent); |
184 | 0 | return ONIGERR_MEMORY; |
185 | 0 | } |
186 | 0 | parent->childs = tmp; |
187 | 0 | } |
188 | 0 | for (i = parent->allocated; i < n; i++) { |
189 | 0 | parent->childs[i] = (OnigCaptureTreeNode* )0; |
190 | 0 | } |
191 | 0 | parent->allocated = n; |
192 | 0 | } |
193 | | |
194 | 0 | parent->childs[parent->num_childs] = child; |
195 | 0 | parent->num_childs++; |
196 | 0 | return 0; |
197 | 0 | } |
198 | | |
199 | | static OnigCaptureTreeNode* |
200 | | history_tree_clone(OnigCaptureTreeNode* node) |
201 | 0 | { |
202 | 0 | int i, r; |
203 | 0 | OnigCaptureTreeNode *clone, *child; |
204 | |
|
205 | 0 | clone = history_node_new(); |
206 | 0 | CHECK_NULL_RETURN(clone); |
207 | | |
208 | 0 | clone->beg = node->beg; |
209 | 0 | clone->end = node->end; |
210 | 0 | for (i = 0; i < node->num_childs; i++) { |
211 | 0 | child = history_tree_clone(node->childs[i]); |
212 | 0 | if (IS_NULL(child)) { |
213 | 0 | history_tree_free(clone); |
214 | 0 | return (OnigCaptureTreeNode* )0; |
215 | 0 | } |
216 | 0 | r = history_tree_add_child(clone, child); |
217 | 0 | if (r != 0) { |
218 | 0 | history_tree_free(child); |
219 | 0 | history_tree_free(clone); |
220 | 0 | return (OnigCaptureTreeNode* )0; |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | 0 | return clone; |
225 | 0 | } |
226 | | |
227 | | extern OnigCaptureTreeNode* |
228 | | onig_get_capture_tree(OnigRegion* region) |
229 | 0 | { |
230 | 0 | return region->history_root; |
231 | 0 | } |
232 | | #endif /* USE_CAPTURE_HISTORY */ |
233 | | |
234 | | extern void |
235 | | onig_region_clear(OnigRegion* region) |
236 | 275 | { |
237 | 275 | int i; |
238 | | |
239 | 1.65k | for (i = 0; i < region->num_regs; i++) { |
240 | 1.37k | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; |
241 | 1.37k | } |
242 | 275 | #ifdef USE_CAPTURE_HISTORY |
243 | 275 | history_root_free(region); |
244 | 275 | #endif |
245 | 275 | } |
246 | | |
247 | | extern int |
248 | | onig_region_resize(OnigRegion* region, int n) |
249 | 275 | { |
250 | 275 | region->num_regs = n; |
251 | | |
252 | 275 | if (n < ONIG_NREGION) |
253 | 275 | n = ONIG_NREGION; |
254 | | |
255 | 275 | if (region->allocated == 0) { |
256 | 275 | region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); |
257 | 275 | if (region->beg == 0) |
258 | 0 | return ONIGERR_MEMORY; |
259 | | |
260 | 275 | region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition)); |
261 | 275 | if (region->end == 0) { |
262 | 0 | xfree(region->beg); |
263 | 0 | return ONIGERR_MEMORY; |
264 | 0 | } |
265 | | |
266 | 275 | region->allocated = n; |
267 | 275 | } |
268 | 0 | else if (region->allocated < n) { |
269 | 0 | OnigPosition *tmp; |
270 | |
|
271 | 0 | region->allocated = 0; |
272 | 0 | tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition)); |
273 | 0 | if (tmp == 0) { |
274 | 0 | xfree(region->beg); |
275 | 0 | xfree(region->end); |
276 | 0 | return ONIGERR_MEMORY; |
277 | 0 | } |
278 | 0 | region->beg = tmp; |
279 | 0 | tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition)); |
280 | 0 | if (tmp == 0) { |
281 | 0 | xfree(region->beg); |
282 | 0 | xfree(region->end); |
283 | 0 | return ONIGERR_MEMORY; |
284 | 0 | } |
285 | 0 | region->end = tmp; |
286 | |
|
287 | 0 | region->allocated = n; |
288 | 0 | } |
289 | | |
290 | 275 | return 0; |
291 | 275 | } |
292 | | |
293 | | static int |
294 | | onig_region_resize_clear(OnigRegion* region, int n) |
295 | 275 | { |
296 | 275 | int r; |
297 | | |
298 | 275 | r = onig_region_resize(region, n); |
299 | 275 | if (r != 0) return r; |
300 | 275 | onig_region_clear(region); |
301 | 275 | return 0; |
302 | 275 | } |
303 | | |
304 | | extern int |
305 | | onig_region_set(OnigRegion* region, int at, int beg, int end) |
306 | 0 | { |
307 | 0 | if (at < 0) return ONIGERR_INVALID_ARGUMENT; |
308 | | |
309 | 0 | if (at >= region->allocated) { |
310 | 0 | int r = onig_region_resize(region, at + 1); |
311 | 0 | if (r < 0) return r; |
312 | 0 | } |
313 | | |
314 | 0 | region->beg[at] = beg; |
315 | 0 | region->end[at] = end; |
316 | 0 | return 0; |
317 | 0 | } |
318 | | |
319 | | extern void |
320 | | onig_region_init(OnigRegion* region) |
321 | 275 | { |
322 | 275 | region->num_regs = 0; |
323 | 275 | region->allocated = 0; |
324 | 275 | region->beg = (OnigPosition* )0; |
325 | 275 | region->end = (OnigPosition* )0; |
326 | 275 | #ifdef USE_CAPTURE_HISTORY |
327 | 275 | region->history_root = (OnigCaptureTreeNode* )0; |
328 | 275 | #endif |
329 | 275 | } |
330 | | |
331 | | extern OnigRegion* |
332 | | onig_region_new(void) |
333 | 275 | { |
334 | 275 | OnigRegion* r; |
335 | | |
336 | 275 | r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); |
337 | 275 | if (r) |
338 | 275 | onig_region_init(r); |
339 | 275 | return r; |
340 | 275 | } |
341 | | |
342 | | extern void |
343 | | onig_region_free(OnigRegion* r, int free_self) |
344 | 275 | { |
345 | 275 | if (r) { |
346 | 275 | if (r->allocated > 0) { |
347 | 275 | if (r->beg) xfree(r->beg); |
348 | 275 | if (r->end) xfree(r->end); |
349 | 275 | r->allocated = 0; |
350 | 275 | } |
351 | 275 | #ifdef USE_CAPTURE_HISTORY |
352 | 275 | history_root_free(r); |
353 | 275 | #endif |
354 | 275 | if (free_self) xfree(r); |
355 | 275 | } |
356 | 275 | } |
357 | | |
358 | | extern void |
359 | | onig_region_copy(OnigRegion* to, const OnigRegion* from) |
360 | 0 | { |
361 | 0 | #define RREGC_SIZE (sizeof(int) * from->num_regs) |
362 | 0 | int i, r; |
363 | |
|
364 | 0 | if (to == from) return; |
365 | | |
366 | 0 | r = onig_region_resize(to, from->num_regs); |
367 | 0 | if (r) return; |
368 | | |
369 | 0 | for (i = 0; i < from->num_regs; i++) { |
370 | 0 | to->beg[i] = from->beg[i]; |
371 | 0 | to->end[i] = from->end[i]; |
372 | 0 | } |
373 | 0 | to->num_regs = from->num_regs; |
374 | |
|
375 | 0 | #ifdef USE_CAPTURE_HISTORY |
376 | 0 | history_root_free(to); |
377 | |
|
378 | 0 | if (IS_NOT_NULL(from->history_root)) { |
379 | 0 | to->history_root = history_tree_clone(from->history_root); |
380 | 0 | } |
381 | 0 | #endif |
382 | 0 | } |
383 | | |
384 | | |
/* Type tags for match-stack entries plus the masks used to classify them.
 * The three tags below 0x0100 are the only ones with bits inside
 * STK_MASK_POP_USED, which is the test STACK_POP uses to decide where to
 * stop.  STK_MEM_END and STK_MEM_END_MARK both carry the 0x8000 bit that
 * STK_MASK_MEM_END_OR_MARK selects. */
385 | | /** stack **/ |
386 | 400k | #define INVALID_STACK_INDEX -1 |
387 | | |
388 | | /* stack type */ |
389 | | /* used by normal-POP */ |
390 | | #define STK_ALT 0x0001 |
391 | 0 | #define STK_LOOK_BEHIND_NOT 0x0002 |
392 | 0 | #define STK_POS_NOT 0x0003 |
393 | | /* handled by normal-POP */ |
394 | 0 | #define STK_MEM_START 0x0100 |
395 | 0 | #define STK_MEM_END 0x8200 |
396 | 0 | #define STK_REPEAT_INC 0x0300 |
397 | | #define STK_STATE_CHECK_MARK 0x1000 |
398 | | /* avoided by normal-POP */ |
399 | 0 | #define STK_NULL_CHECK_START 0x3000 |
400 | 0 | #define STK_NULL_CHECK_END 0x5000 /* for recursive call */ |
401 | 0 | #define STK_MEM_END_MARK 0x8400 |
402 | 0 | #define STK_POS 0x0500 /* used when POP-POS */ |
403 | 0 | #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ |
404 | 0 | #define STK_REPEAT 0x0700 |
405 | 0 | #define STK_CALL_FRAME 0x0800 |
406 | 0 | #define STK_RETURN 0x0900 |
407 | 0 | #define STK_VOID 0x0a00 /* for fill a blank */ |
408 | 0 | #define STK_ABSENT_POS 0x0b00 /* for absent */ |
409 | 0 | #define STK_ABSENT 0x0c00 /* absent inner loop marker */ |
410 | | |
411 | | /* stack type check mask */ |
412 | 43.9k | #define STK_MASK_POP_USED 0x00ff |
413 | 0 | #define STK_MASK_TO_VOID_TARGET 0x10ff |
414 | 0 | #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ |
415 | | |
/* MATCH_ARG_INIT fills in a match-argument object before a match or search:
 * no saved stack yet (stack_p = NULL), the caller's options, region, start
 * and gpos, and counter = 0.  The USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 * variant additionally starts best_len at ONIG_MISMATCH. */
416 | | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
417 | 275 | # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ |
418 | 275 | (msa).stack_p = (void* )0;\ |
419 | 275 | (msa).options = (arg_option);\ |
420 | 275 | (msa).region = (arg_region);\ |
421 | 275 | (msa).start = (arg_start);\ |
422 | 275 | (msa).gpos = (arg_gpos);\ |
423 | 275 | (msa).best_len = ONIG_MISMATCH;\ |
424 | 275 | (msa).counter = 0;\ |
425 | 275 | } while(0) |
426 | | #else |
427 | | # define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\ |
428 | | (msa).stack_p = (void* )0;\ |
429 | | (msa).options = (arg_option);\ |
430 | | (msa).region = (arg_region);\ |
431 | | (msa).start = (arg_start);\ |
432 | | (msa).gpos = (arg_gpos);\ |
433 | | (msa).counter = 0;\ |
434 | | } while(0) |
435 | | #endif |
436 | | |
437 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
438 | | |
/* Buffers of at least this many bytes are taken from the heap; smaller ones
 * come from xalloca().  MATCH_ARG_FREE below relies on the same threshold to
 * decide whether the buffer has to be freed. */
439 | | # define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 |
440 | | |
/* Sets up the state-check bit buffer in msa.  A buffer is created only when
 * state_num > 0, str_len reaches STATE_CHECK_STRING_THRESHOLD_LEN, and the
 * computed byte size is non-zero, larger than the scaled offset and below
 * STATE_CHECK_BUFF_MAX_SIZE; in every other case the buffer pointer is NULL
 * and its size 0.  Only the bytes from `offset` onward are zeroed.
 * Caveats visible here: `offset` is overwritten, and an allocation failure
 * returns from the ENCLOSING function through CHECK_NULL_RETURN_MEMERR. */
441 | | # define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ |
442 | | if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ |
443 | | unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ |
444 | | offset = ((offset) * (state_num)) >> 3;\ |
445 | | if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ |
446 | | if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ |
447 | | (msa).state_check_buff = (void* )xmalloc(size);\ |
448 | | CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ |
449 | | }\ |
450 | | else \ |
451 | | (msa).state_check_buff = (void* )xalloca(size);\ |
452 | | xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ |
453 | | (size_t )(size - (offset))); \ |
454 | | (msa).state_check_buff_size = size;\ |
455 | | }\ |
456 | | else {\ |
457 | | (msa).state_check_buff = (void* )0;\ |
458 | | (msa).state_check_buff_size = 0;\ |
459 | | }\ |
460 | | }\ |
461 | | else {\ |
462 | | (msa).state_check_buff = (void* )0;\ |
463 | | (msa).state_check_buff_size = 0;\ |
464 | | }\ |
465 | | } while(0) |
466 | | |
/* Frees the saved match stack, and the state-check buffer only when its size
 * shows that it was obtained from xmalloc() rather than xalloca(). */
467 | | # define MATCH_ARG_FREE(msa) do {\ |
468 | | if ((msa).stack_p) xfree((msa).stack_p);\ |
469 | | if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ |
470 | | if ((msa).state_check_buff) xfree((msa).state_check_buff);\ |
471 | | }\ |
472 | | } while(0) |
473 | | #else /* USE_COMBINATION_EXPLOSION_CHECK */ |
/* Frees the match stack saved in msa, if any.  Wrapped in do/while(0) so the
 * macro acts as one statement: as a bare `if`, a following `else` at the
 * call site would bind to the macro's own `if`. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
475 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
476 | | |
477 | | |
478 | | |
/* Largest number of OnigStackIndex slots that STACK_INIT still places in
 * xalloca() storage; above it the index array comes from xmalloc(). */
479 | 78.8k | #define MAX_PTR_NUM 100 |
480 | | |
/* Sets up the index array (alloc_addr / heap_addr) and the match stack
 * (stk_alloc, stk_base, stk, stk_end), all of which must exist in the
 * expanding scope together with `msa`.  Three cases:
 *   - ptr_num > MAX_PTR_NUM: index array from xmalloc() (heap_addr records
 *     it); the stack is msa->stack_p if one was saved, else fresh xalloca().
 *   - a saved stack exists: index array from xalloca(), stack reused.
 *   - otherwise: one xalloca() block holds the index array followed by the
 *     stack.
 * NOTE(review): the xmalloc() result in the first case is not checked
 * inside this macro - confirm the expanding code handles NULL. */
481 | 39.4k | #define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\ |
482 | 39.4k | if (ptr_num > MAX_PTR_NUM) {\ |
483 | 0 | alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\ |
484 | 0 | heap_addr = alloc_addr;\ |
485 | 0 | if (msa->stack_p) {\ |
486 | 0 | stk_alloc = (OnigStackType* )(msa->stack_p);\ |
487 | 0 | stk_base = stk_alloc;\ |
488 | 0 | stk = stk_base;\ |
489 | 0 | stk_end = stk_base + msa->stack_n;\ |
490 | 0 | } else {\ |
491 | 0 | stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\ |
492 | 0 | stk_base = stk_alloc;\ |
493 | 0 | stk = stk_base;\ |
494 | 0 | stk_end = stk_base + (stack_num);\ |
495 | 0 | }\ |
496 | 39.4k | } else if (msa->stack_p) {\ |
497 | 0 | alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\ |
498 | 0 | heap_addr = NULL;\ |
499 | 0 | stk_alloc = (OnigStackType* )(msa->stack_p);\ |
500 | 0 | stk_base = stk_alloc;\ |
501 | 0 | stk = stk_base;\ |
502 | 0 | stk_end = stk_base + msa->stack_n;\ |
503 | 0 | }\ |
504 | 39.4k | else {\ |
505 | 39.4k | alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\ |
506 | 39.4k | + sizeof(OnigStackType) * (stack_num));\ |
507 | 39.4k | heap_addr = NULL;\ |
508 | 39.4k | stk_alloc = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\ |
509 | 39.4k | stk_base = stk_alloc;\ |
510 | 39.4k | stk = stk_base;\ |
511 | 39.4k | stk_end = stk_base + (stack_num);\ |
512 | 39.4k | }\ |
513 | 39.4k | } while(0) |
514 | | |
/* Records the current stack in msa (stack_p / stack_n) whenever stk_base no
 * longer equals stk_alloc, i.e. the stack is not the block chosen by
 * STACK_INIT.  Relies on locals named stk_base, stk_alloc, stk_end and msa
 * in the expanding scope. */
515 | 39.4k | #define STACK_SAVE do{\ |
516 | 39.4k | if (stk_base != stk_alloc) {\ |
517 | 114 | msa->stack_p = stk_base;\ |
518 | 114 | msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ |
519 | 114 | };\ |
520 | 39.4k | } while(0) |
521 | | |
522 | | static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; |
523 | | |
524 | | extern unsigned int |
525 | | onig_get_match_stack_limit_size(void) |
526 | 0 | { |
527 | 0 | return MatchStackLimitSize; |
528 | 0 | } |
529 | | |
530 | | extern int |
531 | | onig_set_match_stack_limit_size(unsigned int size) |
532 | 0 | { |
533 | 0 | MatchStackLimitSize = size; |
534 | 0 | return 0; |
535 | 0 | } |
536 | | |
537 | | static int |
538 | | stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, |
539 | | OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) |
540 | 519 | { |
541 | 519 | size_t n; |
542 | 519 | OnigStackType *x, *stk_base, *stk_end, *stk; |
543 | | |
544 | 519 | stk_base = *arg_stk_base; |
545 | 519 | stk_end = *arg_stk_end; |
546 | 519 | stk = *arg_stk; |
547 | | |
548 | 519 | n = stk_end - stk_base; |
549 | 519 | if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { |
550 | 114 | x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); |
551 | 114 | if (IS_NULL(x)) { |
552 | 0 | STACK_SAVE; |
553 | 0 | return ONIGERR_MEMORY; |
554 | 0 | } |
555 | 114 | xmemcpy(x, stk_base, n * sizeof(OnigStackType)); |
556 | 114 | n *= 2; |
557 | 114 | } |
558 | 405 | else { |
559 | 405 | unsigned int limit_size = MatchStackLimitSize; |
560 | 405 | n *= 2; |
561 | 405 | if (limit_size != 0 && n > limit_size) { |
562 | 0 | if ((unsigned int )(stk_end - stk_base) == limit_size) |
563 | 0 | return ONIGERR_MATCH_STACK_LIMIT_OVER; |
564 | 0 | else |
565 | 0 | n = limit_size; |
566 | 0 | } |
567 | 405 | x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); |
568 | 405 | if (IS_NULL(x)) { |
569 | 0 | STACK_SAVE; |
570 | 0 | return ONIGERR_MEMORY; |
571 | 0 | } |
572 | 405 | } |
573 | 519 | *arg_stk = x + (stk - stk_base); |
574 | 519 | *arg_stk_base = x; |
575 | 519 | *arg_stk_end = x + n; |
576 | 519 | return 0; |
577 | 519 | } |
578 | | |
/* Guarantees room for n more entries, growing the stack via stack_double()
 * when fewer remain.  On failure it saves the stack, frees xmalloc_base if
 * set, and RETURNS the error code from the enclosing function.  Growing may
 * move the stack, so stk_base, stk and stk_end can all change. */
579 | 1.11M | #define STACK_ENSURE(n) do {\ |
580 | 1.11M | if (stk_end - stk < (n)) {\ |
581 | 519 | int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ |
582 | 519 | if (r != 0) {\ |
583 | 0 | STACK_SAVE;\ |
584 | 0 | if (xmalloc_base) xfree(xmalloc_base);\ |
585 | 0 | return r;\ |
586 | 0 | }\ |
587 | 519 | }\ |
588 | 1.11M | } while(0) |
589 | | |
/* Conversions between an entry pointer and its index relative to stk_base. */
590 | 0 | #define STACK_AT(index) (stk_base + (index)) |
591 | 0 | #define GET_STACK_INDEX(stk) ((stk) - stk_base) |
592 | | |
/* Pushes an entry that carries nothing but its type tag. */
593 | 0 | #define STACK_PUSH_TYPE(stack_type) do {\ |
594 | 0 | STACK_ENSURE(1);\ |
595 | 0 | stk->type = (stack_type);\ |
596 | 0 | STACK_INC;\ |
597 | 0 | } while(0) |
598 | | |
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
599 | 0 | #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) |
600 | | |
/* Two builds of the basic push macros follow.  With
 * USE_COMBINATION_EXPLOSION_CHECK every state entry also carries a
 * state_check number and a bit buffer (state_check_buff) is indexed by
 * STATE_CHECK_POS; without it those parts are compiled out and
 * ELSE_IF_STATE_CHECK_MARK expands to nothing. */
601 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
/* Bit index for string position s and 1-based check number snum. */
602 | | # define STATE_CHECK_POS(s,snum) \ |
603 | | (((s) - str) * num_comb_exp_check + ((snum) - 1)) |
/* Reads that bit into v (0 when no buffer exists).  The position always
 * comes from the local `s`, not from a macro argument. */
604 | | # define STATE_CHECK_VAL(v,snum) do {\ |
605 | | if (state_check_buff != NULL) {\ |
606 | | ptrdiff_t x = STATE_CHECK_POS(s,snum);\ |
607 | | (v) = state_check_buff[x/8] & (1<<(x%8));\ |
608 | | }\ |
609 | | else (v) = 0;\ |
610 | | } while(0) |
611 | | |
612 | | |
/* Extra branch for the pop loops: a popped STK_STATE_CHECK_MARK entry sets
 * its bit in the buffer. */
613 | | # define ELSE_IF_STATE_CHECK_MARK(stk) \ |
614 | | else if ((stk)->type == STK_STATE_CHECK_MARK) { \ |
615 | | ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ |
616 | | state_check_buff[x/8] |= (1<<(x%8)); \ |
617 | | } |
618 | | |
/* Pushes a full state entry (code pointer, string position, previous
 * position, keep position) with state_check cleared. */
619 | | # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ |
620 | | STACK_ENSURE(1);\ |
621 | | stk->type = (stack_type);\ |
622 | | stk->u.state.pcode = (pat);\ |
623 | | stk->u.state.pstr = (s);\ |
624 | | stk->u.state.pstr_prev = (sprev);\ |
625 | | stk->u.state.state_check = 0;\ |
626 | | stk->u.state.pkeep = (keep);\ |
627 | | STACK_INC;\ |
628 | | } while(0) |
629 | | |
/* Push without a capacity check; the caller must already have ensured room.
 * Only type and pcode (and state_check) are written. */
630 | | # define STACK_PUSH_ENSURED(stack_type,pat) do {\ |
631 | | stk->type = (stack_type);\ |
632 | | stk->u.state.pcode = (pat);\ |
633 | | stk->u.state.state_check = 0;\ |
634 | | STACK_INC;\ |
635 | | } while(0) |
636 | | |
/* STK_ALT push that records snum as state_check when a buffer exists. */
637 | | # define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\ |
638 | | STACK_ENSURE(1);\ |
639 | | stk->type = STK_ALT;\ |
640 | | stk->u.state.pcode = (pat);\ |
641 | | stk->u.state.pstr = (s);\ |
642 | | stk->u.state.pstr_prev = (sprev);\ |
643 | | stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ |
644 | | stk->u.state.pkeep = (keep);\ |
645 | | STACK_INC;\ |
646 | | } while(0) |
647 | | |
/* Pushes a STK_STATE_CHECK_MARK entry; does nothing without a buffer. */
648 | | # define STACK_PUSH_STATE_CHECK(s,snum) do {\ |
649 | | if (state_check_buff != NULL) {\ |
650 | | STACK_ENSURE(1);\ |
651 | | stk->type = STK_STATE_CHECK_MARK;\ |
652 | | stk->u.state.pstr = (s);\ |
653 | | stk->u.state.state_check = (snum);\ |
654 | | STACK_INC;\ |
655 | | }\ |
656 | | } while(0) |
657 | | |
658 | | #else /* USE_COMBINATION_EXPLOSION_CHECK */ |
659 | | |
660 | | # define ELSE_IF_STATE_CHECK_MARK(stk) |
661 | | |
/* Pushes a full state entry (code pointer, string position, previous
 * position, keep position). */
662 | 1.11M | # define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\ |
663 | 1.11M | STACK_ENSURE(1);\ |
664 | 1.11M | stk->type = (stack_type);\ |
665 | 1.11M | stk->u.state.pcode = (pat);\ |
666 | 1.11M | stk->u.state.pstr = (s);\ |
667 | 1.11M | stk->u.state.pstr_prev = (sprev);\ |
668 | 1.11M | stk->u.state.pkeep = (keep);\ |
669 | 1.11M | STACK_INC;\ |
670 | 1.11M | } while(0) |
671 | | |
/* Push without a capacity check; the caller must already have ensured room.
 * Only type and pcode are written. */
672 | 39.4k | # define STACK_PUSH_ENSURED(stack_type,pat) do {\ |
673 | 39.4k | stk->type = (stack_type);\ |
674 | 39.4k | stk->u.state.pcode = (pat);\ |
675 | 39.4k | STACK_INC;\ |
676 | 39.4k | } while(0) |
677 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
678 | | |
/* Typed wrappers around STACK_PUSH / STACK_PUSH_TYPE. */
679 | 1.11M | #define STACK_PUSH_ALT(pat,s,sprev,keep) STACK_PUSH(STK_ALT,pat,s,sprev,keep) |
680 | 0 | #define STACK_PUSH_POS(s,sprev,keep) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep) |
681 | 0 | #define STACK_PUSH_POS_NOT(pat,s,sprev,keep) STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep) |
682 | 0 | #define STACK_PUSH_ABSENT STACK_PUSH_TYPE(STK_ABSENT) |
683 | 0 | #define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) |
684 | | #define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \ |
685 | 0 | STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep) |
686 | | |
/* Pushes a repeat entry for repeat number `id` with its count at 0. */
687 | 0 | #define STACK_PUSH_REPEAT(id, pat) do {\ |
688 | 0 | STACK_ENSURE(1);\ |
689 | 0 | stk->type = STK_REPEAT;\ |
690 | 0 | stk->u.repeat.num = (id);\ |
691 | 0 | stk->u.repeat.pcode = (pat);\ |
692 | 0 | stk->u.repeat.count = 0;\ |
693 | 0 | STACK_INC;\ |
694 | 0 | } while(0) |
695 | | |
/* Records the stack index of a repeat entry; the pop loops use it to
 * decrement that entry's count again. */
696 | 0 | #define STACK_PUSH_REPEAT_INC(sindex) do {\ |
697 | 0 | STACK_ENSURE(1);\ |
698 | 0 | stk->type = STK_REPEAT_INC;\ |
699 | 0 | stk->u.repeat_inc.si = (sindex);\ |
700 | 0 | STACK_INC;\ |
701 | 0 | } while(0) |
702 | | |
/* Pushes the start of capture group mnum at position s.  The previous
 * mem_start_stk/mem_end_stk values are stored in the entry so a pop can
 * restore them; the group's start then points at this entry and its end
 * becomes INVALID_STACK_INDEX. */
703 | 0 | #define STACK_PUSH_MEM_START(mnum, s) do {\ |
704 | 0 | STACK_ENSURE(1);\ |
705 | 0 | stk->type = STK_MEM_START;\ |
706 | 0 | stk->u.mem.num = (mnum);\ |
707 | 0 | stk->u.mem.pstr = (s);\ |
708 | 0 | stk->u.mem.start = mem_start_stk[mnum];\ |
709 | 0 | stk->u.mem.end = mem_end_stk[mnum];\ |
710 | 0 | mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ |
711 | 0 | mem_end_stk[mnum] = INVALID_STACK_INDEX;\ |
712 | 0 | STACK_INC;\ |
713 | 0 | } while(0) |
714 | | |
/* Pushes the end of capture group mnum at position s, saving the previous
 * start/end values and pointing mem_end_stk[mnum] at this entry. */
715 | 0 | #define STACK_PUSH_MEM_END(mnum, s) do {\ |
716 | 0 | STACK_ENSURE(1);\ |
717 | 0 | stk->type = STK_MEM_END;\ |
718 | 0 | stk->u.mem.num = (mnum);\ |
719 | 0 | stk->u.mem.pstr = (s);\ |
720 | 0 | stk->u.mem.start = mem_start_stk[mnum];\ |
721 | 0 | stk->u.mem.end = mem_end_stk[mnum];\ |
722 | 0 | mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ |
723 | 0 | STACK_INC;\ |
724 | 0 | } while(0) |
725 | | |
/* Pushes an end marker for group mnum; only the group number is stored. */
726 | 0 | #define STACK_PUSH_MEM_END_MARK(mnum) do {\ |
727 | 0 | STACK_ENSURE(1);\ |
728 | 0 | stk->type = STK_MEM_END_MARK;\ |
729 | 0 | stk->u.mem.num = (mnum);\ |
730 | 0 | STACK_INC;\ |
731 | 0 | } while(0) |
732 | | |
/* Walks down from the top looking for the STK_MEM_START of group mnum that
 * matches the current nesting level: each end/end-mark entry of the same
 * group raises the level, each start entry lowers it.  k is left on the
 * entry found; if the loop runs out, k ends at stk_base. */
733 | 0 | #define STACK_GET_MEM_START(mnum, k) do {\ |
734 | 0 | int level = 0;\ |
735 | 0 | k = stk;\ |
736 | 0 | while (k > stk_base) {\ |
737 | 0 | k--;\ |
738 | 0 | if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ |
739 | 0 | && k->u.mem.num == (mnum)) {\ |
740 | 0 | level++;\ |
741 | 0 | }\ |
742 | 0 | else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ |
743 | 0 | if (level == 0) break;\ |
744 | 0 | level--;\ |
745 | 0 | }\ |
746 | 0 | }\ |
747 | 0 | } while(0) |
748 | | |
/* Walks up from k toward the top and reports the string range of group
 * mnum: `start` from the outermost STK_MEM_START seen, `end` from the
 * STK_MEM_END that closes it.  Neither output is written if the matching
 * entry is not found. */
749 | | #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ |
750 | | int level = 0;\ |
751 | | while (k < stk) {\ |
752 | | if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ |
753 | | if (level == 0) (start) = k->u.mem.pstr;\ |
754 | | level++;\ |
755 | | }\ |
756 | | else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ |
757 | | level--;\ |
758 | | if (level == 0) {\ |
759 | | (end) = k->u.mem.pstr;\ |
760 | | break;\ |
761 | | }\ |
762 | | }\ |
763 | | k++;\ |
764 | | }\ |
765 | | } while(0) |
766 | | |
/* Null-check bookkeeping: the start entry stores check number cnum and the
 * string position, the end entry stores only the number. */
767 | 0 | #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ |
768 | 0 | STACK_ENSURE(1);\ |
769 | 0 | stk->type = STK_NULL_CHECK_START;\ |
770 | 0 | stk->u.null_check.num = (cnum);\ |
771 | 0 | stk->u.null_check.pstr = (s);\ |
772 | 0 | STACK_INC;\ |
773 | 0 | } while(0) |
774 | | |
775 | 0 | #define STACK_PUSH_NULL_CHECK_END(cnum) do {\ |
776 | 0 | STACK_ENSURE(1);\ |
777 | 0 | stk->type = STK_NULL_CHECK_END;\ |
778 | 0 | stk->u.null_check.num = (cnum);\ |
779 | 0 | STACK_INC;\ |
780 | 0 | } while(0) |
781 | | |
/* Call/return bookkeeping: a call frame stores the return address `pat`,
 * a return entry carries only its type. */
782 | 0 | #define STACK_PUSH_CALL_FRAME(pat) do {\ |
783 | 0 | STACK_ENSURE(1);\ |
784 | 0 | stk->type = STK_CALL_FRAME;\ |
785 | 0 | stk->u.call_frame.ret_addr = (pat);\ |
786 | 0 | STACK_INC;\ |
787 | 0 | } while(0) |
788 | | |
789 | 0 | #define STACK_PUSH_RETURN do {\ |
790 | 0 | STACK_ENSURE(1);\ |
791 | 0 | stk->type = STK_RETURN;\ |
792 | 0 | STACK_INC;\ |
793 | 0 | } while(0) |
794 | | |
/* Saves a pair of string positions for the absent operator; read back by
 * STACK_POP_ABSENT_POS. */
795 | 0 | #define STACK_PUSH_ABSENT_POS(start, end) do {\ |
796 | 0 | STACK_ENSURE(1);\ |
797 | 0 | stk->type = STK_ABSENT_POS;\ |
798 | 0 | stk->u.absent_pos.abs_pstr = (start);\ |
799 | 0 | stk->u.absent_pos.end_pstr = (end);\ |
800 | 0 | STACK_INC;\ |
801 | 0 | } while(0) |
802 | | |
803 | | |
/* Debug-only underflow guard: when p has dropped below stk_base it prints
 * the `at` label to stderr and jumps to the stack_error label of the
 * enclosing function.  Compiled out entirely without ONIG_DEBUG. */
804 | | #ifdef ONIG_DEBUG |
805 | | # define STACK_BASE_CHECK(p, at) \ |
806 | | if ((p) < stk_base) {\ |
807 | | fprintf(stderr, "at %s\n", at);\ |
808 | | goto stack_error;\ |
809 | | } |
810 | | #else |
811 | | # define STACK_BASE_CHECK(p, at) |
812 | | #endif |
813 | | |
814 | 290k | #define STACK_POP_ONE do {\ |
815 | 290k | stk--;\ |
816 | 290k | STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ |
817 | 290k | } while(0) |
818 | | |
819 | 43.9k | #define STACK_POP do {\ |
820 | 43.9k | switch (pop_level) {\ |
821 | 43.9k | case STACK_POP_LEVEL_FREE:\ |
822 | 43.9k | while (1) {\ |
823 | 43.9k | stk--;\ |
824 | 43.9k | STACK_BASE_CHECK(stk, "STACK_POP"); \ |
825 | 43.9k | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
826 | 43.9k | ELSE_IF_STATE_CHECK_MARK(stk);\ |
827 | 0 | }\ |
828 | 43.9k | break;\ |
829 | 0 | case STACK_POP_LEVEL_MEM_START:\ |
830 | 0 | while (1) {\ |
831 | 0 | stk--;\ |
832 | 0 | STACK_BASE_CHECK(stk, "STACK_POP 2"); \ |
833 | 0 | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
834 | 0 | else if (stk->type == STK_MEM_START) {\ |
835 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
836 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
837 | 0 | }\ |
838 | 0 | ELSE_IF_STATE_CHECK_MARK(stk);\ |
839 | 0 | }\ |
840 | 0 | break;\ |
841 | 0 | default:\ |
842 | 0 | while (1) {\ |
843 | 0 | stk--;\ |
844 | 0 | STACK_BASE_CHECK(stk, "STACK_POP 3"); \ |
845 | 0 | if ((stk->type & STK_MASK_POP_USED) != 0) break;\ |
846 | 0 | else if (stk->type == STK_MEM_START) {\ |
847 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
848 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
849 | 0 | }\ |
850 | 0 | else if (stk->type == STK_REPEAT_INC) {\ |
851 | 0 | STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ |
852 | 0 | }\ |
853 | 0 | else if (stk->type == STK_MEM_END) {\ |
854 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
855 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
856 | 0 | }\ |
857 | 0 | ELSE_IF_STATE_CHECK_MARK(stk);\ |
858 | 0 | }\ |
859 | 0 | break;\ |
860 | 43.9k | }\ |
861 | 43.9k | } while(0) |
862 | | |
863 | 0 | #define STACK_POP_TIL_POS_NOT do {\ |
864 | 0 | while (1) {\ |
865 | 0 | stk--;\ |
866 | 0 | STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ |
867 | 0 | if (stk->type == STK_POS_NOT) break;\ |
868 | 0 | else if (stk->type == STK_MEM_START) {\ |
869 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
870 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
871 | 0 | }\ |
872 | 0 | else if (stk->type == STK_REPEAT_INC) {\ |
873 | 0 | STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ |
874 | 0 | }\ |
875 | 0 | else if (stk->type == STK_MEM_END) {\ |
876 | 0 | mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ |
877 | 0 | mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ |
878 | 0 | }\ |
879 | 0 | ELSE_IF_STATE_CHECK_MARK(stk);\ |
880 | 0 | }\ |
881 | 0 | } while(0) |
882 | | |
/*
 * Pop the match stack until a STK_LOOK_BEHIND_NOT entry is reached; on exit
 * `stk` points at that entry.
 * Capture bookkeeping stored in STK_MEM_START / STK_MEM_END entries is
 * restored into mem_start_stk[] / mem_end_stk[], and every STK_REPEAT_INC
 * entry decrements the count of the entry located through STACK_AT(si).
 */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
902 | | |
/*
 * Pop the match stack until a STK_ABSENT entry is reached; on exit `stk`
 * points at that entry.
 * Capture bookkeeping stored in STK_MEM_START / STK_MEM_END entries is
 * restored into mem_start_stk[] / mem_end_stk[], and every STK_REPEAT_INC
 * entry decrements the count of the entry located through STACK_AT(si).
 */
#define STACK_POP_TIL_ABSENT  do {\
  for (;;) {\
    --stk;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
922 | | |
/*
 * Pop exactly one entry and copy its absent_pos payload out:
 *   start <- abs_pstr, end <- end_pstr.
 * The entry type is not inspected here; the caller is expected to know that
 * the entry below the top carries absent_pos data.
 */
#define STACK_POP_ABSENT_POS(start, end) do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end)   = stk->u.absent_pos.end_pstr;\
} while(0)
929 | | |
/*
 * Walk downward from the stack top using the caller-supplied cursor `k`.
 * Every entry for which IS_TO_VOID_TARGET() holds is retyped to STK_VOID;
 * the walk stops at the first remaining entry of type STK_POS, which is
 * retyped to STK_VOID as well.  `stk` itself is not moved; `k` is left on
 * the former STK_POS entry.
 */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
944 | | |
/*
 * Walk downward from the stack top with a private cursor.  Every entry for
 * which IS_TO_VOID_TARGET() holds is retyped to STK_VOID; the walk stops at
 * the first remaining entry of type STK_STOP_BT, which is retyped to
 * STK_VOID as well.  `stk` itself is not moved.
 */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
959 | | |
/*
 * Search downward from the stack top for the nearest STK_NULL_CHECK_START
 * entry whose number equals `id`, then set `isnull` to 1 when the position
 * recorded in that entry equals `s`, and to 0 otherwise.
 * `stk` is not moved.
 */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = stk;\
  do {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
  } while (k->type != STK_NULL_CHECK_START || k->u.null_check.num != (id));\
  (isnull) = (k->u.null_check.pstr == (s));\
} while(0)
973 | | |
/*
 * Variant of STACK_NULL_CHECK that tolerates completed checks of the same
 * id lying on the stack.
 *
 * Scans downward from the stack top for the STK_NULL_CHECK_START entry
 * numbered `id` that is still open:
 *   - a STK_NULL_CHECK_END entry numbered `id` means the next START with
 *     that id below it is already closed, so `level` is raised;
 *   - a START numbered `id` seen while `level` > 0 is such a closed START
 *     and only lowers `level`;
 *   - a START numbered `id` seen at `level` == 0 is the match: `isnull`
 *     becomes 1 if its recorded position equals `s`, else 0.
 *
 * Entries belonging to other ids are ignored on BOTH sides.  Counting END
 * entries of foreign ids (while only discounting START entries of `id`)
 * would leave `level` permanently above zero, skip the real START and run
 * the scan past the bottom of the stack.  This matches the filtering done
 * by STACK_NULL_CHECK_MEMST_REC.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
994 | | |
/*
 * Null-loop check that also takes capture state into account.
 *
 * Finds the nearest STK_NULL_CHECK_START entry numbered `id` below the
 * stack top.  If the position recorded there differs from `s`, `isnull`
 * is 0.  Otherwise `isnull` starts at 1 and every STK_MEM_START entry
 * between that entry and the stack top is examined:
 *   - saved end index is INVALID_STACK_INDEX        -> isnull = 0, stop;
 *   - the start position found through STACK_AT(start) differs from the
 *     end position                                  -> isnull = 0, stop;
 *   - they are equal but the end position is not `s` -> isnull = -1
 *     (empty, but position changed) and the scan continues.
 * The end position is read through STACK_AT() when the group's bit is set
 * in reg->bt_mem_end, and taken directly from the saved end value otherwise.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = stk;\
  do {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
  } while (k->type != STK_NULL_CHECK_START || k->u.null_check.num != (id));\
  if (k->u.null_check.pstr != (s)) {\
    (isnull) = 0;\
  }\
  else {\
    UChar* endp;\
    (isnull) = 1;\
    for (; k < stk; k++) {\
      if (k->type != STK_MEM_START) continue;\
      if (k->u.mem.end == INVALID_STACK_INDEX) {\
        (isnull) = 0; break;\
      }\
      if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
        endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
      else\
        endp = (UChar* )k->u.mem.end;\
      if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
        (isnull) = 0; break;\
      }\
      if (endp != s) {\
        (isnull) = -1; /* empty, but position changed */ \
      }\
    }\
  }\
} while(0)
1033 | | |
/*
 * Recursion-aware form of STACK_NULL_CHECK_MEMST.
 *
 * While scanning downward, a STK_NULL_CHECK_END entry numbered `id` raises
 * `level`, and a STK_NULL_CHECK_START entry numbered `id` met while `level`
 * is above zero lowers it again; such START entries are skipped.
 * The first START numbered `id` met at level 0 is then evaluated exactly as
 * in STACK_NULL_CHECK_MEMST: `isnull` is 0 if the recorded position differs
 * from `s`; otherwise it is 1, or 0 / -1 depending on the STK_MEM_START
 * entries lying between that entry and the stack top.
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
      continue;\
    }\
    if (k->type != STK_NULL_CHECK_START) continue;\
    if (k->u.null_check.num != (id)) continue;\
    if (level != 0) {\
      level--;\
      continue;\
    }\
    if (k->u.null_check.pstr != (s)) {\
      (isnull) = 0;\
    }\
    else {\
      UChar* endp;\
      (isnull) = 1;\
      for (; k < stk; k++) {\
        if (k->type != STK_MEM_START) continue;\
        if (k->u.mem.end == INVALID_STACK_INDEX) {\
          (isnull) = 0; break;\
        }\
        if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
          endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
        else\
          endp = (UChar* )k->u.mem.end;\
        if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
    }\
    break;\
  }\
} while(0)
1081 | | |
/*
 * Leave the caller-supplied cursor `k` on the nearest STK_REPEAT entry
 * numbered `id` that is seen while `level` is zero.
 * Scanning downward, each STK_RETURN entry raises `level` and each
 * STK_CALL_FRAME entry lowers it, so STK_REPEAT entries passed while
 * `level` is non-zero are ignored.  `stk` is not moved.
 */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN) level++;\
    else if (k->type == STK_REPEAT && level == 0 &&\
             k->u.repeat.num == (id)) break;\
  }\
} while(0)
1099 | | |
/*
 * Store into `addr` the ret_addr of the nearest STK_CALL_FRAME entry seen
 * while `level` is zero.
 * Scanning downward, each STK_RETURN entry raises `level`; a STK_CALL_FRAME
 * entry met while `level` is above zero only lowers it and is skipped.
 * `stk` is not moved.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
      continue;\
    }\
    if (k->type != STK_CALL_FRAME) continue;\
    if (level == 0) break;\
    level--;\
  }\
  (addr) = k->u.call_frame.ret_addr;\
} while(0)
1117 | | |
1118 | | |
/*
 * Compare `len` bytes at s1 and s2, jumping to the enclosing function's
 * `fail` label on the first difference.
 * All three arguments are modified in place: s1 and s2 are advanced past
 * every byte examined and len is counted down.
 */
#define STRING_CMP(s1,s2,len) do {\
  while (len-- > 0) {\
    if (*s1++ == *s2++) continue;\
    goto fail;\
  }\
} while(0)
1124 | | |
/*
 * Case-insensitive comparison via string_cmp_ic(), using the `encode`
 * variable of the enclosing function.  Jumps to `fail` when
 * string_cmp_ic() returns 0.
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (!string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end)) \
    goto fail; \
} while(0)
1129 | | |
1130 | | static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, |
1131 | | UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end) |
1132 | 0 | { |
1133 | 0 | UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1134 | 0 | UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1135 | 0 | UChar *p1, *p2, *end1, *s2; |
1136 | 0 | int len1, len2; |
1137 | |
|
1138 | 0 | s2 = *ps2; |
1139 | 0 | end1 = s1 + mblen; |
1140 | 0 | while (s1 < end1) { |
1141 | 0 | len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); |
1142 | 0 | len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); |
1143 | 0 | if (len1 != len2) return 0; |
1144 | 0 | p1 = buf1; |
1145 | 0 | p2 = buf2; |
1146 | 0 | while (len1-- > 0) { |
1147 | 0 | if (*p1 != *p2) return 0; |
1148 | 0 | p1++; |
1149 | 0 | p2++; |
1150 | 0 | } |
1151 | 0 | } |
1152 | | |
1153 | 0 | *ps2 = s2; |
1154 | 0 | return 1; |
1155 | 0 | } |
1156 | | |
/*
 * Compare `len` bytes at s1 and s2 and report the outcome in `is_fail`
 * (1 on the first difference, 0 when all bytes matched) instead of jumping.
 * s1, s2 and len are modified in place, as in STRING_CMP.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ == *s2++) continue;\
    is_fail = 1;\
    break;\
  }\
} while(0)
1165 | | |
/*
 * Case-insensitive counterpart of STRING_CMP_VALUE: `is_fail` becomes 1 when
 * string_cmp_ic() returns 0 and 0 otherwise.  Uses the `encode` variable of
 * the enclosing function.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = \
    (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0); \
} while(0)
1172 | | |
1173 | | |
/* Position predicates; they read the `str` and `end` variables of the
   enclosing function. */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/* Upper bound for reads at the current position `s`: `right_range` when
   matches must stay inside the caller-specified range, `end` otherwise. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define ABSENT_END_POS        right_range
#else
# define ABSENT_END_POS        end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */

/* Bounds checks against that limit.  DATA_ENSURE jumps to `fail` and
   DATA_ENSURE_CONTINUE issues `continue` when fewer than n bytes remain.
   Both expand to a bare `if`, so they must be used as complete statements. */
#define DATA_ENSURE_CHECK1       (s < ABSENT_END_POS)
#define DATA_ENSURE_CHECK(n)     (s + (n) <= ABSENT_END_POS)
#define DATA_ENSURE(n)           if (s + (n) > ABSENT_END_POS) goto fail
#define DATA_ENSURE_CONTINUE(n)  if (s + (n) > ABSENT_END_POS) continue
1190 | | |
1191 | | |
1192 | | #ifdef USE_CAPTURE_HISTORY |
1193 | | static int |
1194 | | make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, |
1195 | | OnigStackType* stk_top, UChar* str, regex_t* reg) |
1196 | 0 | { |
1197 | 0 | int n, r; |
1198 | 0 | OnigCaptureTreeNode* child; |
1199 | 0 | OnigStackType* k = *kp; |
1200 | |
|
1201 | 0 | while (k < stk_top) { |
1202 | 0 | if (k->type == STK_MEM_START) { |
1203 | 0 | n = k->u.mem.num; |
1204 | 0 | if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && |
1205 | 0 | BIT_STATUS_AT(reg->capture_history, n) != 0) { |
1206 | 0 | child = history_node_new(); |
1207 | 0 | CHECK_NULL_RETURN_MEMERR(child); |
1208 | 0 | child->group = n; |
1209 | 0 | child->beg = k->u.mem.pstr - str; |
1210 | 0 | r = history_tree_add_child(node, child); |
1211 | 0 | if (r != 0) { |
1212 | 0 | history_tree_free(child); |
1213 | 0 | return r; |
1214 | 0 | } |
1215 | 0 | *kp = (k + 1); |
1216 | 0 | r = make_capture_history_tree(child, kp, stk_top, str, reg); |
1217 | 0 | if (r != 0) return r; |
1218 | | |
1219 | 0 | k = *kp; |
1220 | 0 | child->end = k->u.mem.pstr - str; |
1221 | 0 | } |
1222 | 0 | } |
1223 | 0 | else if (k->type == STK_MEM_END) { |
1224 | 0 | if (k->u.mem.num == node->group) { |
1225 | 0 | node->end = k->u.mem.pstr - str; |
1226 | 0 | *kp = k; |
1227 | 0 | return 0; |
1228 | 0 | } |
1229 | 0 | } |
1230 | 0 | k++; |
1231 | 0 | } |
1232 | | |
1233 | 0 | return 1; /* 1: root node ending. */ |
1234 | 0 | } |
1235 | | #endif /* USE_CAPTURE_HISTORY */ |
1236 | | |
1237 | | #ifdef USE_BACKREF_WITH_LEVEL |
1238 | | static int mem_is_in_memp(int mem, int num, UChar* memp) |
1239 | 0 | { |
1240 | 0 | int i; |
1241 | 0 | MemNumType m; |
1242 | |
|
1243 | 0 | for (i = 0; i < num; i++) { |
1244 | 0 | GET_MEMNUM_INC(m, memp); |
1245 | 0 | if (mem == (int )m) return 1; |
1246 | 0 | } |
1247 | 0 | return 0; |
1248 | 0 | } |
1249 | | |
1250 | | static int backref_match_at_nested_level(regex_t* reg, |
1251 | | OnigStackType* top, OnigStackType* stk_base, |
1252 | | int ignore_case, int case_fold_flag, |
1253 | | int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) |
1254 | 0 | { |
1255 | 0 | UChar *ss, *p, *pstart, *pend = NULL_UCHARP; |
1256 | 0 | int level; |
1257 | 0 | OnigStackType* k; |
1258 | |
|
1259 | 0 | level = 0; |
1260 | 0 | k = top; |
1261 | 0 | k--; |
1262 | 0 | while (k >= stk_base) { |
1263 | 0 | if (k->type == STK_CALL_FRAME) { |
1264 | 0 | level--; |
1265 | 0 | } |
1266 | 0 | else if (k->type == STK_RETURN) { |
1267 | 0 | level++; |
1268 | 0 | } |
1269 | 0 | else if (level == nest) { |
1270 | 0 | if (k->type == STK_MEM_START) { |
1271 | 0 | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { |
1272 | 0 | pstart = k->u.mem.pstr; |
1273 | 0 | if (pend != NULL_UCHARP) { |
1274 | 0 | if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ |
1275 | 0 | p = pstart; |
1276 | 0 | ss = *s; |
1277 | |
|
1278 | 0 | if (ignore_case != 0) { |
1279 | 0 | if (string_cmp_ic(reg->enc, case_fold_flag, |
1280 | 0 | pstart, &ss, pend - pstart, send) == 0) |
1281 | 0 | return 0; /* or goto next_mem; */ |
1282 | 0 | } |
1283 | 0 | else { |
1284 | 0 | while (p < pend) { |
1285 | 0 | if (*p++ != *ss++) return 0; /* or goto next_mem; */ |
1286 | 0 | } |
1287 | 0 | } |
1288 | | |
1289 | 0 | *s = ss; |
1290 | 0 | return 1; |
1291 | 0 | } |
1292 | 0 | } |
1293 | 0 | } |
1294 | 0 | else if (k->type == STK_MEM_END) { |
1295 | 0 | if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { |
1296 | 0 | pend = k->u.mem.pstr; |
1297 | 0 | } |
1298 | 0 | } |
1299 | 0 | } |
1300 | 0 | k--; |
1301 | 0 | } |
1302 | | |
1303 | 0 | return 0; |
1304 | 0 | } |
1305 | | #endif /* USE_BACKREF_WITH_LEVEL */ |
1306 | | |
1307 | | |
1308 | | #ifdef ONIG_DEBUG_STATISTICS |
1309 | | |
1310 | | # ifdef _WIN32 |
1311 | | # include <windows.h> |
1312 | | static LARGE_INTEGER ts, te, freq; |
1313 | | # define GETTIME(t) QueryPerformanceCounter(&(t)) |
1314 | | # define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \ |
1315 | | * 1000000 / freq.QuadPart) |
1316 | | # else /* _WIN32 */ |
1317 | | |
1318 | | # define USE_TIMEOFDAY |
1319 | | |
1320 | | # ifdef USE_TIMEOFDAY |
1321 | | # ifdef HAVE_SYS_TIME_H |
1322 | | # include <sys/time.h> |
1323 | | # endif |
1324 | | # ifdef HAVE_UNISTD_H |
1325 | | # include <unistd.h> |
1326 | | # endif |
1327 | | static struct timeval ts, te; |
1328 | | # define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) |
1329 | | # define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ |
1330 | | (((te).tv_sec - (ts).tv_sec)*1000000)) |
1331 | | # else /* USE_TIMEOFDAY */ |
1332 | | # ifdef HAVE_SYS_TIMES_H |
1333 | | # include <sys/times.h> |
1334 | | # endif |
1335 | | static struct tms ts, te; |
1336 | | # define GETTIME(t) times(&(t)) |
1337 | | # define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) |
1338 | | # endif /* USE_TIMEOFDAY */ |
1339 | | |
1340 | | # endif /* _WIN32 */ |
1341 | | |
1342 | | static int OpCounter[256]; |
1343 | | static int OpPrevCounter[256]; |
1344 | | static unsigned long OpTime[256]; |
1345 | | static int OpCurr = OP_FINISH; |
1346 | | static int OpPrevTarget = OP_FAIL; |
1347 | | static int MaxStackDepth = 0; |
1348 | | |
1349 | | # define MOP_IN(opcode) do {\ |
1350 | | if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ |
1351 | | OpCurr = opcode;\ |
1352 | | OpCounter[opcode]++;\ |
1353 | | GETTIME(ts);\ |
1354 | | } while(0) |
1355 | | |
1356 | | # define MOP_OUT do {\ |
1357 | | GETTIME(te);\ |
1358 | | OpTime[OpCurr] += TIMEDIFF(te, ts);\ |
1359 | | } while(0) |
1360 | | |
1361 | | extern void |
1362 | | onig_statistics_init(void) |
1363 | | { |
1364 | | int i; |
1365 | | for (i = 0; i < 256; i++) { |
1366 | | OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; |
1367 | | } |
1368 | | MaxStackDepth = 0; |
1369 | | # ifdef _WIN32 |
1370 | | QueryPerformanceFrequency(&freq); |
1371 | | # endif |
1372 | | } |
1373 | | |
1374 | | extern void |
1375 | | onig_print_statistics(FILE* f) |
1376 | | { |
1377 | | int i; |
1378 | | fprintf(f, " count prev time\n"); |
1379 | | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { |
1380 | | fprintf(f, "%8d: %8d: %10lu: %s\n", |
1381 | | OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); |
1382 | | } |
1383 | | fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); |
1384 | | } |
1385 | | |
1386 | | # define STACK_INC do {\ |
1387 | | stk++;\ |
1388 | | if (stk - stk_base > MaxStackDepth) \ |
1389 | | MaxStackDepth = stk - stk_base;\ |
1390 | | } while(0) |
1391 | | |
1392 | | #else /* ONIG_DEBUG_STATISTICS */ |
1393 | 1.15M | # define STACK_INC stk++ |
1394 | | |
1395 | | # define MOP_IN(opcode) |
1396 | | # define MOP_OUT |
1397 | | #endif /* ONIG_DEBUG_STATISTICS */ |
1398 | | |
1399 | | |
1400 | | #ifdef ONIG_DEBUG_MATCH |
1401 | | static char * |
1402 | | stack_type_str(int stack_type) |
1403 | | { |
1404 | | switch (stack_type) { |
1405 | | case STK_ALT: return "Alt "; |
1406 | | case STK_LOOK_BEHIND_NOT: return "LBNot "; |
1407 | | case STK_POS_NOT: return "PosNot"; |
1408 | | case STK_MEM_START: return "MemS "; |
1409 | | case STK_MEM_END: return "MemE "; |
1410 | | case STK_REPEAT_INC: return "RepInc"; |
1411 | | case STK_STATE_CHECK_MARK: return "StChMk"; |
1412 | | case STK_NULL_CHECK_START: return "NulChS"; |
1413 | | case STK_NULL_CHECK_END: return "NulChE"; |
1414 | | case STK_MEM_END_MARK: return "MemEMk"; |
1415 | | case STK_POS: return "Pos "; |
1416 | | case STK_STOP_BT: return "StopBt"; |
1417 | | case STK_REPEAT: return "Rep "; |
1418 | | case STK_CALL_FRAME: return "Call "; |
1419 | | case STK_RETURN: return "Ret "; |
1420 | | case STK_VOID: return "Void "; |
1421 | | case STK_ABSENT_POS: return "AbsPos"; |
1422 | | case STK_ABSENT: return "Absent"; |
1423 | | default: return " "; |
1424 | | } |
1425 | | } |
1426 | | #endif |
1427 | | |
1428 | | /* match data(str - end) from position (sstart). */ |
1429 | | /* if sstart == str then set sprev to NULL. */ |
1430 | | static OnigPosition |
1431 | | match_at(regex_t* reg, const UChar* str, const UChar* end, |
1432 | | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
1433 | | const UChar* right_range, |
1434 | | #endif |
1435 | | const UChar* sstart, UChar* sprev, OnigMatchArg* msa) |
1436 | 39.4k | { |
1437 | 39.4k | static const UChar FinishCode[] = { OP_FINISH }; |
1438 | | |
1439 | 39.4k | int i, num_mem, pop_level; |
1440 | 39.4k | ptrdiff_t n, best_len; |
1441 | 39.4k | LengthType tlen, tlen2; |
1442 | 39.4k | MemNumType mem; |
1443 | 39.4k | RelAddrType addr; |
1444 | 39.4k | OnigOptionType option = reg->options; |
1445 | 39.4k | OnigEncoding encode = reg->enc; |
1446 | 39.4k | OnigCaseFoldType case_fold_flag = reg->case_fold_flag; |
1447 | 39.4k | UChar *s, *q, *sbegin; |
1448 | 39.4k | UChar *p = reg->p; |
1449 | 39.4k | UChar *pkeep; |
1450 | 39.4k | char *alloca_base; |
1451 | 39.4k | char *xmalloc_base = NULL; |
1452 | 39.4k | OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; |
1453 | 39.4k | OnigStackType *stkp; /* used as any purpose. */ |
1454 | 39.4k | OnigStackIndex si; |
1455 | 39.4k | OnigStackIndex *repeat_stk; |
1456 | 39.4k | OnigStackIndex *mem_start_stk, *mem_end_stk; |
1457 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
1458 | | int scv; |
1459 | | unsigned char* state_check_buff = msa->state_check_buff; |
1460 | | int num_comb_exp_check = reg->num_comb_exp_check; |
1461 | | #endif |
1462 | | |
1463 | 39.4k | #if USE_TOKEN_THREADED_VM |
1464 | 39.4k | # define OP_OFFSET 1 |
1465 | 39.4k | # define VM_LOOP JUMP; |
1466 | 39.4k | # define VM_LOOP_END |
1467 | 1.30M | # define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK; |
1468 | 39.4k | # define DEFAULT L_DEFAULT: |
1469 | 299k | # define NEXT sprev = sbegin; JUMP |
1470 | 1.27M | # define JUMP RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++]) |
1471 | | |
1472 | 39.4k | RB_GNUC_EXTENSION static const void *oplabels[] = { |
1473 | 39.4k | &&L_OP_FINISH, /* matching process terminator (no more alternative) */ |
1474 | 39.4k | &&L_OP_END, /* pattern code terminator (success end) */ |
1475 | | |
1476 | 39.4k | &&L_OP_EXACT1, /* single byte, N = 1 */ |
1477 | 39.4k | &&L_OP_EXACT2, /* single byte, N = 2 */ |
1478 | 39.4k | &&L_OP_EXACT3, /* single byte, N = 3 */ |
1479 | 39.4k | &&L_OP_EXACT4, /* single byte, N = 4 */ |
1480 | 39.4k | &&L_OP_EXACT5, /* single byte, N = 5 */ |
1481 | 39.4k | &&L_OP_EXACTN, /* single byte */ |
1482 | 39.4k | &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ |
1483 | 39.4k | &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ |
1484 | 39.4k | &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ |
1485 | 39.4k | &&L_OP_EXACTMB2N, /* mb-length = 2 */ |
1486 | 39.4k | &&L_OP_EXACTMB3N, /* mb-length = 3 */ |
1487 | 39.4k | &&L_OP_EXACTMBN, /* other length */ |
1488 | | |
1489 | 39.4k | &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */ |
1490 | 39.4k | &&L_OP_EXACTN_IC, /* single byte, ignore case */ |
1491 | | |
1492 | 39.4k | &&L_OP_CCLASS, |
1493 | 39.4k | &&L_OP_CCLASS_MB, |
1494 | 39.4k | &&L_OP_CCLASS_MIX, |
1495 | 39.4k | &&L_OP_CCLASS_NOT, |
1496 | 39.4k | &&L_OP_CCLASS_MB_NOT, |
1497 | 39.4k | &&L_OP_CCLASS_MIX_NOT, |
1498 | | |
1499 | 39.4k | &&L_OP_ANYCHAR, /* "." */ |
1500 | 39.4k | &&L_OP_ANYCHAR_ML, /* "." multi-line */ |
1501 | 39.4k | &&L_OP_ANYCHAR_STAR, /* ".*" */ |
1502 | 39.4k | &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ |
1503 | 39.4k | &&L_OP_ANYCHAR_STAR_PEEK_NEXT, |
1504 | 39.4k | &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT, |
1505 | | |
1506 | 39.4k | &&L_OP_WORD, |
1507 | 39.4k | &&L_OP_NOT_WORD, |
1508 | 39.4k | &&L_OP_WORD_BOUND, |
1509 | 39.4k | &&L_OP_NOT_WORD_BOUND, |
1510 | 39.4k | # ifdef USE_WORD_BEGIN_END |
1511 | 39.4k | &&L_OP_WORD_BEGIN, |
1512 | 39.4k | &&L_OP_WORD_END, |
1513 | | # else |
1514 | | &&L_DEFAULT, |
1515 | | &&L_DEFAULT, |
1516 | | # endif |
1517 | 39.4k | &&L_OP_ASCII_WORD, |
1518 | 39.4k | &&L_OP_NOT_ASCII_WORD, |
1519 | 39.4k | &&L_OP_ASCII_WORD_BOUND, |
1520 | 39.4k | &&L_OP_NOT_ASCII_WORD_BOUND, |
1521 | 39.4k | # ifdef USE_WORD_BEGIN_END |
1522 | 39.4k | &&L_OP_ASCII_WORD_BEGIN, |
1523 | 39.4k | &&L_OP_ASCII_WORD_END, |
1524 | | # else |
1525 | | &&L_DEFAULT, |
1526 | | &&L_DEFAULT, |
1527 | | # endif |
1528 | | |
1529 | 39.4k | &&L_OP_BEGIN_BUF, |
1530 | 39.4k | &&L_OP_END_BUF, |
1531 | 39.4k | &&L_OP_BEGIN_LINE, |
1532 | 39.4k | &&L_OP_END_LINE, |
1533 | 39.4k | &&L_OP_SEMI_END_BUF, |
1534 | 39.4k | &&L_OP_BEGIN_POSITION, |
1535 | | |
1536 | 39.4k | &&L_OP_BACKREF1, |
1537 | 39.4k | &&L_OP_BACKREF2, |
1538 | 39.4k | &&L_OP_BACKREFN, |
1539 | 39.4k | &&L_OP_BACKREFN_IC, |
1540 | 39.4k | &&L_OP_BACKREF_MULTI, |
1541 | 39.4k | &&L_OP_BACKREF_MULTI_IC, |
1542 | 39.4k | # ifdef USE_BACKREF_WITH_LEVEL |
1543 | 39.4k | &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ |
1544 | | # else |
1545 | | &&L_DEFAULT, |
1546 | | # endif |
1547 | 39.4k | &&L_OP_MEMORY_START, |
1548 | 39.4k | &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */ |
1549 | 39.4k | &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */ |
1550 | 39.4k | # ifdef USE_SUBEXP_CALL |
1551 | 39.4k | &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ |
1552 | | # else |
1553 | | &&L_DEFAULT, |
1554 | | # endif |
1555 | 39.4k | &&L_OP_MEMORY_END, |
1556 | 39.4k | # ifdef USE_SUBEXP_CALL |
1557 | 39.4k | &&L_OP_MEMORY_END_REC, /* push marker to stack */ |
1558 | | # else |
1559 | | &&L_DEFAULT, |
1560 | | # endif |
1561 | | |
1562 | 39.4k | &&L_OP_KEEP, |
1563 | | |
1564 | 39.4k | &&L_OP_FAIL, /* pop stack and move */ |
1565 | 39.4k | &&L_OP_JUMP, |
1566 | 39.4k | &&L_OP_PUSH, |
1567 | 39.4k | &&L_OP_POP, |
1568 | | # ifdef USE_OP_PUSH_OR_JUMP_EXACT |
1569 | | &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ |
1570 | | # else |
1571 | 39.4k | &&L_DEFAULT, |
1572 | 39.4k | # endif |
1573 | 39.4k | &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ |
1574 | 39.4k | &&L_OP_REPEAT, /* {n,m} */ |
1575 | 39.4k | &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */ |
1576 | 39.4k | &&L_OP_REPEAT_INC, |
1577 | 39.4k | &&L_OP_REPEAT_INC_NG, /* non greedy */ |
1578 | 39.4k | &&L_OP_REPEAT_INC_SG, /* search and get in stack */ |
1579 | 39.4k | &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ |
1580 | 39.4k | &&L_OP_NULL_CHECK_START, /* null loop checker start */ |
1581 | 39.4k | &&L_OP_NULL_CHECK_END, /* null loop checker end */ |
1582 | 39.4k | # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
1583 | 39.4k | &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ |
1584 | | # else |
1585 | | &&L_DEFAULT, |
1586 | | # endif |
1587 | 39.4k | # ifdef USE_SUBEXP_CALL |
1588 | 39.4k | &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ |
1589 | | # else |
1590 | | &&L_DEFAULT, |
1591 | | # endif |
1592 | | |
1593 | 39.4k | &&L_OP_PUSH_POS, /* (?=...) start */ |
1594 | 39.4k | &&L_OP_POP_POS, /* (?=...) end */ |
1595 | 39.4k | &&L_OP_PUSH_POS_NOT, /* (?!...) start */ |
1596 | 39.4k | &&L_OP_FAIL_POS, /* (?!...) end */ |
1597 | 39.4k | &&L_OP_PUSH_STOP_BT, /* (?>...) start */ |
1598 | 39.4k | &&L_OP_POP_STOP_BT, /* (?>...) end */ |
1599 | 39.4k | &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ |
1600 | 39.4k | &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ |
1601 | 39.4k | &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ |
1602 | 39.4k | &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */ |
1603 | 39.4k | &&L_OP_ABSENT, /* (?~...) start of inner loop */ |
1604 | 39.4k | &&L_OP_ABSENT_END, /* (?~...) end */ |
1605 | | |
1606 | 39.4k | # ifdef USE_SUBEXP_CALL |
1607 | 39.4k | &&L_OP_CALL, /* \g<name> */ |
1608 | 39.4k | &&L_OP_RETURN, |
1609 | | # else |
1610 | | &&L_DEFAULT, |
1611 | | &&L_DEFAULT, |
1612 | | # endif |
1613 | 39.4k | &&L_OP_CONDITION, |
1614 | | |
1615 | | # ifdef USE_COMBINATION_EXPLOSION_CHECK |
1616 | | &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */ |
1617 | | &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ |
1618 | | &&L_OP_STATE_CHECK, /* check only */ |
1619 | | # else |
1620 | 39.4k | &&L_DEFAULT, |
1621 | 39.4k | &&L_DEFAULT, |
1622 | 39.4k | &&L_DEFAULT, |
1623 | 39.4k | # endif |
1624 | | # ifdef USE_COMBINATION_EXPLOSION_CHECK |
1625 | | &&L_OP_STATE_CHECK_ANYCHAR_STAR, |
1626 | | &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR, |
1627 | | # else |
1628 | 39.4k | &&L_DEFAULT, |
1629 | 39.4k | &&L_DEFAULT, |
1630 | 39.4k | # endif |
1631 | | /* no need: IS_DYNAMIC_OPTION() == 0 */ |
1632 | | # if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ |
1633 | | &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */ |
1634 | | &&L_OP_SET_OPTION /* set option */ |
1635 | | # else |
1636 | 39.4k | &&L_DEFAULT, |
1637 | 39.4k | &&L_DEFAULT |
1638 | 39.4k | # endif |
1639 | 39.4k | }; |
1640 | | #else /* USE_TOKEN_THREADED_VM */ |
1641 | | |
1642 | | # define OP_OFFSET 0 |
1643 | | # define VM_LOOP \ |
1644 | | while (1) { \ |
1645 | | OPCODE_EXEC_HOOK; \ |
1646 | | sbegin = s; \ |
1647 | | switch (*p++) { |
1648 | | # define VM_LOOP_END } sprev = sbegin; } |
1649 | | # define CASE(x) case x: |
1650 | | # define DEFAULT default: |
1651 | | # define NEXT break |
1652 | | # define JUMP continue; break |
1653 | | #endif /* USE_TOKEN_THREADED_VM */ |
1654 | | |
1655 | | |
1656 | 39.4k | #ifdef USE_SUBEXP_CALL |
1657 | | /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, |
1658 | | etc. Additional space is required. */ |
1659 | 78.8k | # define ADD_NUMMEM 1 |
1660 | | #else |
1661 | | /* Stack #0 not is used. */ |
1662 | | # define ADD_NUMMEM 0 |
1663 | | #endif |
1664 | | |
1665 | 39.4k | n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2; |
1666 | | |
1667 | 39.4k | STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE); |
1668 | 39.4k | pop_level = reg->stack_pop_level; |
1669 | 39.4k | num_mem = reg->num_mem; |
1670 | 39.4k | repeat_stk = (OnigStackIndex* )alloca_base; |
1671 | | |
1672 | 39.4k | mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); |
1673 | 39.4k | mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM); |
1674 | 39.4k | { |
1675 | 39.4k | OnigStackIndex *pp = mem_start_stk; |
1676 | 236k | for (; pp < repeat_stk + n; pp += 2) { |
1677 | 197k | pp[0] = INVALID_STACK_INDEX; |
1678 | 197k | pp[1] = INVALID_STACK_INDEX; |
1679 | 197k | } |
1680 | 39.4k | } |
1681 | | #ifndef USE_SUBEXP_CALL |
1682 | | mem_start_stk--; /* for index start from 1, |
1683 | | mem_start_stk[1]..mem_start_stk[num_mem] */ |
1684 | | mem_end_stk--; /* for index start from 1, |
1685 | | mem_end_stk[1]..mem_end_stk[num_mem] */ |
1686 | | #endif |
1687 | | |
1688 | | #ifdef ONIG_DEBUG_MATCH |
1689 | | fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n", |
1690 | | (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev); |
1691 | | fprintf(stderr, "size: %d, start offset: %d\n", |
1692 | | (int )(end - str), (int )(sstart - str)); |
1693 | | fprintf(stderr, "\n ofs> str stk:type addr:opcode\n"); |
1694 | | #endif |
1695 | | |
1696 | 39.4k | STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */ |
1697 | 39.4k | best_len = ONIG_MISMATCH; |
1698 | 39.4k | s = (UChar* )sstart; |
1699 | 39.4k | pkeep = (UChar* )sstart; |
1700 | | |
1701 | | |
1702 | | #ifdef ONIG_DEBUG_MATCH |
1703 | | # define OPCODE_EXEC_HOOK \ |
1704 | | if (s) { \ |
1705 | | UChar *op, *q, *bp, buf[50]; \ |
1706 | | int len; \ |
1707 | | op = p - OP_OFFSET; \ |
1708 | | fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \ |
1709 | | bp = buf; \ |
1710 | | q = s; \ |
1711 | | if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \ |
1712 | | for (i = 0; i < 7 && q < end; i++) { \ |
1713 | | len = enclen(encode, q, end); \ |
1714 | | while (len-- > 0) *bp++ = *q++; \ |
1715 | | } \ |
1716 | | if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \ |
1717 | | } \ |
1718 | | xmemcpy(bp, "\"", 1); bp += 1; \ |
1719 | | *bp = 0; \ |
1720 | | fputs((char* )buf, stderr); \ |
1721 | | for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \ |
1722 | | fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \ |
1723 | | stk - stk_base - 1, \ |
1724 | | (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \ |
1725 | | (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \ |
1726 | | onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \ |
1727 | | fprintf(stderr, "\n"); \ |
1728 | | } |
1729 | | #else |
1730 | 1.30M | # define OPCODE_EXEC_HOOK ((void) 0) |
1731 | 39.4k | #endif |
1732 | | |
1733 | | |
1734 | 39.4k | VM_LOOP { |
1735 | 39.4k | CASE(OP_END) MOP_IN(OP_END); |
1736 | 207 | n = s - sstart; |
1737 | 207 | if (n > best_len) { |
1738 | 207 | OnigRegion* region; |
1739 | 207 | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
1740 | 207 | if (IS_FIND_LONGEST(option)) { |
1741 | 0 | if (n > msa->best_len) { |
1742 | 0 | msa->best_len = n; |
1743 | 0 | msa->best_s = (UChar* )sstart; |
1744 | 0 | } |
1745 | 0 | else |
1746 | 0 | goto end_best_len; |
1747 | 0 | } |
1748 | 207 | #endif |
1749 | 207 | best_len = n; |
1750 | 207 | region = msa->region; |
1751 | 207 | if (region) { |
1752 | 207 | region->beg[0] = ((pkeep > s) ? s : pkeep) - str; |
1753 | 207 | region->end[0] = s - str; |
1754 | 1.03k | for (i = 1; i <= num_mem; i++) { |
1755 | 828 | if (mem_end_stk[i] != INVALID_STACK_INDEX) { |
1756 | 828 | if (BIT_STATUS_AT(reg->bt_mem_start, i)) |
1757 | 0 | region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; |
1758 | 828 | else |
1759 | 828 | region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; |
1760 | | |
1761 | 828 | region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) |
1762 | 828 | ? STACK_AT(mem_end_stk[i])->u.mem.pstr |
1763 | 828 | : (UChar* )((void* )mem_end_stk[i])) - str; |
1764 | 828 | } |
1765 | 0 | else { |
1766 | 0 | region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; |
1767 | 0 | } |
1768 | 828 | } |
1769 | | |
1770 | 207 | #ifdef USE_CAPTURE_HISTORY |
1771 | 207 | if (reg->capture_history != 0) { |
1772 | 0 | int r; |
1773 | 0 | OnigCaptureTreeNode* node; |
1774 | |
|
1775 | 0 | if (IS_NULL(region->history_root)) { |
1776 | 0 | region->history_root = node = history_node_new(); |
1777 | 0 | CHECK_NULL_RETURN_MEMERR(node); |
1778 | 0 | } |
1779 | 0 | else { |
1780 | 0 | node = region->history_root; |
1781 | 0 | history_tree_clear(node); |
1782 | 0 | } |
1783 | | |
1784 | 0 | node->group = 0; |
1785 | 0 | node->beg = ((pkeep > s) ? s : pkeep) - str; |
1786 | 0 | node->end = s - str; |
1787 | |
|
1788 | 0 | stkp = stk_base; |
1789 | 0 | r = make_capture_history_tree(region->history_root, &stkp, |
1790 | 0 | stk, (UChar* )str, reg); |
1791 | 0 | if (r < 0) { |
1792 | 0 | best_len = r; /* error code */ |
1793 | 0 | goto finish; |
1794 | 0 | } |
1795 | 0 | } |
1796 | 207 | #endif /* USE_CAPTURE_HISTORY */ |
1797 | 207 | } /* if (region) */ |
1798 | 207 | } /* n > best_len */ |
1799 | | |
1800 | 207 | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
1801 | 207 | end_best_len: |
1802 | 207 | #endif |
1803 | 207 | MOP_OUT; |
1804 | | |
1805 | 207 | if (IS_FIND_CONDITION(option)) { |
1806 | 0 | if (IS_FIND_NOT_EMPTY(option) && s == sstart) { |
1807 | 0 | best_len = ONIG_MISMATCH; |
1808 | 0 | goto fail; /* for retry */ |
1809 | 0 | } |
1810 | 0 | if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { |
1811 | 0 | goto fail; /* for retry */ |
1812 | 0 | } |
1813 | 0 | } |
1814 | | |
1815 | | /* default behavior: return first-matching result. */ |
1816 | 207 | goto finish; |
1817 | | |
1818 | 4.68k | CASE(OP_EXACT1) MOP_IN(OP_EXACT1); |
1819 | 4.68k | DATA_ENSURE(1); |
1820 | 3.50k | if (*p != *s) goto fail; |
1821 | 3.50k | p++; s++; |
1822 | 3.50k | MOP_OUT; |
1823 | 3.50k | NEXT; |
1824 | | |
1825 | 3.50k | CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC); |
1826 | 0 | { |
1827 | 0 | int len; |
1828 | 0 | UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1829 | |
|
1830 | 0 | DATA_ENSURE(1); |
1831 | 0 | len = ONIGENC_MBC_CASE_FOLD(encode, |
1832 | | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ |
1833 | 0 | case_fold_flag, |
1834 | 0 | &s, end, lowbuf); |
1835 | 0 | DATA_ENSURE(0); |
1836 | 0 | q = lowbuf; |
1837 | 0 | while (len-- > 0) { |
1838 | 0 | if (*p != *q) { |
1839 | 0 | goto fail; |
1840 | 0 | } |
1841 | 0 | p++; q++; |
1842 | 0 | } |
1843 | 0 | } |
1844 | 0 | MOP_OUT; |
1845 | 0 | NEXT; |
1846 | |
|
1847 | 0 | CASE(OP_EXACT2) MOP_IN(OP_EXACT2); |
1848 | 0 | DATA_ENSURE(2); |
1849 | 0 | if (*p != *s) goto fail; |
1850 | 0 | p++; s++; |
1851 | 0 | if (*p != *s) goto fail; |
1852 | 0 | sprev = s; |
1853 | 0 | p++; s++; |
1854 | 0 | MOP_OUT; |
1855 | 0 | JUMP; |
1856 | |
|
1857 | 0 | CASE(OP_EXACT3) MOP_IN(OP_EXACT3); |
1858 | 0 | DATA_ENSURE(3); |
1859 | 0 | if (*p != *s) goto fail; |
1860 | 0 | p++; s++; |
1861 | 0 | if (*p != *s) goto fail; |
1862 | 0 | p++; s++; |
1863 | 0 | if (*p != *s) goto fail; |
1864 | 0 | sprev = s; |
1865 | 0 | p++; s++; |
1866 | 0 | MOP_OUT; |
1867 | 0 | JUMP; |
1868 | |
|
1869 | 0 | CASE(OP_EXACT4) MOP_IN(OP_EXACT4); |
1870 | 0 | DATA_ENSURE(4); |
1871 | 0 | if (*p != *s) goto fail; |
1872 | 0 | p++; s++; |
1873 | 0 | if (*p != *s) goto fail; |
1874 | 0 | p++; s++; |
1875 | 0 | if (*p != *s) goto fail; |
1876 | 0 | p++; s++; |
1877 | 0 | if (*p != *s) goto fail; |
1878 | 0 | sprev = s; |
1879 | 0 | p++; s++; |
1880 | 0 | MOP_OUT; |
1881 | 0 | JUMP; |
1882 | |
|
1883 | 0 | CASE(OP_EXACT5) MOP_IN(OP_EXACT5); |
1884 | 0 | DATA_ENSURE(5); |
1885 | 0 | if (*p != *s) goto fail; |
1886 | 0 | p++; s++; |
1887 | 0 | if (*p != *s) goto fail; |
1888 | 0 | p++; s++; |
1889 | 0 | if (*p != *s) goto fail; |
1890 | 0 | p++; s++; |
1891 | 0 | if (*p != *s) goto fail; |
1892 | 0 | p++; s++; |
1893 | 0 | if (*p != *s) goto fail; |
1894 | 0 | sprev = s; |
1895 | 0 | p++; s++; |
1896 | 0 | MOP_OUT; |
1897 | 0 | JUMP; |
1898 | |
|
1899 | 0 | CASE(OP_EXACTN) MOP_IN(OP_EXACTN); |
1900 | 0 | GET_LENGTH_INC(tlen, p); |
1901 | 0 | DATA_ENSURE(tlen); |
1902 | 0 | while (tlen-- > 0) { |
1903 | 0 | if (*p++ != *s++) goto fail; |
1904 | 0 | } |
1905 | 0 | sprev = s - 1; |
1906 | 0 | MOP_OUT; |
1907 | 0 | JUMP; |
1908 | |
|
1909 | 0 | CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC); |
1910 | 0 | { |
1911 | 0 | int len; |
1912 | 0 | UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
1913 | |
|
1914 | 0 | GET_LENGTH_INC(tlen, p); |
1915 | 0 | endp = p + tlen; |
1916 | |
|
1917 | 0 | while (p < endp) { |
1918 | 0 | sprev = s; |
1919 | 0 | DATA_ENSURE(1); |
1920 | 0 | len = ONIGENC_MBC_CASE_FOLD(encode, |
1921 | | /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ |
1922 | 0 | case_fold_flag, |
1923 | 0 | &s, end, lowbuf); |
1924 | 0 | DATA_ENSURE(0); |
1925 | 0 | q = lowbuf; |
1926 | 0 | while (len-- > 0) { |
1927 | 0 | if (*p != *q) goto fail; |
1928 | 0 | p++; q++; |
1929 | 0 | } |
1930 | 0 | } |
1931 | 0 | } |
1932 | | |
1933 | 0 | MOP_OUT; |
1934 | 0 | JUMP; |
1935 | |
|
1936 | 0 | CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1); |
1937 | 0 | DATA_ENSURE(2); |
1938 | 0 | if (*p != *s) goto fail; |
1939 | 0 | p++; s++; |
1940 | 0 | if (*p != *s) goto fail; |
1941 | 0 | p++; s++; |
1942 | 0 | MOP_OUT; |
1943 | 0 | NEXT; |
1944 | |
|
1945 | 0 | CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2); |
1946 | 0 | DATA_ENSURE(4); |
1947 | 0 | if (*p != *s) goto fail; |
1948 | 0 | p++; s++; |
1949 | 0 | if (*p != *s) goto fail; |
1950 | 0 | p++; s++; |
1951 | 0 | sprev = s; |
1952 | 0 | if (*p != *s) goto fail; |
1953 | 0 | p++; s++; |
1954 | 0 | if (*p != *s) goto fail; |
1955 | 0 | p++; s++; |
1956 | 0 | MOP_OUT; |
1957 | 0 | JUMP; |
1958 | |
|
1959 | 0 | CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3); |
1960 | 0 | DATA_ENSURE(6); |
1961 | 0 | if (*p != *s) goto fail; |
1962 | 0 | p++; s++; |
1963 | 0 | if (*p != *s) goto fail; |
1964 | 0 | p++; s++; |
1965 | 0 | if (*p != *s) goto fail; |
1966 | 0 | p++; s++; |
1967 | 0 | if (*p != *s) goto fail; |
1968 | 0 | p++; s++; |
1969 | 0 | sprev = s; |
1970 | 0 | if (*p != *s) goto fail; |
1971 | 0 | p++; s++; |
1972 | 0 | if (*p != *s) goto fail; |
1973 | 0 | p++; s++; |
1974 | 0 | MOP_OUT; |
1975 | 0 | JUMP; |
1976 | |
|
1977 | 0 | CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N); |
1978 | 0 | GET_LENGTH_INC(tlen, p); |
1979 | 0 | DATA_ENSURE(tlen * 2); |
1980 | 0 | while (tlen-- > 0) { |
1981 | 0 | if (*p != *s) goto fail; |
1982 | 0 | p++; s++; |
1983 | 0 | if (*p != *s) goto fail; |
1984 | 0 | p++; s++; |
1985 | 0 | } |
1986 | 0 | sprev = s - 2; |
1987 | 0 | MOP_OUT; |
1988 | 0 | JUMP; |
1989 | |
|
1990 | 0 | CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N); |
1991 | 0 | GET_LENGTH_INC(tlen, p); |
1992 | 0 | DATA_ENSURE(tlen * 3); |
1993 | 0 | while (tlen-- > 0) { |
1994 | 0 | if (*p != *s) goto fail; |
1995 | 0 | p++; s++; |
1996 | 0 | if (*p != *s) goto fail; |
1997 | 0 | p++; s++; |
1998 | 0 | if (*p != *s) goto fail; |
1999 | 0 | p++; s++; |
2000 | 0 | } |
2001 | 0 | sprev = s - 3; |
2002 | 0 | MOP_OUT; |
2003 | 0 | JUMP; |
2004 | |
|
2005 | 0 | CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN); |
2006 | 0 | GET_LENGTH_INC(tlen, p); /* mb-len */ |
2007 | 0 | GET_LENGTH_INC(tlen2, p); /* string len */ |
2008 | 0 | tlen2 *= tlen; |
2009 | 0 | DATA_ENSURE(tlen2); |
2010 | 0 | while (tlen2-- > 0) { |
2011 | 0 | if (*p != *s) goto fail; |
2012 | 0 | p++; s++; |
2013 | 0 | } |
2014 | 0 | sprev = s - tlen; |
2015 | 0 | MOP_OUT; |
2016 | 0 | JUMP; |
2017 | |
|
2018 | 0 | CASE(OP_CCLASS) MOP_IN(OP_CCLASS); |
2019 | 0 | DATA_ENSURE(1); |
2020 | 0 | if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; |
2021 | 0 | p += SIZE_BITSET; |
2022 | 0 | s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ |
2023 | 0 | MOP_OUT; |
2024 | 0 | NEXT; |
2025 | |
|
2026 | 0 | CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB); |
2027 | 0 | if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; |
2028 | | |
2029 | 0 | cclass_mb: |
2030 | 0 | GET_LENGTH_INC(tlen, p); |
2031 | 0 | { |
2032 | 0 | OnigCodePoint code; |
2033 | 0 | UChar *ss; |
2034 | 0 | int mb_len; |
2035 | |
|
2036 | 0 | DATA_ENSURE(1); |
2037 | 0 | mb_len = enclen(encode, s, end); |
2038 | 0 | DATA_ENSURE(mb_len); |
2039 | 0 | ss = s; |
2040 | 0 | s += mb_len; |
2041 | 0 | code = ONIGENC_MBC_TO_CODE(encode, ss, s); |
2042 | |
|
2043 | 0 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS |
2044 | 0 | if (! onig_is_in_code_range(p, code)) goto fail; |
2045 | | #else |
2046 | | q = p; |
2047 | | ALIGNMENT_RIGHT(q); |
2048 | | if (! onig_is_in_code_range(q, code)) goto fail; |
2049 | | #endif |
2050 | 0 | } |
2051 | 0 | p += tlen; |
2052 | 0 | MOP_OUT; |
2053 | 0 | NEXT; |
2054 | |
|
2055 | 0 | CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX); |
2056 | 0 | DATA_ENSURE(1); |
2057 | 0 | if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
2058 | 0 | p += SIZE_BITSET; |
2059 | 0 | goto cclass_mb; |
2060 | 0 | } |
2061 | 0 | else { |
2062 | 0 | if (BITSET_AT(((BitSetRef )p), *s) == 0) |
2063 | 0 | goto fail; |
2064 | | |
2065 | 0 | p += SIZE_BITSET; |
2066 | 0 | GET_LENGTH_INC(tlen, p); |
2067 | 0 | p += tlen; |
2068 | 0 | s++; |
2069 | 0 | } |
2070 | 0 | MOP_OUT; |
2071 | 0 | NEXT; |
2072 | |
|
2073 | 300k | CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT); |
2074 | 300k | DATA_ENSURE(1); |
2075 | 299k | if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; |
2076 | 295k | p += SIZE_BITSET; |
2077 | 295k | s += enclen(encode, s, end); |
2078 | 295k | MOP_OUT; |
2079 | 295k | NEXT; |
2080 | | |
2081 | 295k | CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT); |
2082 | 0 | DATA_ENSURE(1); |
2083 | 0 | if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
2084 | 0 | s++; |
2085 | 0 | GET_LENGTH_INC(tlen, p); |
2086 | 0 | p += tlen; |
2087 | 0 | goto cc_mb_not_success; |
2088 | 0 | } |
2089 | | |
2090 | 0 | cclass_mb_not: |
2091 | 0 | GET_LENGTH_INC(tlen, p); |
2092 | 0 | { |
2093 | 0 | OnigCodePoint code; |
2094 | 0 | UChar *ss; |
2095 | 0 | int mb_len = enclen(encode, s, end); |
2096 | |
|
2097 | 0 | if (! DATA_ENSURE_CHECK(mb_len)) { |
2098 | 0 | DATA_ENSURE(1); |
2099 | 0 | s = (UChar* )end; |
2100 | 0 | p += tlen; |
2101 | 0 | goto cc_mb_not_success; |
2102 | 0 | } |
2103 | | |
2104 | 0 | ss = s; |
2105 | 0 | s += mb_len; |
2106 | 0 | code = ONIGENC_MBC_TO_CODE(encode, ss, s); |
2107 | |
|
2108 | 0 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS |
2109 | 0 | if (onig_is_in_code_range(p, code)) goto fail; |
2110 | | #else |
2111 | | q = p; |
2112 | | ALIGNMENT_RIGHT(q); |
2113 | | if (onig_is_in_code_range(q, code)) goto fail; |
2114 | | #endif |
2115 | 0 | } |
2116 | 0 | p += tlen; |
2117 | |
|
2118 | 0 | cc_mb_not_success: |
2119 | 0 | MOP_OUT; |
2120 | 0 | NEXT; |
2121 | |
|
2122 | 0 | CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT); |
2123 | 0 | DATA_ENSURE(1); |
2124 | 0 | if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { |
2125 | 0 | p += SIZE_BITSET; |
2126 | 0 | goto cclass_mb_not; |
2127 | 0 | } |
2128 | 0 | else { |
2129 | 0 | if (BITSET_AT(((BitSetRef )p), *s) != 0) |
2130 | 0 | goto fail; |
2131 | | |
2132 | 0 | p += SIZE_BITSET; |
2133 | 0 | GET_LENGTH_INC(tlen, p); |
2134 | 0 | p += tlen; |
2135 | 0 | s++; |
2136 | 0 | } |
2137 | 0 | MOP_OUT; |
2138 | 0 | NEXT; |
2139 | |
|
2140 | 743 | CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR); |
2141 | 743 | DATA_ENSURE(1); |
2142 | 538 | n = enclen(encode, s, end); |
2143 | 538 | DATA_ENSURE(n); |
2144 | 538 | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
2145 | 207 | s += n; |
2146 | 207 | MOP_OUT; |
2147 | 207 | NEXT; |
2148 | | |
2149 | 207 | CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML); |
2150 | 0 | DATA_ENSURE(1); |
2151 | 0 | n = enclen(encode, s, end); |
2152 | 0 | DATA_ENSURE(n); |
2153 | 0 | s += n; |
2154 | 0 | MOP_OUT; |
2155 | 0 | NEXT; |
2156 | |
|
2157 | 207 | CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR); |
2158 | 816k | while (DATA_ENSURE_CHECK1) { |
2159 | 816k | STACK_PUSH_ALT(p, s, sprev, pkeep); |
2160 | 816k | n = enclen(encode, s, end); |
2161 | 816k | DATA_ENSURE(n); |
2162 | 816k | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
2163 | 816k | sprev = s; |
2164 | 816k | s += n; |
2165 | 816k | } |
2166 | 183 | MOP_OUT; |
2167 | 183 | JUMP; |
2168 | | |
2169 | 183 | CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR); |
2170 | 0 | while (DATA_ENSURE_CHECK1) { |
2171 | 0 | STACK_PUSH_ALT(p, s, sprev, pkeep); |
2172 | 0 | n = enclen(encode, s, end); |
2173 | 0 | if (n > 1) { |
2174 | 0 | DATA_ENSURE(n); |
2175 | 0 | sprev = s; |
2176 | 0 | s += n; |
2177 | 0 | } |
2178 | 0 | else { |
2179 | 0 | sprev = s; |
2180 | 0 | s++; |
2181 | 0 | } |
2182 | 0 | } |
2183 | 0 | MOP_OUT; |
2184 | 0 | JUMP; |
2185 | |
|
2186 | 0 | CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); |
2187 | 0 | while (DATA_ENSURE_CHECK1) { |
2188 | 0 | if (*p == *s) { |
2189 | 0 | STACK_PUSH_ALT(p + 1, s, sprev, pkeep); |
2190 | 0 | } |
2191 | 0 | n = enclen(encode, s, end); |
2192 | 0 | DATA_ENSURE(n); |
2193 | 0 | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
2194 | 0 | sprev = s; |
2195 | 0 | s += n; |
2196 | 0 | } |
2197 | 0 | p++; |
2198 | 0 | MOP_OUT; |
2199 | 0 | NEXT; |
2200 | |
|
2201 | 0 | CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); |
2202 | 0 | while (DATA_ENSURE_CHECK1) { |
2203 | 0 | if (*p == *s) { |
2204 | 0 | STACK_PUSH_ALT(p + 1, s, sprev, pkeep); |
2205 | 0 | } |
2206 | 0 | n = enclen(encode, s, end); |
2207 | 0 | if (n > 1) { |
2208 | 0 | DATA_ENSURE(n); |
2209 | 0 | sprev = s; |
2210 | 0 | s += n; |
2211 | 0 | } |
2212 | 0 | else { |
2213 | 0 | sprev = s; |
2214 | 0 | s++; |
2215 | 0 | } |
2216 | 0 | } |
2217 | 0 | p++; |
2218 | 0 | MOP_OUT; |
2219 | 0 | NEXT; |
2220 | |
|
2221 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
2222 | | CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); |
2223 | | GET_STATE_CHECK_NUM_INC(mem, p); |
2224 | | while (DATA_ENSURE_CHECK1) { |
2225 | | STATE_CHECK_VAL(scv, mem); |
2226 | | if (scv) goto fail; |
2227 | | |
2228 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); |
2229 | | n = enclen(encode, s, end); |
2230 | | DATA_ENSURE(n); |
2231 | | if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail; |
2232 | | sprev = s; |
2233 | | s += n; |
2234 | | } |
2235 | | MOP_OUT; |
2236 | | NEXT; |
2237 | | |
2238 | | CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR) |
2239 | | MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); |
2240 | | |
2241 | | GET_STATE_CHECK_NUM_INC(mem, p); |
2242 | | while (DATA_ENSURE_CHECK1) { |
2243 | | STATE_CHECK_VAL(scv, mem); |
2244 | | if (scv) goto fail; |
2245 | | |
2246 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep); |
2247 | | n = enclen(encode, s, end); |
2248 | | if (n > 1) { |
2249 | | DATA_ENSURE(n); |
2250 | | sprev = s; |
2251 | | s += n; |
2252 | | } |
2253 | | else { |
2254 | | sprev = s; |
2255 | | s++; |
2256 | | } |
2257 | | } |
2258 | | MOP_OUT; |
2259 | | NEXT; |
2260 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
2261 | |
|
2262 | 0 | CASE(OP_WORD) MOP_IN(OP_WORD); |
2263 | 0 | DATA_ENSURE(1); |
2264 | 0 | if (! ONIGENC_IS_MBC_WORD(encode, s, end)) |
2265 | 0 | goto fail; |
2266 | | |
2267 | 0 | s += enclen(encode, s, end); |
2268 | 0 | MOP_OUT; |
2269 | 0 | NEXT; |
2270 | |
|
2271 | 0 | CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD); |
2272 | 0 | DATA_ENSURE(1); |
2273 | 0 | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
2274 | 0 | goto fail; |
2275 | | |
2276 | 0 | s += enclen(encode, s, end); |
2277 | 0 | MOP_OUT; |
2278 | 0 | NEXT; |
2279 | |
|
2280 | 0 | CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD); |
2281 | 0 | DATA_ENSURE(1); |
2282 | 0 | if (ONIGENC_IS_MBC_WORD(encode, s, end)) |
2283 | 0 | goto fail; |
2284 | | |
2285 | 0 | s += enclen(encode, s, end); |
2286 | 0 | MOP_OUT; |
2287 | 0 | NEXT; |
2288 | |
|
2289 | 0 | CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD); |
2290 | 0 | DATA_ENSURE(1); |
2291 | 0 | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
2292 | 0 | goto fail; |
2293 | | |
2294 | 0 | s += enclen(encode, s, end); |
2295 | 0 | MOP_OUT; |
2296 | 0 | NEXT; |
2297 | |
|
2298 | 0 | CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND); |
2299 | 0 | if (ON_STR_BEGIN(s)) { |
2300 | 0 | DATA_ENSURE(1); |
2301 | 0 | if (! ONIGENC_IS_MBC_WORD(encode, s, end)) |
2302 | 0 | goto fail; |
2303 | 0 | } |
2304 | 0 | else if (ON_STR_END(s)) { |
2305 | 0 | if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
2306 | 0 | goto fail; |
2307 | 0 | } |
2308 | 0 | else { |
2309 | 0 | if (ONIGENC_IS_MBC_WORD(encode, s, end) |
2310 | 0 | == ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
2311 | 0 | goto fail; |
2312 | 0 | } |
2313 | 0 | MOP_OUT; |
2314 | 0 | JUMP; |
2315 | |
|
2316 | 0 | CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND); |
2317 | 0 | if (ON_STR_BEGIN(s)) { |
2318 | 0 | DATA_ENSURE(1); |
2319 | 0 | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
2320 | 0 | goto fail; |
2321 | 0 | } |
2322 | 0 | else if (ON_STR_END(s)) { |
2323 | 0 | if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
2324 | 0 | goto fail; |
2325 | 0 | } |
2326 | 0 | else { |
2327 | 0 | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) |
2328 | 0 | == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
2329 | 0 | goto fail; |
2330 | 0 | } |
2331 | 0 | MOP_OUT; |
2332 | 0 | JUMP; |
2333 | |
|
2334 | 0 | CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND); |
2335 | 0 | if (ON_STR_BEGIN(s)) { |
2336 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) |
2337 | 0 | goto fail; |
2338 | 0 | } |
2339 | 0 | else if (ON_STR_END(s)) { |
2340 | 0 | if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
2341 | 0 | goto fail; |
2342 | 0 | } |
2343 | 0 | else { |
2344 | 0 | if (ONIGENC_IS_MBC_WORD(encode, s, end) |
2345 | 0 | != ONIGENC_IS_MBC_WORD(encode, sprev, end)) |
2346 | 0 | goto fail; |
2347 | 0 | } |
2348 | 0 | MOP_OUT; |
2349 | 0 | JUMP; |
2350 | |
|
2351 | 0 | CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND); |
2352 | 0 | if (ON_STR_BEGIN(s)) { |
2353 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) |
2354 | 0 | goto fail; |
2355 | 0 | } |
2356 | 0 | else if (ON_STR_END(s)) { |
2357 | 0 | if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
2358 | 0 | goto fail; |
2359 | 0 | } |
2360 | 0 | else { |
2361 | 0 | if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end) |
2362 | 0 | != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) |
2363 | 0 | goto fail; |
2364 | 0 | } |
2365 | 0 | MOP_OUT; |
2366 | 0 | JUMP; |
2367 | |
|
2368 | 0 | #ifdef USE_WORD_BEGIN_END |
2369 | 0 | CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN); |
2370 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { |
2371 | 0 | if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { |
2372 | 0 | MOP_OUT; |
2373 | 0 | JUMP; |
2374 | 0 | } |
2375 | 0 | } |
2376 | 0 | goto fail; |
2377 | | |
2378 | 0 | CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN); |
2379 | 0 | if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { |
2380 | 0 | if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { |
2381 | 0 | MOP_OUT; |
2382 | 0 | JUMP; |
2383 | 0 | } |
2384 | 0 | } |
2385 | 0 | goto fail; |
2386 | | |
2387 | 0 | CASE(OP_WORD_END) MOP_IN(OP_WORD_END); |
2388 | 0 | if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { |
2389 | 0 | if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { |
2390 | 0 | MOP_OUT; |
2391 | 0 | JUMP; |
2392 | 0 | } |
2393 | 0 | } |
2394 | 0 | goto fail; |
2395 | | |
2396 | 0 | CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END); |
2397 | 0 | if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) { |
2398 | 0 | if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) { |
2399 | 0 | MOP_OUT; |
2400 | 0 | JUMP; |
2401 | 0 | } |
2402 | 0 | } |
2403 | 0 | goto fail; |
2404 | 0 | #endif |
2405 | | |
2406 | 0 | CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF); |
2407 | 0 | if (! ON_STR_BEGIN(s)) goto fail; |
2408 | 0 | if (IS_NOTBOS(msa->options)) goto fail; |
2409 | | |
2410 | 0 | MOP_OUT; |
2411 | 0 | JUMP; |
2412 | |
|
2413 | 0 | CASE(OP_END_BUF) MOP_IN(OP_END_BUF); |
2414 | 0 | if (! ON_STR_END(s)) goto fail; |
2415 | 0 | if (IS_NOTEOS(msa->options)) goto fail; |
2416 | | |
2417 | 0 | MOP_OUT; |
2418 | 0 | JUMP; |
2419 | |
|
2420 | 39.4k | CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE); |
2421 | 39.4k | if (ON_STR_BEGIN(s)) { |
2422 | 268 | if (IS_NOTBOL(msa->options)) goto fail; |
2423 | 268 | MOP_OUT; |
2424 | 268 | JUMP; |
2425 | 268 | } |
2426 | 39.1k | else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) |
2427 | 39.1k | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
2428 | 39.1k | && !(IS_NEWLINE_CRLF(option) |
2429 | 1.75k | && ONIGENC_IS_MBC_CRNL(encode, sprev, end)) |
2430 | 39.1k | #endif |
2431 | 39.1k | && !ON_STR_END(s)) { |
2432 | 1.75k | MOP_OUT; |
2433 | 1.75k | JUMP; |
2434 | 1.75k | } |
2435 | 39.4k | goto fail; |
2436 | | |
2437 | 39.4k | CASE(OP_END_LINE) MOP_IN(OP_END_LINE); |
2438 | 207 | if (ON_STR_END(s)) { |
2439 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
2440 | | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { |
2441 | | #endif |
2442 | 183 | if (IS_NOTEOL(msa->options)) goto fail; |
2443 | 183 | MOP_OUT; |
2444 | 183 | JUMP; |
2445 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
2446 | | } |
2447 | | #endif |
2448 | 183 | } |
2449 | 24 | else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { |
2450 | 24 | MOP_OUT; |
2451 | 24 | JUMP; |
2452 | 24 | } |
2453 | 207 | goto fail; |
2454 | | |
2455 | 207 | CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF); |
2456 | 0 | if (ON_STR_END(s)) { |
2457 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
2458 | | if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) { |
2459 | | #endif |
2460 | 0 | if (IS_NOTEOL(msa->options)) goto fail; |
2461 | 0 | MOP_OUT; |
2462 | 0 | JUMP; |
2463 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
2464 | | } |
2465 | | #endif |
2466 | 0 | } |
2467 | 0 | else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) { |
2468 | 0 | UChar* ss = s + enclen(encode, s, end); |
2469 | 0 | if (ON_STR_END(ss)) { |
2470 | 0 | MOP_OUT; |
2471 | 0 | JUMP; |
2472 | 0 | } |
2473 | 0 | #ifdef USE_CRNL_AS_LINE_TERMINATOR |
2474 | 0 | else if (IS_NEWLINE_CRLF(option) |
2475 | 0 | && ONIGENC_IS_MBC_CRNL(encode, s, end)) { |
2476 | 0 | ss += enclen(encode, ss, end); |
2477 | 0 | if (ON_STR_END(ss)) { |
2478 | 0 | MOP_OUT; |
2479 | 0 | JUMP; |
2480 | 0 | } |
2481 | 0 | } |
2482 | 0 | #endif |
2483 | 0 | } |
2484 | 0 | goto fail; |
2485 | | |
2486 | 0 | CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION); |
2487 | 0 | if (s != msa->gpos) |
2488 | 0 | goto fail; |
2489 | | |
2490 | 0 | MOP_OUT; |
2491 | 0 | JUMP; |
2492 | |
|
2493 | 0 | CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH); |
2494 | 0 | GET_MEMNUM_INC(mem, p); |
2495 | 0 | STACK_PUSH_MEM_START(mem, s); |
2496 | 0 | MOP_OUT; |
2497 | 0 | JUMP; |
2498 | |
|
2499 | 5.52k | CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START); |
2500 | 5.52k | GET_MEMNUM_INC(mem, p); |
2501 | 5.52k | mem_start_stk[mem] = (OnigStackIndex )((void* )s); |
2502 | 5.52k | mem_end_stk[mem] = INVALID_STACK_INDEX; |
2503 | 5.52k | MOP_OUT; |
2504 | 5.52k | JUMP; |
2505 | | |
2506 | 5.52k | CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH); |
2507 | 0 | GET_MEMNUM_INC(mem, p); |
2508 | 0 | STACK_PUSH_MEM_END(mem, s); |
2509 | 0 | MOP_OUT; |
2510 | 0 | JUMP; |
2511 | |
|
2512 | 4.89k | CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END); |
2513 | 4.89k | GET_MEMNUM_INC(mem, p); |
2514 | 4.89k | mem_end_stk[mem] = (OnigStackIndex )((void* )s); |
2515 | 4.89k | MOP_OUT; |
2516 | 4.89k | JUMP; |
2517 | | |
2518 | 4.89k | CASE(OP_KEEP) MOP_IN(OP_KEEP); |
2519 | 0 | pkeep = s; |
2520 | 0 | MOP_OUT; |
2521 | 0 | JUMP; |
2522 | |
|
2523 | 0 | #ifdef USE_SUBEXP_CALL |
2524 | 0 | CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC); |
2525 | 0 | GET_MEMNUM_INC(mem, p); |
2526 | 0 | STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ |
2527 | 0 | STACK_PUSH_MEM_END(mem, s); |
2528 | 0 | mem_start_stk[mem] = GET_STACK_INDEX(stkp); |
2529 | 0 | MOP_OUT; |
2530 | 0 | JUMP; |
2531 | |
|
2532 | 0 | CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC); |
2533 | 0 | GET_MEMNUM_INC(mem, p); |
2534 | 0 | mem_end_stk[mem] = (OnigStackIndex )((void* )s); |
2535 | 0 | STACK_GET_MEM_START(mem, stkp); |
2536 | |
|
2537 | 0 | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
2538 | 0 | mem_start_stk[mem] = GET_STACK_INDEX(stkp); |
2539 | 0 | else |
2540 | 0 | mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); |
2541 | |
|
2542 | 0 | STACK_PUSH_MEM_END_MARK(mem); |
2543 | 0 | MOP_OUT; |
2544 | 0 | JUMP; |
2545 | 0 | #endif |
2546 | |
|
2547 | 0 | CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1); |
2548 | 0 | mem = 1; |
2549 | 0 | goto backref; |
2550 | | |
2551 | 0 | CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2); |
2552 | 0 | mem = 2; |
2553 | 0 | goto backref; |
2554 | | |
2555 | 0 | CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN); |
2556 | 0 | GET_MEMNUM_INC(mem, p); |
2557 | 0 | backref: |
2558 | 0 | { |
2559 | 0 | int len; |
2560 | 0 | UChar *pstart, *pend; |
2561 | | |
2562 | | /* If you want to remove the following line, |
2563 | | you must perform this check at parse and compile time instead. */ |
2564 | 0 | if (mem > num_mem) goto fail; |
2565 | 0 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; |
2566 | 0 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; |
2567 | | |
2568 | 0 | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
2569 | 0 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
2570 | 0 | else |
2571 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
2572 | |
|
2573 | 0 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
2574 | 0 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
2575 | 0 | : (UChar* )((void* )mem_end_stk[mem])); |
2576 | 0 | n = pend - pstart; |
2577 | 0 | DATA_ENSURE(n); |
2578 | 0 | sprev = s; |
2579 | 0 | STRING_CMP(pstart, s, n); |
2580 | 0 | while (sprev + (len = enclen(encode, sprev, end)) < s) |
2581 | 0 | sprev += len; |
2582 | |
|
2583 | 0 | MOP_OUT; |
2584 | 0 | JUMP; |
2585 | 0 | } |
2586 | | |
2587 | 0 | CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC); |
2588 | 0 | GET_MEMNUM_INC(mem, p); |
2589 | 0 | { |
2590 | 0 | int len; |
2591 | 0 | UChar *pstart, *pend; |
2592 | | |
2593 | | /* If you want to remove the following line, |
2594 | | you must perform this check at parse and compile time instead. */ |
2595 | 0 | if (mem > num_mem) goto fail; |
2596 | 0 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; |
2597 | 0 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; |
2598 | | |
2599 | 0 | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
2600 | 0 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
2601 | 0 | else |
2602 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
2603 | |
|
2604 | 0 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
2605 | 0 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
2606 | 0 | : (UChar* )((void* )mem_end_stk[mem])); |
2607 | 0 | n = pend - pstart; |
2608 | 0 | DATA_ENSURE(n); |
2609 | 0 | sprev = s; |
2610 | 0 | STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end); |
2611 | 0 | while (sprev + (len = enclen(encode, sprev, end)) < s) |
2612 | 0 | sprev += len; |
2613 | |
|
2614 | 0 | MOP_OUT; |
2615 | 0 | JUMP; |
2616 | 0 | } |
2617 | 0 | NEXT; |
2618 | |
|
2619 | 0 | CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI); |
2620 | 0 | { |
2621 | 0 | int len, is_fail; |
2622 | 0 | UChar *pstart, *pend, *swork; |
2623 | |
|
2624 | 0 | GET_LENGTH_INC(tlen, p); |
2625 | 0 | for (i = 0; i < tlen; i++) { |
2626 | 0 | GET_MEMNUM_INC(mem, p); |
2627 | |
|
2628 | 0 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; |
2629 | 0 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; |
2630 | | |
2631 | 0 | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
2632 | 0 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
2633 | 0 | else |
2634 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
2635 | |
|
2636 | 0 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
2637 | 0 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
2638 | 0 | : (UChar* )((void* )mem_end_stk[mem])); |
2639 | 0 | n = pend - pstart; |
2640 | 0 | DATA_ENSURE_CONTINUE(n); |
2641 | 0 | sprev = s; |
2642 | 0 | swork = s; |
2643 | 0 | STRING_CMP_VALUE(pstart, swork, n, is_fail); |
2644 | 0 | if (is_fail) continue; |
2645 | 0 | s = swork; |
2646 | 0 | while (sprev + (len = enclen(encode, sprev, end)) < s) |
2647 | 0 | sprev += len; |
2648 | |
|
2649 | 0 | p += (SIZE_MEMNUM * (tlen - i - 1)); |
2650 | 0 | break; /* success */ |
2651 | 0 | } |
2652 | 0 | if (i == tlen) goto fail; |
2653 | 0 | MOP_OUT; |
2654 | 0 | JUMP; |
2655 | 0 | } |
2656 | 0 | NEXT; |
2657 | |
|
2658 | 0 | CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC); |
2659 | 0 | { |
2660 | 0 | int len, is_fail; |
2661 | 0 | UChar *pstart, *pend, *swork; |
2662 | |
|
2663 | 0 | GET_LENGTH_INC(tlen, p); |
2664 | 0 | for (i = 0; i < tlen; i++) { |
2665 | 0 | GET_MEMNUM_INC(mem, p); |
2666 | |
|
2667 | 0 | if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; |
2668 | 0 | if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; |
2669 | | |
2670 | 0 | if (BIT_STATUS_AT(reg->bt_mem_start, mem)) |
2671 | 0 | pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; |
2672 | 0 | else |
2673 | 0 | pstart = (UChar* )((void* )mem_start_stk[mem]); |
2674 | |
|
2675 | 0 | pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) |
2676 | 0 | ? STACK_AT(mem_end_stk[mem])->u.mem.pstr |
2677 | 0 | : (UChar* )((void* )mem_end_stk[mem])); |
2678 | 0 | n = pend - pstart; |
2679 | 0 | DATA_ENSURE_CONTINUE(n); |
2680 | 0 | sprev = s; |
2681 | 0 | swork = s; |
2682 | 0 | STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); |
2683 | 0 | if (is_fail) continue; |
2684 | 0 | s = swork; |
2685 | 0 | while (sprev + (len = enclen(encode, sprev, end)) < s) |
2686 | 0 | sprev += len; |
2687 | |
|
2688 | 0 | p += (SIZE_MEMNUM * (tlen - i - 1)); |
2689 | 0 | break; /* success */ |
2690 | 0 | } |
2691 | 0 | if (i == tlen) goto fail; |
2692 | 0 | MOP_OUT; |
2693 | 0 | JUMP; |
2694 | 0 | } |
2695 | | |
2696 | 0 | #ifdef USE_BACKREF_WITH_LEVEL |
2697 | 0 | CASE(OP_BACKREF_WITH_LEVEL) |
2698 | 0 | { |
2699 | 0 | int len; |
2700 | 0 | OnigOptionType ic; |
2701 | 0 | LengthType level; |
2702 | |
|
2703 | 0 | GET_OPTION_INC(ic, p); |
2704 | 0 | GET_LENGTH_INC(level, p); |
2705 | 0 | GET_LENGTH_INC(tlen, p); |
2706 | |
|
2707 | 0 | sprev = s; |
2708 | 0 | if (backref_match_at_nested_level(reg, stk, stk_base, ic, |
2709 | 0 | case_fold_flag, (int )level, (int )tlen, p, &s, end)) { |
2710 | 0 | while (sprev + (len = enclen(encode, sprev, end)) < s) |
2711 | 0 | sprev += len; |
2712 | |
|
2713 | 0 | p += (SIZE_MEMNUM * tlen); |
2714 | 0 | } |
2715 | 0 | else |
2716 | 0 | goto fail; |
2717 | | |
2718 | 0 | MOP_OUT; |
2719 | 0 | JUMP; |
2720 | 0 | } |
2721 | | |
2722 | 0 | #endif |
2723 | | |
2724 | | #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ |
2725 | | CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH); |
2726 | | GET_OPTION_INC(option, p); |
2727 | | STACK_PUSH_ALT(p, s, sprev, pkeep); |
2728 | | p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; |
2729 | | MOP_OUT; |
2730 | | JUMP; |
2731 | | |
2732 | | CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION); |
2733 | | GET_OPTION_INC(option, p); |
2734 | | MOP_OUT; |
2735 | | JUMP; |
2736 | | #endif |
2737 | | |
2738 | 0 | CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START); |
2739 | 0 | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
2740 | 0 | STACK_PUSH_NULL_CHECK_START(mem, s); |
2741 | 0 | MOP_OUT; |
2742 | 0 | JUMP; |
2743 | |
|
2744 | 0 | CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END); |
2745 | 0 | { |
2746 | 0 | int isnull; |
2747 | |
|
2748 | 0 | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
2749 | 0 | STACK_NULL_CHECK(isnull, mem, s); |
2750 | 0 | if (isnull) { |
2751 | | #ifdef ONIG_DEBUG_MATCH |
2752 | | fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n", |
2753 | | (int )mem, (uintptr_t )s, s); |
2754 | | #endif |
2755 | 0 | null_check_found: |
2756 | | /* empty loop founded, skip next instruction */ |
2757 | 0 | switch (*p++) { |
2758 | 0 | case OP_JUMP: |
2759 | 0 | case OP_PUSH: |
2760 | 0 | p += SIZE_RELADDR; |
2761 | 0 | break; |
2762 | 0 | case OP_REPEAT_INC: |
2763 | 0 | case OP_REPEAT_INC_NG: |
2764 | 0 | case OP_REPEAT_INC_SG: |
2765 | 0 | case OP_REPEAT_INC_NG_SG: |
2766 | 0 | p += SIZE_MEMNUM; |
2767 | 0 | break; |
2768 | 0 | default: |
2769 | 0 | goto unexpected_bytecode_error; |
2770 | 0 | break; |
2771 | 0 | } |
2772 | 0 | } |
2773 | 0 | } |
2774 | 0 | MOP_OUT; |
2775 | 0 | JUMP; |
2776 | |
|
2777 | 0 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
2778 | 0 | CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); |
2779 | 0 | { |
2780 | 0 | int isnull; |
2781 | |
|
2782 | 0 | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
2783 | 0 | STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); |
2784 | 0 | if (isnull) { |
2785 | | # ifdef ONIG_DEBUG_MATCH |
2786 | | fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", |
2787 | | (int )mem, (uintptr_t )s, s); |
2788 | | # endif |
2789 | 0 | if (isnull == -1) goto fail; |
2790 | 0 | goto null_check_found; |
2791 | 0 | } |
2792 | 0 | } |
2793 | 0 | MOP_OUT; |
2794 | 0 | JUMP; |
2795 | 0 | #endif |
2796 | |
|
2797 | 0 | #ifdef USE_SUBEXP_CALL |
2798 | 0 | CASE(OP_NULL_CHECK_END_MEMST_PUSH) |
2799 | 0 | MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); |
2800 | 0 | { |
2801 | 0 | int isnull; |
2802 | |
|
2803 | 0 | GET_MEMNUM_INC(mem, p); /* mem: null check id */ |
2804 | 0 | # ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT |
2805 | 0 | STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); |
2806 | | # else |
2807 | | STACK_NULL_CHECK_REC(isnull, mem, s); |
2808 | | # endif |
2809 | 0 | if (isnull) { |
2810 | | # ifdef ONIG_DEBUG_MATCH |
2811 | | fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n", |
2812 | | (int )mem, (uintptr_t )s, s); |
2813 | | # endif |
2814 | 0 | if (isnull == -1) goto fail; |
2815 | 0 | goto null_check_found; |
2816 | 0 | } |
2817 | 0 | else { |
2818 | 0 | STACK_PUSH_NULL_CHECK_END(mem); |
2819 | 0 | } |
2820 | 0 | } |
2821 | 0 | MOP_OUT; |
2822 | 0 | JUMP; |
2823 | 0 | #endif |
2824 | |
|
2825 | 290k | CASE(OP_JUMP) MOP_IN(OP_JUMP); |
2826 | 290k | GET_RELADDR_INC(addr, p); |
2827 | 290k | p += addr; |
2828 | 290k | MOP_OUT; |
2829 | 290k | CHECK_INTERRUPT_IN_MATCH_AT; |
2830 | 290k | JUMP; |
2831 | | |
2832 | 295k | CASE(OP_PUSH) MOP_IN(OP_PUSH); |
2833 | 295k | GET_RELADDR_INC(addr, p); |
2834 | 295k | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
2835 | 295k | MOP_OUT; |
2836 | 295k | JUMP; |
2837 | | |
2838 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
2839 | | CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH); |
2840 | | GET_STATE_CHECK_NUM_INC(mem, p); |
2841 | | STATE_CHECK_VAL(scv, mem); |
2842 | | if (scv) goto fail; |
2843 | | |
2844 | | GET_RELADDR_INC(addr, p); |
2845 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); |
2846 | | MOP_OUT; |
2847 | | JUMP; |
2848 | | |
2849 | | CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); |
2850 | | GET_STATE_CHECK_NUM_INC(mem, p); |
2851 | | GET_RELADDR_INC(addr, p); |
2852 | | STATE_CHECK_VAL(scv, mem); |
2853 | | if (scv) { |
2854 | | p += addr; |
2855 | | } |
2856 | | else { |
2857 | | STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep); |
2858 | | } |
2859 | | MOP_OUT; |
2860 | | JUMP; |
2861 | | |
2862 | | CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK); |
2863 | | GET_STATE_CHECK_NUM_INC(mem, p); |
2864 | | STATE_CHECK_VAL(scv, mem); |
2865 | | if (scv) goto fail; |
2866 | | |
2867 | | STACK_PUSH_STATE_CHECK(s, mem); |
2868 | | MOP_OUT; |
2869 | | JUMP; |
2870 | | #endif /* USE_COMBINATION_EXPLOSION_CHECK */ |
2871 | | |
2872 | 295k | CASE(OP_POP) MOP_IN(OP_POP); |
2873 | 290k | STACK_POP_ONE; |
2874 | 290k | MOP_OUT; |
2875 | 290k | JUMP; |
2876 | | |
2877 | | #ifdef USE_OP_PUSH_OR_JUMP_EXACT |
2878 | | CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1); |
2879 | | GET_RELADDR_INC(addr, p); |
2880 | | if (*p == *s && DATA_ENSURE_CHECK1) { |
2881 | | p++; |
2882 | | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
2883 | | MOP_OUT; |
2884 | | JUMP; |
2885 | | } |
2886 | | p += (addr + 1); |
2887 | | MOP_OUT; |
2888 | | JUMP; |
2889 | | #endif |
2890 | | |
2891 | 290k | CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT); |
2892 | 0 | GET_RELADDR_INC(addr, p); |
2893 | 0 | if (*p == *s) { |
2894 | 0 | p++; |
2895 | 0 | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
2896 | 0 | MOP_OUT; |
2897 | 0 | JUMP; |
2898 | 0 | } |
2899 | 0 | p++; |
2900 | 0 | MOP_OUT; |
2901 | 0 | JUMP; |
2902 | |
|
2903 | 0 | CASE(OP_REPEAT) MOP_IN(OP_REPEAT); |
2904 | 0 | { |
2905 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2906 | 0 | GET_RELADDR_INC(addr, p); |
2907 | |
|
2908 | 0 | STACK_ENSURE(1); |
2909 | 0 | repeat_stk[mem] = GET_STACK_INDEX(stk); |
2910 | 0 | STACK_PUSH_REPEAT(mem, p); |
2911 | | |
2912 | 0 | if (reg->repeat_range[mem].lower == 0) { |
2913 | 0 | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); |
2914 | 0 | } |
2915 | 0 | } |
2916 | 0 | MOP_OUT; |
2917 | 0 | JUMP; |
2918 | |
|
2919 | 0 | CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG); |
2920 | 0 | { |
2921 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2922 | 0 | GET_RELADDR_INC(addr, p); |
2923 | |
|
2924 | 0 | STACK_ENSURE(1); |
2925 | 0 | repeat_stk[mem] = GET_STACK_INDEX(stk); |
2926 | 0 | STACK_PUSH_REPEAT(mem, p); |
2927 | | |
2928 | 0 | if (reg->repeat_range[mem].lower == 0) { |
2929 | 0 | STACK_PUSH_ALT(p, s, sprev, pkeep); |
2930 | 0 | p += addr; |
2931 | 0 | } |
2932 | 0 | } |
2933 | 0 | MOP_OUT; |
2934 | 0 | JUMP; |
2935 | |
|
2936 | 0 | CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC); |
2937 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2938 | 0 | si = repeat_stk[mem]; |
2939 | 0 | stkp = STACK_AT(si); |
2940 | |
|
2941 | 0 | repeat_inc: |
2942 | 0 | stkp->u.repeat.count++; |
2943 | 0 | if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { |
2944 | | /* end of repeat. Nothing to do. */ |
2945 | 0 | } |
2946 | 0 | else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { |
2947 | 0 | STACK_PUSH_ALT(p, s, sprev, pkeep); |
2948 | 0 | p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ |
2949 | 0 | } |
2950 | 0 | else { |
2951 | 0 | p = stkp->u.repeat.pcode; |
2952 | 0 | } |
2953 | 0 | STACK_PUSH_REPEAT_INC(si); |
2954 | 0 | MOP_OUT; |
2955 | 0 | CHECK_INTERRUPT_IN_MATCH_AT; |
2956 | 0 | JUMP; |
2957 | |
|
2958 | 0 | CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG); |
2959 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2960 | 0 | STACK_GET_REPEAT(mem, stkp); |
2961 | 0 | si = GET_STACK_INDEX(stkp); |
2962 | 0 | goto repeat_inc; |
2963 | | |
2964 | 0 | CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG); |
2965 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2966 | 0 | si = repeat_stk[mem]; |
2967 | 0 | stkp = STACK_AT(si); |
2968 | |
|
2969 | 0 | repeat_inc_ng: |
2970 | 0 | stkp->u.repeat.count++; |
2971 | 0 | if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { |
2972 | 0 | if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { |
2973 | 0 | UChar* pcode = stkp->u.repeat.pcode; |
2974 | |
|
2975 | 0 | STACK_PUSH_REPEAT_INC(si); |
2976 | 0 | STACK_PUSH_ALT(pcode, s, sprev, pkeep); |
2977 | 0 | } |
2978 | 0 | else { |
2979 | 0 | p = stkp->u.repeat.pcode; |
2980 | 0 | STACK_PUSH_REPEAT_INC(si); |
2981 | 0 | } |
2982 | 0 | } |
2983 | 0 | else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { |
2984 | 0 | STACK_PUSH_REPEAT_INC(si); |
2985 | 0 | } |
2986 | 0 | MOP_OUT; |
2987 | 0 | CHECK_INTERRUPT_IN_MATCH_AT; |
2988 | 0 | JUMP; |
2989 | |
|
2990 | 0 | CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG); |
2991 | 0 | GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ |
2992 | 0 | STACK_GET_REPEAT(mem, stkp); |
2993 | 0 | si = GET_STACK_INDEX(stkp); |
2994 | 0 | goto repeat_inc_ng; |
2995 | | |
2996 | 0 | CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS); |
2997 | 0 | STACK_PUSH_POS(s, sprev, pkeep); |
2998 | 0 | MOP_OUT; |
2999 | 0 | JUMP; |
3000 | |
|
3001 | 0 | CASE(OP_POP_POS) MOP_IN(OP_POP_POS); |
3002 | 0 | { |
3003 | 0 | STACK_POS_END(stkp); |
3004 | 0 | s = stkp->u.state.pstr; |
3005 | 0 | sprev = stkp->u.state.pstr_prev; |
3006 | 0 | } |
3007 | 0 | MOP_OUT; |
3008 | 0 | JUMP; |
3009 | |
|
3010 | 0 | CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT); |
3011 | 0 | GET_RELADDR_INC(addr, p); |
3012 | 0 | STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep); |
3013 | 0 | MOP_OUT; |
3014 | 0 | JUMP; |
3015 | |
|
3016 | 0 | CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS); |
3017 | 0 | STACK_POP_TIL_POS_NOT; |
3018 | 0 | goto fail; |
3019 | | |
3020 | 0 | CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT); |
3021 | 0 | STACK_PUSH_STOP_BT; |
3022 | 0 | MOP_OUT; |
3023 | 0 | JUMP; |
3024 | |
|
3025 | 0 | CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT); |
3026 | 0 | STACK_STOP_BT_END; |
3027 | 0 | MOP_OUT; |
3028 | 0 | JUMP; |
3029 | |
|
3030 | 0 | CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND); |
3031 | 0 | GET_LENGTH_INC(tlen, p); |
3032 | 0 | s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); |
3033 | 0 | if (IS_NULL(s)) goto fail; |
3034 | 0 | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); |
3035 | 0 | MOP_OUT; |
3036 | 0 | JUMP; |
3037 | |
|
3038 | 0 | CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); |
3039 | 0 | GET_RELADDR_INC(addr, p); |
3040 | 0 | GET_LENGTH_INC(tlen, p); |
3041 | 0 | q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); |
3042 | 0 | if (IS_NULL(q)) { |
3043 | | /* too short case -> success. ex. /(?<!XXX)a/.match("a") |
3044 | | If you want to change to fail, replace following line. */ |
3045 | 0 | p += addr; |
3046 | | /* goto fail; */ |
3047 | 0 | } |
3048 | 0 | else { |
3049 | 0 | STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep); |
3050 | 0 | s = q; |
3051 | 0 | sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); |
3052 | 0 | } |
3053 | 0 | MOP_OUT; |
3054 | 0 | JUMP; |
3055 | |
|
3056 | 0 | CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); |
3057 | 0 | STACK_POP_TIL_LOOK_BEHIND_NOT; |
3058 | 0 | goto fail; |
3059 | | |
3060 | 0 | CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS); |
3061 | | /* Save the absent-start-pos and the original end-pos. */ |
3062 | 0 | STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS); |
3063 | 0 | MOP_OUT; |
3064 | 0 | JUMP; |
3065 | |
|
3066 | 0 | CASE(OP_ABSENT) MOP_IN(OP_ABSENT); |
3067 | 0 | { |
3068 | 0 | const UChar* aend = ABSENT_END_POS; |
3069 | 0 | UChar* absent; |
3070 | 0 | UChar* selfp = p - 1; |
3071 | |
|
3072 | 0 | STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */ |
3073 | 0 | GET_RELADDR_INC(addr, p); |
3074 | | #ifdef ONIG_DEBUG_MATCH |
3075 | | fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend); |
3076 | | #endif |
3077 | 0 | if ((absent > aend) && (s > absent)) { |
3078 | | /* An empty match occurred in (?~...) at the start point. |
3079 | | * Never match. */ |
3080 | 0 | STACK_POP; |
3081 | 0 | goto fail; |
3082 | 0 | } |
3083 | 0 | else if ((s >= aend) && (s > absent)) { |
3084 | 0 | if (s > aend) { |
3085 | | /* Only one (or less) character matched in the last iteration. |
3086 | | * This is not a possible point. */ |
3087 | 0 | goto fail; |
3088 | 0 | } |
3089 | | /* All possible points were found. Try matching after (?~...). */ |
3090 | 0 | DATA_ENSURE(0); |
3091 | 0 | p += addr; |
3092 | 0 | } |
3093 | 0 | else { |
3094 | 0 | STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */ |
3095 | | /* For approximating enclen. Strict version of enclen does not work here. */ |
3096 | 0 | n = enclen_approximate(encode, s, end); |
3097 | 0 | STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */ |
3098 | 0 | STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */ |
3099 | 0 | STACK_PUSH_ABSENT; |
3100 | 0 | ABSENT_END_POS = aend; |
3101 | 0 | } |
3102 | 0 | } |
3103 | 0 | MOP_OUT; |
3104 | 0 | JUMP; |
3105 | |
|
3106 | 0 | CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END); |
3107 | | /* The pattern inside (?~...) was matched. |
3108 | | * Set the end-pos temporary and go to next iteration. */ |
3109 | 0 | if (sprev < ABSENT_END_POS) |
3110 | 0 | ABSENT_END_POS = sprev; |
3111 | | #ifdef ONIG_DEBUG_MATCH |
3112 | | fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS); |
3113 | | #endif |
3114 | 0 | STACK_POP_TIL_ABSENT; |
3115 | 0 | goto fail; |
3116 | | |
3117 | 0 | #ifdef USE_SUBEXP_CALL |
3118 | 0 | CASE(OP_CALL) MOP_IN(OP_CALL); |
3119 | 0 | GET_ABSADDR_INC(addr, p); |
3120 | 0 | STACK_PUSH_CALL_FRAME(p); |
3121 | 0 | p = reg->p + addr; |
3122 | 0 | MOP_OUT; |
3123 | 0 | JUMP; |
3124 | |
|
3125 | 0 | CASE(OP_RETURN) MOP_IN(OP_RETURN); |
3126 | 0 | STACK_RETURN(p); |
3127 | 0 | STACK_PUSH_RETURN; |
3128 | 0 | MOP_OUT; |
3129 | 0 | JUMP; |
3130 | 0 | #endif |
3131 | |
|
3132 | 0 | CASE(OP_CONDITION) MOP_IN(OP_CONDITION); |
3133 | 0 | GET_MEMNUM_INC(mem, p); |
3134 | 0 | GET_RELADDR_INC(addr, p); |
3135 | 0 | if ((mem > num_mem) || |
3136 | 0 | (mem_end_stk[mem] == INVALID_STACK_INDEX) || |
3137 | 0 | (mem_start_stk[mem] == INVALID_STACK_INDEX)) { |
3138 | 0 | p += addr; |
3139 | 0 | } |
3140 | 0 | MOP_OUT; |
3141 | 0 | JUMP; |
3142 | |
|
3143 | 39.2k | CASE(OP_FINISH) |
3144 | 39.2k | goto finish; |
3145 | | |
3146 | 39.2k | CASE(OP_FAIL) |
3147 | 0 | if (0) { |
3148 | | /* fall */ |
3149 | 43.9k | fail: |
3150 | 43.9k | MOP_OUT; |
3151 | 43.9k | } |
3152 | 43.9k | MOP_IN(OP_FAIL); |
3153 | 43.9k | STACK_POP; |
3154 | 43.9k | p = stk->u.state.pcode; |
3155 | 43.9k | s = stk->u.state.pstr; |
3156 | 43.9k | sprev = stk->u.state.pstr_prev; |
3157 | 43.9k | pkeep = stk->u.state.pkeep; |
3158 | | |
3159 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
3160 | | if (stk->u.state.state_check != 0) { |
3161 | | stk->type = STK_STATE_CHECK_MARK; |
3162 | | stk++; |
3163 | | } |
3164 | | #endif |
3165 | | |
3166 | 43.9k | MOP_OUT; |
3167 | 43.9k | CHECK_INTERRUPT_IN_MATCH_AT; |
3168 | 43.9k | JUMP; |
3169 | | |
3170 | 43.9k | DEFAULT |
3171 | 0 | goto bytecode_error; |
3172 | 43.9k | } VM_LOOP_END |
3173 | | |
3174 | 39.4k | finish: |
3175 | 39.4k | STACK_SAVE; |
3176 | 39.4k | if (xmalloc_base) xfree(xmalloc_base); |
3177 | 39.4k | return best_len; |
3178 | | |
3179 | | #ifdef ONIG_DEBUG |
3180 | | stack_error: |
3181 | | STACK_SAVE; |
3182 | | if (xmalloc_base) xfree(xmalloc_base); |
3183 | | return ONIGERR_STACK_BUG; |
3184 | | #endif |
3185 | | |
3186 | 0 | bytecode_error: |
3187 | 0 | STACK_SAVE; |
3188 | 0 | if (xmalloc_base) xfree(xmalloc_base); |
3189 | 0 | return ONIGERR_UNDEFINED_BYTECODE; |
3190 | | |
3191 | 0 | unexpected_bytecode_error: |
3192 | 0 | STACK_SAVE; |
3193 | 0 | if (xmalloc_base) xfree(xmalloc_base); |
3194 | 0 | return ONIGERR_UNEXPECTED_BYTECODE; |
3195 | 43.9k | } |
3196 | | |
3197 | | |
3198 | | static UChar* |
3199 | | slow_search(OnigEncoding enc, UChar* target, UChar* target_end, |
3200 | | const UChar* text, const UChar* text_end, UChar* text_range) |
3201 | 275 | { |
3202 | 275 | UChar *t, *p, *s, *end; |
3203 | | |
3204 | 275 | end = (UChar* )text_end; |
3205 | 275 | end -= target_end - target - 1; |
3206 | 275 | if (end > text_range) |
3207 | 0 | end = text_range; |
3208 | | |
3209 | 275 | s = (UChar* )text; |
3210 | | |
3211 | 275 | if (enc->max_enc_len == enc->min_enc_len) { |
3212 | 0 | int n = enc->max_enc_len; |
3213 | |
|
3214 | 0 | while (s < end) { |
3215 | 0 | if (*s == *target) { |
3216 | 0 | p = s + 1; |
3217 | 0 | t = target + 1; |
3218 | 0 | if (target_end == t || memcmp(t, p, target_end - t) == 0) |
3219 | 0 | return s; |
3220 | 0 | } |
3221 | 0 | s += n; |
3222 | 0 | } |
3223 | 0 | return (UChar* )NULL; |
3224 | 0 | } |
3225 | 61.4k | while (s < end) { |
3226 | 61.4k | if (*s == *target) { |
3227 | 268 | p = s + 1; |
3228 | 268 | t = target + 1; |
3229 | 268 | if (target_end == t || memcmp(t, p, target_end - t) == 0) |
3230 | 268 | return s; |
3231 | 268 | } |
3232 | 61.2k | s += enclen(enc, s, text_end); |
3233 | 61.2k | } |
3234 | | |
3235 | 7 | return (UChar* )NULL; |
3236 | 275 | } |
3237 | | |
3238 | | static int |
3239 | | str_lower_case_match(OnigEncoding enc, int case_fold_flag, |
3240 | | const UChar* t, const UChar* tend, |
3241 | | const UChar* p, const UChar* end) |
3242 | 0 | { |
3243 | 0 | int lowlen; |
3244 | 0 | UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; |
3245 | |
|
3246 | 0 | while (t < tend) { |
3247 | 0 | lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); |
3248 | 0 | q = lowbuf; |
3249 | 0 | while (lowlen > 0) { |
3250 | 0 | if (*t++ != *q++) return 0; |
3251 | 0 | lowlen--; |
3252 | 0 | } |
3253 | 0 | } |
3254 | | |
3255 | 0 | return 1; |
3256 | 0 | } |
3257 | | |
3258 | | static UChar* |
3259 | | slow_search_ic(OnigEncoding enc, int case_fold_flag, |
3260 | | UChar* target, UChar* target_end, |
3261 | | const UChar* text, const UChar* text_end, UChar* text_range) |
3262 | 0 | { |
3263 | 0 | UChar *s, *end; |
3264 | |
|
3265 | 0 | end = (UChar* )text_end; |
3266 | 0 | end -= target_end - target - 1; |
3267 | 0 | if (end > text_range) |
3268 | 0 | end = text_range; |
3269 | |
|
3270 | 0 | s = (UChar* )text; |
3271 | |
|
3272 | 0 | while (s < end) { |
3273 | 0 | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
3274 | 0 | s, text_end)) |
3275 | 0 | return s; |
3276 | | |
3277 | 0 | s += enclen(enc, s, text_end); |
3278 | 0 | } |
3279 | | |
3280 | 0 | return (UChar* )NULL; |
3281 | 0 | } |
3282 | | |
3283 | | static UChar* |
3284 | | slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, |
3285 | | const UChar* text, const UChar* adjust_text, |
3286 | | const UChar* text_end, const UChar* text_start) |
3287 | 0 | { |
3288 | 0 | UChar *t, *p, *s; |
3289 | |
|
3290 | 0 | s = (UChar* )text_end; |
3291 | 0 | s -= (target_end - target); |
3292 | 0 | if (s > text_start) |
3293 | 0 | s = (UChar* )text_start; |
3294 | 0 | else |
3295 | 0 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); |
3296 | |
|
3297 | 0 | while (s >= text) { |
3298 | 0 | if (*s == *target) { |
3299 | 0 | p = s + 1; |
3300 | 0 | t = target + 1; |
3301 | 0 | while (t < target_end) { |
3302 | 0 | if (*t != *p++) |
3303 | 0 | break; |
3304 | 0 | t++; |
3305 | 0 | } |
3306 | 0 | if (t == target_end) |
3307 | 0 | return s; |
3308 | 0 | } |
3309 | 0 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
3310 | 0 | } |
3311 | | |
3312 | 0 | return (UChar* )NULL; |
3313 | 0 | } |
3314 | | |
3315 | | static UChar* |
3316 | | slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, |
3317 | | UChar* target, UChar* target_end, |
3318 | | const UChar* text, const UChar* adjust_text, |
3319 | | const UChar* text_end, const UChar* text_start) |
3320 | 0 | { |
3321 | 0 | UChar *s; |
3322 | |
|
3323 | 0 | s = (UChar* )text_end; |
3324 | 0 | s -= (target_end - target); |
3325 | 0 | if (s > text_start) |
3326 | 0 | s = (UChar* )text_start; |
3327 | 0 | else |
3328 | 0 | s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); |
3329 | |
|
3330 | 0 | while (s >= text) { |
3331 | 0 | if (str_lower_case_match(enc, case_fold_flag, |
3332 | 0 | target, target_end, s, text_end)) |
3333 | 0 | return s; |
3334 | | |
3335 | 0 | s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
3336 | 0 | } |
3337 | | |
3338 | 0 | return (UChar* )NULL; |
3339 | 0 | } |
3340 | | |
3341 | | /* Sunday's quick search applied to a multibyte string */ |
3342 | | static UChar* |
3343 | | bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, |
3344 | | const UChar* text, const UChar* text_end, |
3345 | | const UChar* text_range) |
3346 | 0 | { |
3347 | 0 | const UChar *s, *se, *t, *p, *end; |
3348 | 0 | const UChar *tail; |
3349 | 0 | ptrdiff_t skip, tlen1; |
3350 | 0 | OnigEncoding enc = reg->enc; |
3351 | |
|
3352 | | # ifdef ONIG_DEBUG_SEARCH |
3353 | | fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
3354 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
3355 | | # endif |
3356 | |
|
3357 | 0 | tail = target_end - 1; |
3358 | 0 | tlen1 = tail - target; |
3359 | 0 | end = text_range; |
3360 | 0 | if (end + tlen1 > text_end) |
3361 | 0 | end = text_end - tlen1; |
3362 | |
|
3363 | 0 | s = text; |
3364 | |
|
3365 | 0 | while (s < end) { |
3366 | 0 | p = se = s + tlen1; |
3367 | 0 | t = tail; |
3368 | 0 | while (*p == *t) { |
3369 | 0 | if (t == target) return (UChar* )s; |
3370 | 0 | p--; t--; |
3371 | 0 | } |
3372 | 0 | if (s + 1 >= end) break; |
3373 | 0 | skip = reg->map[se[1]]; |
3374 | 0 | t = s; |
3375 | 0 | do { |
3376 | 0 | s += enclen(enc, s, end); |
3377 | 0 | } while ((s - t) < skip && s < end); |
3378 | 0 | } |
3379 | | |
3380 | 0 | return (UChar* )NULL; |
3381 | 0 | } |
3382 | | |
3383 | | /* Sunday's quick search */ |
3384 | | static UChar* |
3385 | | bm_search(regex_t* reg, const UChar* target, const UChar* target_end, |
3386 | | const UChar* text, const UChar* text_end, const UChar* text_range) |
3387 | 0 | { |
3388 | 0 | const UChar *s, *t, *p, *end; |
3389 | 0 | const UChar *tail; |
3390 | 0 | ptrdiff_t tlen1; |
3391 | |
|
3392 | | # ifdef ONIG_DEBUG_SEARCH |
3393 | | fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
3394 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
3395 | | # endif |
3396 | |
|
3397 | 0 | tail = target_end - 1; |
3398 | 0 | tlen1 = tail - target; |
3399 | 0 | end = text_range + tlen1; |
3400 | 0 | if (end > text_end) |
3401 | 0 | end = text_end; |
3402 | |
|
3403 | 0 | s = text + tlen1; |
3404 | 0 | while (s < end) { |
3405 | 0 | p = s; |
3406 | 0 | t = tail; |
3407 | 0 | while (*p == *t) { |
3408 | 0 | if (t == target) return (UChar* )p; |
3409 | 0 | p--; t--; |
3410 | 0 | } |
3411 | 0 | if (s + 1 >= end) break; |
3412 | 0 | s += reg->map[s[1]]; |
3413 | 0 | } |
3414 | | |
3415 | 0 | return (UChar* )NULL; |
3416 | 0 | } |
3417 | | |
3418 | | /* Sunday's quick search applied to a multibyte string (ignore case) */ |
3419 | | static UChar* |
3420 | | bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end, |
3421 | | const UChar* text, const UChar* text_end, |
3422 | | const UChar* text_range) |
3423 | 0 | { |
3424 | 0 | const UChar *s, *se, *t, *end; |
3425 | 0 | const UChar *tail; |
3426 | 0 | ptrdiff_t skip, tlen1; |
3427 | 0 | OnigEncoding enc = reg->enc; |
3428 | 0 | int case_fold_flag = reg->case_fold_flag; |
3429 | |
|
3430 | | # ifdef ONIG_DEBUG_SEARCH |
3431 | | fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
3432 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
3433 | | # endif |
3434 | |
|
3435 | 0 | tail = target_end - 1; |
3436 | 0 | tlen1 = tail - target; |
3437 | 0 | end = text_range; |
3438 | 0 | if (end + tlen1 > text_end) |
3439 | 0 | end = text_end - tlen1; |
3440 | |
|
3441 | 0 | s = text; |
3442 | |
|
3443 | 0 | while (s < end) { |
3444 | 0 | se = s + tlen1; |
3445 | 0 | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
3446 | 0 | s, se + 1)) |
3447 | 0 | return (UChar* )s; |
3448 | 0 | if (s + 1 >= end) break; |
3449 | 0 | skip = reg->map[se[1]]; |
3450 | 0 | t = s; |
3451 | 0 | do { |
3452 | 0 | s += enclen(enc, s, end); |
3453 | 0 | } while ((s - t) < skip && s < end); |
3454 | 0 | } |
3455 | | |
3456 | 0 | return (UChar* )NULL; |
3457 | 0 | } |
3458 | | |
3459 | | /* Sunday's quick search (ignore case) */ |
3460 | | static UChar* |
3461 | | bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end, |
3462 | | const UChar* text, const UChar* text_end, const UChar* text_range) |
3463 | 0 | { |
3464 | 0 | const UChar *s, *p, *end; |
3465 | 0 | const UChar *tail; |
3466 | 0 | ptrdiff_t tlen1; |
3467 | 0 | OnigEncoding enc = reg->enc; |
3468 | 0 | int case_fold_flag = reg->case_fold_flag; |
3469 | |
|
3470 | | # ifdef ONIG_DEBUG_SEARCH |
3471 | | fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n", |
3472 | | (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range); |
3473 | | # endif |
3474 | |
|
3475 | 0 | tail = target_end - 1; |
3476 | 0 | tlen1 = tail - target; |
3477 | 0 | end = text_range + tlen1; |
3478 | 0 | if (end > text_end) |
3479 | 0 | end = text_end; |
3480 | |
|
3481 | 0 | s = text + tlen1; |
3482 | 0 | while (s < end) { |
3483 | 0 | p = s - tlen1; |
3484 | 0 | if (str_lower_case_match(enc, case_fold_flag, target, target_end, |
3485 | 0 | p, s + 1)) |
3486 | 0 | return (UChar* )p; |
3487 | 0 | if (s + 1 >= end) break; |
3488 | 0 | s += reg->map[s[1]]; |
3489 | 0 | } |
3490 | | |
3491 | 0 | return (UChar* )NULL; |
3492 | 0 | } |
3493 | | |
3494 | | static UChar* |
3495 | | map_search(OnigEncoding enc, UChar map[], |
3496 | | const UChar* text, const UChar* text_range, const UChar* text_end) |
3497 | 0 | { |
3498 | 0 | const UChar *s = text; |
3499 | |
|
3500 | 0 | while (s < text_range) { |
3501 | 0 | if (map[*s]) return (UChar* )s; |
3502 | | |
3503 | 0 | s += enclen(enc, s, text_end); |
3504 | 0 | } |
3505 | 0 | return (UChar* )NULL; |
3506 | 0 | } |
3507 | | |
3508 | | static UChar* |
3509 | | map_search_backward(OnigEncoding enc, UChar map[], |
3510 | | const UChar* text, const UChar* adjust_text, |
3511 | | const UChar* text_start, const UChar* text_end) |
3512 | 0 | { |
3513 | 0 | const UChar *s = text_start; |
3514 | |
|
3515 | 0 | while (s >= text) { |
3516 | 0 | if (map[*s]) return (UChar* )s; |
3517 | | |
3518 | 0 | s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); |
3519 | 0 | } |
3520 | 0 | return (UChar* )NULL; |
3521 | 0 | } |
3522 | | |
3523 | | extern OnigPosition |
3524 | | onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, |
3525 | | OnigOptionType option) |
3526 | 0 | { |
3527 | 0 | ptrdiff_t r; |
3528 | 0 | UChar *prev; |
3529 | 0 | OnigMatchArg msa; |
3530 | |
|
3531 | 0 | MATCH_ARG_INIT(msa, option, region, at, at); |
3532 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK |
3533 | | { |
3534 | | ptrdiff_t offset = at - str; |
3535 | | STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); |
3536 | | } |
3537 | | #endif |
3538 | |
|
3539 | 0 | if (region) { |
3540 | 0 | r = onig_region_resize_clear(region, reg->num_mem + 1); |
3541 | 0 | } |
3542 | 0 | else |
3543 | 0 | r = 0; |
3544 | |
|
3545 | 0 | if (r == 0) { |
3546 | 0 | prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); |
3547 | 0 | r = match_at(reg, str, end, |
3548 | 0 | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE |
3549 | 0 | end, |
3550 | 0 | #endif |
3551 | 0 | at, prev, &msa); |
3552 | 0 | } |
3553 | |
|
3554 | 0 | MATCH_ARG_FREE(msa); |
3555 | 0 | return r; |
3556 | 0 | } |
3557 | | |
3558 | | static int |
3559 | | forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, |
3560 | | UChar* range, UChar** low, UChar** high, UChar** low_prev) |
3561 | 275 | { |
3562 | 275 | UChar *p, *pprev = (UChar* )NULL; |
3563 | | |
3564 | | #ifdef ONIG_DEBUG_SEARCH |
3565 | | fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n", |
3566 | | (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range); |
3567 | | #endif |
3568 | | |
3569 | 275 | p = s; |
3570 | 275 | if (reg->dmin > 0) { |
3571 | 275 | if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { |
3572 | 0 | p += reg->dmin; |
3573 | 0 | } |
3574 | 275 | else { |
3575 | 275 | UChar *q = p + reg->dmin; |
3576 | | |
3577 | 275 | if (q >= end) return 0; /* fail */ |
3578 | 550 | while (p < q) p += enclen(reg->enc, p, end); |
3579 | 275 | } |
3580 | 275 | } |
3581 | | |
3582 | 275 | retry: |
3583 | 275 | switch (reg->optimize) { |
3584 | 275 | case ONIG_OPTIMIZE_EXACT: |
3585 | 275 | p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); |
3586 | 275 | break; |
3587 | 0 | case ONIG_OPTIMIZE_EXACT_IC: |
3588 | 0 | p = slow_search_ic(reg->enc, reg->case_fold_flag, |
3589 | 0 | reg->exact, reg->exact_end, p, end, range); |
3590 | 0 | break; |
3591 | | |
3592 | 0 | case ONIG_OPTIMIZE_EXACT_BM: |
3593 | 0 | p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); |
3594 | 0 | break; |
3595 | | |
3596 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: |
3597 | 0 | p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); |
3598 | 0 | break; |
3599 | | |
3600 | 0 | case ONIG_OPTIMIZE_EXACT_BM_IC: |
3601 | 0 | p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range); |
3602 | 0 | break; |
3603 | | |
3604 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC: |
3605 | 0 | p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range); |
3606 | 0 | break; |
3607 | | |
3608 | 0 | case ONIG_OPTIMIZE_MAP: |
3609 | 0 | p = map_search(reg->enc, reg->map, p, range, end); |
3610 | 0 | break; |
3611 | 275 | } |
3612 | | |
3613 | 275 | if (p && p < range) { |
3614 | 268 | if (p - reg->dmin < s) { |
3615 | 0 | retry_gate: |
3616 | 0 | pprev = p; |
3617 | 0 | p += enclen(reg->enc, p, end); |
3618 | 0 | goto retry; |
3619 | 0 | } |
3620 | | |
3621 | 268 | if (reg->sub_anchor) { |
3622 | 0 | UChar* prev; |
3623 | |
|
3624 | 0 | switch (reg->sub_anchor) { |
3625 | 0 | case ANCHOR_BEGIN_LINE: |
3626 | 0 | if (!ON_STR_BEGIN(p)) { |
3627 | 0 | prev = onigenc_get_prev_char_head(reg->enc, |
3628 | 0 | (pprev ? pprev : str), p, end); |
3629 | 0 | if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) |
3630 | 0 | goto retry_gate; |
3631 | 0 | } |
3632 | 0 | break; |
3633 | | |
3634 | 0 | case ANCHOR_END_LINE: |
3635 | 0 | if (ON_STR_END(p)) { |
3636 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE |
3637 | | prev = (UChar* )onigenc_get_prev_char_head(reg->enc, |
3638 | | (pprev ? pprev : str), p); |
3639 | | if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) |
3640 | | goto retry_gate; |
3641 | | #endif |
3642 | 0 | } |
3643 | 0 | else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) |
3644 | 0 | goto retry_gate; |
3645 | 0 | break; |
3646 | 0 | } |
3647 | 0 | } |
3648 | | |
3649 | 268 | if (reg->dmax == 0) { |
3650 | 0 | *low = p; |
3651 | 0 | if (low_prev) { |
3652 | 0 | if (*low > s) |
3653 | 0 | *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); |
3654 | 0 | else |
3655 | 0 | *low_prev = onigenc_get_prev_char_head(reg->enc, |
3656 | 0 | (pprev ? pprev : str), p, end); |
3657 | 0 | } |
3658 | 0 | } |
3659 | 268 | else { |
3660 | 268 | if (reg->dmax != ONIG_INFINITE_DISTANCE) { |
3661 | 0 | if (p < str + reg->dmax) { |
3662 | 0 | *low = (UChar* )str; |
3663 | 0 | if (low_prev) |
3664 | 0 | *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end); |
3665 | 0 | } |
3666 | 0 | else { |
3667 | 0 | *low = p - reg->dmax; |
3668 | 0 | if (*low > s) { |
3669 | 0 | *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, |
3670 | 0 | *low, end, (const UChar** )low_prev); |
3671 | 0 | if (low_prev && IS_NULL(*low_prev)) |
3672 | 0 | *low_prev = onigenc_get_prev_char_head(reg->enc, |
3673 | 0 | (pprev ? pprev : s), *low, end); |
3674 | 0 | } |
3675 | 0 | else { |
3676 | 0 | if (low_prev) |
3677 | 0 | *low_prev = onigenc_get_prev_char_head(reg->enc, |
3678 | 0 | (pprev ? pprev : str), *low, end); |
3679 | 0 | } |
3680 | 0 | } |
3681 | 0 | } |
3682 | 268 | } |
3683 | | /* no needs to adjust *high, *high is used as range check only */ |
3684 | 268 | *high = p - reg->dmin; |
3685 | | |
3686 | | #ifdef ONIG_DEBUG_SEARCH |
3687 | | fprintf(stderr, |
3688 | | "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n", |
3689 | | *low - str, *high - str, reg->dmin, reg->dmax); |
3690 | | #endif |
3691 | 268 | return 1; /* success */ |
3692 | 268 | } |
3693 | | |
3694 | 7 | return 0; /* fail */ |
3695 | 275 | } |
3696 | | |
/* NOTE(review): by its name this is a length threshold for Boyer-Moore
 * backward search; no use of it is visible in this part of the file --
 * confirm whether it is still referenced anywhere. */
3697 | | #define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
3698 | | |
/*
 * backward_search_range: optimizer pre-scan for a backward search.
 *
 * Scans backward from `s` with the search routine selected by reg->optimize:
 *   - EXACT, EXACT_BM, EXACT_BM_NOT_REV      -> slow_search_backward()
 *   - EXACT_IC, EXACT_BM_IC, EXACT_BM_NOT_REV_IC -> slow_search_backward_ic()
 *   - MAP                                    -> map_search_backward()
 * The lower bound `range` is advanced by reg->dmin before scanning.
 *
 * When reg->sub_anchor is set, a hit `p` must also satisfy it:
 *   - ANCHOR_BEGIN_LINE: p is at the string start or the previous character
 *     is a newline; otherwise the scan is retried from the previous character.
 *   - ANCHOR_END_LINE: p is at the string end or p itself is a newline;
 *     otherwise the scan is retried from the previous character head, and
 *     the function fails when there is none.
 *
 * Outputs (written only when reg->dmax != ONIG_INFINITE_DISTANCE):
 *   *low  = p - reg->dmax
 *   *high = p - reg->dmin, passed through onigenc_get_right_adjust_char_head()
 * With an infinite dmax, *low and *high are left untouched.
 *
 * Returns 1 on success, 0 on failure.
 */
3699 | | static int
3700 | | backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
3701 | | UChar* s, const UChar* range, UChar* adjrange,
3702 | | UChar** low, UChar** high)
3703 | 0 | {
3704 | 0 | UChar *p;
3705 |

3706 | 0 | range += reg->dmin;
3707 | 0 | p = s;
3708 |

3709 | 0 | retry:
3710 | 0 | switch (reg->optimize) {
3711 | 0 | case ONIG_OPTIMIZE_EXACT:
3712 | 0 | exact_method:
3713 | 0 | p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
3714 | 0 | range, adjrange, end, p);
3715 | 0 | break;
3716 | |
3717 | 0 | case ONIG_OPTIMIZE_EXACT_IC:
3718 | 0 | case ONIG_OPTIMIZE_EXACT_BM_IC:
3719 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3720 | 0 | p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
3721 | 0 | reg->exact, reg->exact_end,
3722 | 0 | range, adjrange, end, p);
3723 | 0 | break;
3724 | |
/* The BM variants share the plain exact-string scan when searching backward. */
3725 | 0 | case ONIG_OPTIMIZE_EXACT_BM:
3726 | 0 | case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3727 | 0 | goto exact_method;
3728 | 0 | break;
3729 | |
3730 | 0 | case ONIG_OPTIMIZE_MAP:
3731 | 0 | p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
3732 | 0 | break;
3733 | 0 | }
3734 | |
3735 | 0 | if (p) {
3736 | 0 | if (reg->sub_anchor) {
3737 | 0 | UChar* prev;
3738 |

3739 | 0 | switch (reg->sub_anchor) {
3740 | 0 | case ANCHOR_BEGIN_LINE:
3741 | 0 | if (!ON_STR_BEGIN(p)) {
3742 | 0 | prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
3743 | 0 | if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
3744 | 0 | p = prev;
3745 | 0 | goto retry;
3746 | 0 | }
3747 | 0 | }
3748 | 0 | break;
3749 | |
3750 | 0 | case ANCHOR_END_LINE:
3751 | 0 | if (ON_STR_END(p)) {
3752 | | #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3753 | | prev = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
3754 | | if (IS_NULL(prev)) goto fail;
3755 | | if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
3756 | | p = prev;
3757 | | goto retry;
3758 | | }
3759 | | #endif
3760 | 0 | }
3761 | 0 | else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
3762 | 0 | p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
3763 | 0 | if (IS_NULL(p)) goto fail;
3764 | 0 | goto retry;
3765 | 0 | }
3766 | 0 | break;
3767 | 0 | }
3768 | 0 | }
3769 | |
3770 | | /* no needs to adjust *high, *high is used as range check only */
3771 | 0 | if (reg->dmax != ONIG_INFINITE_DISTANCE) {
3772 | 0 | *low = p - reg->dmax;
3773 | 0 | *high = p - reg->dmin;
3774 | 0 | *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
3775 | 0 | }
3776 |

3777 | | #ifdef ONIG_DEBUG_SEARCH
3778 | | fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
3779 | | (int )(*low - str), (int )(*high - str));
3780 | | #endif
3781 | 0 | return 1; /* success */
3782 | 0 | }
3783 | |
3784 | 0 | fail:
3785 | | #ifdef ONIG_DEBUG_SEARCH
3786 | | fprintf(stderr, "backward_search_range: fail.\n");
3787 | | #endif
3788 | 0 | return 0; /* fail */
3789 | 0 | }
3790 | | |
3791 | | |
/*
 * onig_search: convenience wrapper around onig_search_gpos() that passes
 * `start` as the global position as well as the search start.
 * Returns whatever onig_search_gpos() returns.
 */
3792 | | extern OnigPosition
3793 | | onig_search(regex_t* reg, const UChar* str, const UChar* end,
3794 | | const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3795 | 275 | {
3796 | 275 | return onig_search_gpos(reg, str, end, start, start, range, region, option);
3797 | 275 | }
3798 | | |
/*
 * onig_search_gpos: search the subject [str, end) for a match of `reg`.
 *
 * Match attempts begin at `start` and move toward `range`: forward when
 * range > start, backward otherwise. `global_pos` is handed to
 * MATCH_ARG_INIT and is also used to narrow `range` for patterns anchored
 * with ANCHOR_BEGIN_POSITION.
 *
 * Outline:
 *   1. If `region` is non-NULL it is resized/cleared for reg->num_mem + 1
 *      entries; a non-zero result from that call is returned as-is.
 *   2. `start` outside [str, end] is reported as ONIG_MISMATCH.
 *   3. For a non-empty subject with reg->anchor set, start/range are
 *      narrowed (or the search is rejected) according to the anchor kind.
 *   4. For an empty subject, a single match attempt is made only when
 *      reg->threshold_len == 0.
 *   5. Otherwise match_at() is tried position by position, optionally
 *      guided by forward_search_range() / backward_search_range() when
 *      reg->optimize != ONIG_OPTIMIZE_NONE.
 *
 * Returns s - str (offset of the matching position from str) on a match,
 * ONIG_MISMATCH when nothing matched, or the negative value produced by
 * match_at() on error. On the mismatch/error exit, `region` is cleared when
 * the regex has the FIND_NOT_EMPTY option.
 */
3799 | | extern OnigPosition
3800 | | onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
3801 | | const UChar* global_pos,
3802 | | const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
3803 | 275 | {
3804 | 275 | ptrdiff_t r;
3805 | 275 | UChar *s, *prev;
3806 | 275 | OnigMatchArg msa;
3807 | 275 | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3808 | 275 | const UChar *orig_start = start;
3809 | 275 | const UChar *orig_range = range;
3810 | 275 | #endif
3811 | |
3812 | | #ifdef ONIG_DEBUG_SEARCH
3813 | | fprintf(stderr,
3814 | | "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
3815 | | (uintptr_t )str, str, end - str, start - str, range - str);
3816 | | #endif
3817 | |
/* Prepare the caller's region for reg->num_mem + 1 entries; a non-zero
 * result is returned to the caller unchanged (msa is not yet initialized). */
3818 | 275 | if (region) {
3819 | 275 | r = onig_region_resize_clear(region, reg->num_mem + 1);
3820 | 275 | if (r) goto finish_no_msa;
3821 | 275 | }
3822 | |
/* A start position outside [str, end] is reported as a mismatch. */
3823 | 275 | if (start > end || start < str) goto mismatch_no_msa;
3824 | |
3825 | |
3826 | 275 | #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3827 | 275 | # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3828 | 275 | # define MATCH_AND_RETURN_CHECK(upper_range) \
3829 | 39.4k | r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3830 | 39.4k | if (r != ONIG_MISMATCH) {\
3831 | 207 | if (r >= 0) {\
3832 | 207 | if (! IS_FIND_LONGEST(reg->options)) {\
3833 | 207 | goto match;\
3834 | 207 | }\
3835 | 207 | }\
3836 | 207 | else goto finish; /* error */ \
3837 | 207 | }
3838 | | # else
3839 | | # define MATCH_AND_RETURN_CHECK(upper_range) \
3840 | | r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
3841 | | if (r != ONIG_MISMATCH) {\
3842 | | if (r >= 0) {\
3843 | | goto match;\
3844 | | }\
3845 | | else goto finish; /* error */ \
3846 | | }
3847 | | # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3848 | | #else
3849 | | # ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
3850 | | # define MATCH_AND_RETURN_CHECK(none) \
3851 | | r = match_at(reg, str, end, s, prev, &msa);\
3852 | | if (r != ONIG_MISMATCH) {\
3853 | | if (r >= 0) {\
3854 | | if (! IS_FIND_LONGEST(reg->options)) {\
3855 | | goto match;\
3856 | | }\
3857 | | }\
3858 | | else goto finish; /* error */ \
3859 | | }
3860 | | # else
3861 | | # define MATCH_AND_RETURN_CHECK(none) \
3862 | | r = match_at(reg, str, end, s, prev, &msa);\
3863 | | if (r != ONIG_MISMATCH) {\
3864 | | if (r >= 0) {\
3865 | | goto match;\
3866 | | }\
3867 | | else goto finish; /* error */ \
3868 | | }
3869 | | # endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
3870 | | #endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
3871 | |
3872 | |
3873 | | /* anchor optimize: resume search range */
3874 | 275 | if (reg->anchor != 0 && str < end) {
3875 | 0 | UChar *min_semi_end, *max_semi_end;
3876 |

3877 | 0 | if (reg->anchor & ANCHOR_BEGIN_POSITION) {
3878 | | /* search start-position only */
3879 | 0 | begin_position:
3880 | 0 | if (range > start)
3881 | 0 | {
3882 | 0 | if (global_pos > start)
3883 | 0 | {
3884 | 0 | if (global_pos < range)
3885 | 0 | range = global_pos + 1;
3886 | 0 | }
3887 | 0 | else
3888 | 0 | range = start + 1;
3889 | 0 | }
3890 | 0 | else
3891 | 0 | range = start;
3892 | 0 | }
3893 | 0 | else if (reg->anchor & ANCHOR_BEGIN_BUF) {
3894 | | /* search str-position only */
3895 | 0 | if (range > start) {
3896 | 0 | if (start != str) goto mismatch_no_msa;
3897 | 0 | range = str + 1;
3898 | 0 | }
3899 | 0 | else {
3900 | 0 | if (range <= str) {
3901 | 0 | start = str;
3902 | 0 | range = str;
3903 | 0 | }
3904 | 0 | else
3905 | 0 | goto mismatch_no_msa;
3906 | 0 | }
3907 | 0 | }
3908 | 0 | else if (reg->anchor & ANCHOR_END_BUF) {
3909 | 0 | min_semi_end = max_semi_end = (UChar* )end;
3910 |

/* Shared with ANCHOR_SEMI_END_BUF: clamp start/range using anchor_dmin and
 * anchor_dmax measured back from min_semi_end / max_semi_end. */
3911 | 0 | end_buf:
3912 | 0 | if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
3913 | 0 | goto mismatch_no_msa;
3914 | |
3915 | 0 | if (range > start) {
3916 | 0 | if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
3917 | 0 | start = min_semi_end - reg->anchor_dmax;
3918 | 0 | if (start < end)
3919 | 0 | start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
3920 | 0 | }
3921 | 0 | if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
3922 | 0 | range = max_semi_end - reg->anchor_dmin + 1;
3923 | 0 | }
3924 |

3925 | 0 | if (start > range) goto mismatch_no_msa;
3926 | | /* If start == range, match with empty at end.
3927 | | Backward search is used. */
3928 | 0 | }
3929 | 0 | else {
3930 | 0 | if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
3931 | 0 | range = min_semi_end - reg->anchor_dmax;
3932 | 0 | }
3933 | 0 | if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
3934 | 0 | start = max_semi_end - reg->anchor_dmin;
3935 | 0 | start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
3936 | 0 | }
3937 | 0 | if (range > start) goto mismatch_no_msa;
3938 | 0 | }
3939 | 0 | }
3940 | 0 | else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
3941 | 0 | UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
3942 |

3943 | 0 | max_semi_end = (UChar* )end;
3944 | 0 | if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
3945 | 0 | min_semi_end = pre_end;
3946 |

3947 | 0 | #ifdef USE_CRNL_AS_LINE_TERMINATOR
3948 | 0 | pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
3949 | 0 | if (IS_NOT_NULL(pre_end) &&
3950 | 0 | IS_NEWLINE_CRLF(reg->options) &&
3951 | 0 | ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
3952 | 0 | min_semi_end = pre_end;
3953 | 0 | }
3954 | 0 | #endif
3955 | 0 | if (min_semi_end > str && start <= min_semi_end) {
3956 | 0 | goto end_buf;
3957 | 0 | }
3958 | 0 | }
3959 | 0 | else {
3960 | 0 | min_semi_end = (UChar* )end;
3961 | 0 | goto end_buf;
3962 | 0 | }
3963 | 0 | }
3964 | 0 | else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
3965 | 0 | goto begin_position;
3966 | 0 | }
3967 | 0 | }
3968 | 275 | else if (str == end) { /* empty string */
3969 | 0 | static const UChar address_for_empty_string[] = "";
3970 |

3971 | | #ifdef ONIG_DEBUG_SEARCH
3972 | | fprintf(stderr, "onig_search: empty string.\n");
3973 | | #endif
3974 |

/* An empty subject is only tried when threshold_len is 0; the single
 * attempt runs on a static empty buffer that replaces str/start/end. */
3975 | 0 | if (reg->threshold_len == 0) {
3976 | 0 | start = end = str = address_for_empty_string;
3977 | 0 | s = (UChar* )start;
3978 | 0 | prev = (UChar* )NULL;
3979 |

3980 | 0 | MATCH_ARG_INIT(msa, option, region, start, start);
3981 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK
3982 | | msa.state_check_buff = (void* )0;
3983 | | msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
3984 | | #endif
3985 | 0 | MATCH_AND_RETURN_CHECK(end);
3986 | 0 | goto mismatch;
3987 | 0 | }
3988 | 0 | goto mismatch_no_msa;
3989 | 0 | }
3990 | |
3991 | | #ifdef ONIG_DEBUG_SEARCH
3992 | | fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
3993 | | (int )(end - str), (int )(start - str), (int )(range - str));
3994 | | #endif
3995 | |
3996 | 275 | MATCH_ARG_INIT(msa, option, region, start, global_pos);
3997 | | #ifdef USE_COMBINATION_EXPLOSION_CHECK
3998 | | {
3999 | | ptrdiff_t offset = (MIN(start, range) - str);
4000 | | STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4001 | | }
4002 | | #endif
4003 | |
4004 | 275 | s = (UChar* )start;
4005 | 275 | if (range > start) { /* forward search */
4006 | 275 | if (s > str)
4007 | 0 | prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4008 | 275 | else
4009 | 275 | prev = (UChar* )NULL;
4010 | |
4011 | 275 | if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4012 | 275 | UChar *sch_range, *low, *high, *low_prev;
4013 | |
4014 | 275 | sch_range = (UChar* )range;
4015 | 275 | if (reg->dmax != 0) {
4016 | 275 | if (reg->dmax == ONIG_INFINITE_DISTANCE)
4017 | 275 | sch_range = (UChar* )end;
4018 | 0 | else {
4019 | 0 | sch_range += reg->dmax;
4020 | 0 | if (sch_range > end) sch_range = (UChar* )end;
4021 | 0 | }
4022 | 275 | }
4023 | |
4024 | 275 | if ((end - start) < reg->threshold_len)
4025 | 0 | goto mismatch;
4026 | |
/* Finite dmax: each forward_search_range() hit yields a [low, high] window
 * of start positions that are tried with match_at(). */
4027 | 275 | if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4028 | 0 | do {
4029 | 0 | if (! forward_search_range(reg, str, end, s, sch_range,
4030 | 0 | &low, &high, &low_prev)) goto mismatch;
4031 | 0 | if (s < low) {
4032 | 0 | s = low;
4033 | 0 | prev = low_prev;
4034 | 0 | }
4035 | 0 | while (s <= high) {
4036 | 0 | MATCH_AND_RETURN_CHECK(orig_range);
4037 | 0 | prev = s;
4038 | 0 | s += enclen(reg->enc, s, end);
4039 | 0 | }
4040 | 0 | } while (s < range);
4041 | 0 | goto mismatch;
4042 | 0 | }
4043 | 275 | else { /* check only. */
4044 | 275 | if (! forward_search_range(reg, str, end, s, sch_range,
4045 | 275 | &low, &high, (UChar** )NULL)) goto mismatch;
4046 | |
4047 | 268 | if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
4048 | 0 | do {
4049 | 0 | MATCH_AND_RETURN_CHECK(orig_range);
4050 | 0 | prev = s;
4051 | 0 | s += enclen(reg->enc, s, end);
4052 |

4053 | 0 | if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
4054 | 0 | while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
4055 | 0 | && s < range) {
4056 | 0 | prev = s;
4057 | 0 | s += enclen(reg->enc, s, end);
4058 | 0 | }
4059 | 0 | }
4060 | 0 | } while (s < range);
4061 | 0 | goto mismatch;
4062 | 0 | }
4063 | 268 | }
4064 | 275 | }
4065 | |
/* Plain forward scan: try every character position from s up to range. */
4066 | 39.3k | do {
4067 | 39.3k | MATCH_AND_RETURN_CHECK(orig_range);
4068 | 39.1k | prev = s;
4069 | 39.1k | s += enclen(reg->enc, s, end);
4070 | 39.1k | } while (s < range);
4071 | |
4072 | 61 | if (s == range) { /* because empty match with /$/. */
4073 | 61 | MATCH_AND_RETURN_CHECK(orig_range);
4074 | 61 | }
4075 | 61 | }
4076 | 0 | else { /* backward search */
4077 | 0 | if (reg->optimize != ONIG_OPTIMIZE_NONE) {
4078 | 0 | UChar *low, *high, *adjrange, *sch_start;
4079 |

4080 | 0 | if (range < end)
4081 | 0 | adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
4082 | 0 | else
4083 | 0 | adjrange = (UChar* )end;
4084 |

4085 | 0 | if (reg->dmax != ONIG_INFINITE_DISTANCE &&
4086 | 0 | (end - range) >= reg->threshold_len) {
4087 | 0 | do {
4088 | 0 | sch_start = s + reg->dmax;
4089 | 0 | if (sch_start > end) sch_start = (UChar* )end;
4090 | 0 | if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4091 | 0 | &low, &high) <= 0)
4092 | 0 | goto mismatch;
4093 | |
4094 | 0 | if (s > high)
4095 | 0 | s = high;
4096 |

4097 | 0 | while (s >= low) {
4098 | 0 | prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4099 | 0 | MATCH_AND_RETURN_CHECK(orig_start);
4100 | 0 | s = prev;
4101 | 0 | }
4102 | 0 | } while (s >= range);
4103 | 0 | goto mismatch;
4104 | 0 | }
4105 | 0 | else { /* check only. */
4106 | 0 | if ((end - range) < reg->threshold_len) goto mismatch;
4107 | |
4108 | 0 | sch_start = s;
4109 | 0 | if (reg->dmax != 0) {
4110 | 0 | if (reg->dmax == ONIG_INFINITE_DISTANCE)
4111 | 0 | sch_start = (UChar* )end;
4112 | 0 | else {
4113 | 0 | sch_start += reg->dmax;
4114 | 0 | if (sch_start > end) sch_start = (UChar* )end;
4115 | 0 | else
4116 | 0 | sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
4117 | 0 | start, sch_start, end);
4118 | 0 | }
4119 | 0 | }
4120 | 0 | if (backward_search_range(reg, str, end, sch_start, range, adjrange,
4121 | 0 | &low, &high) <= 0) goto mismatch;
4122 | 0 | }
4123 | 0 | }
4124 | |
/* Plain backward scan: step s to the previous character head until it
 * drops below range. */
4125 | 0 | do {
4126 | 0 | prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
4127 | 0 | MATCH_AND_RETURN_CHECK(orig_start);
4128 | 0 | s = prev;
4129 | 0 | } while (s >= range);
4130 | 0 | }
4131 | |
/* With FIND_LONGEST, a recorded best match (msa.best_len >= 0) turns the
 * mismatch into a match at msa.best_s. */
4132 | 68 | mismatch:
4133 | 68 | #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4134 | 68 | if (IS_FIND_LONGEST(reg->options)) {
4135 | 0 | if (msa.best_len >= 0) {
4136 | 0 | s = msa.best_s;
4137 | 0 | goto match;
4138 | 0 | }
4139 | 0 | }
4140 | 68 | #endif
4141 | 68 | r = ONIG_MISMATCH;
4142 | |
4143 | 68 | finish:
4144 | 68 | MATCH_ARG_FREE(msa);
4145 | |
4146 | | /* If result is mismatch and no FIND_NOT_EMPTY option,
4147 | | then the region is not set in match_at(). */
4148 | 68 | if (IS_FIND_NOT_EMPTY(reg->options) && region) {
4149 | 0 | onig_region_clear(region);
4150 | 0 | }
4151 | |
4152 | | #ifdef ONIG_DEBUG
4153 | | if (r != ONIG_MISMATCH)
4154 | | fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4155 | | #endif
4156 | 68 | return r;
4157 | |
/* Exits taken before MATCH_ARG_INIT has run: nothing to free. */
4158 | 0 | mismatch_no_msa:
4159 | 0 | r = ONIG_MISMATCH;
4160 | 0 | finish_no_msa:
4161 | | #ifdef ONIG_DEBUG
4162 | | if (r != ONIG_MISMATCH)
4163 | | fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
4164 | | #endif
4165 | 0 | return r;
4166 | |
/* Success: the result is the offset of s from str. */
4167 | 207 | match:
4168 | 207 | MATCH_ARG_FREE(msa);
4169 | 207 | return s - str;
4170 | 0 | }
4171 | | |
/*
 * onig_scan: repeatedly search [str, end) with onig_search() and invoke
 * scan_callback(n, r, region, callback_arg) for every match, where n is the
 * zero-based match index and r is the value returned by onig_search().
 *
 * After each match the next search starts at str + region->end[0]; when
 * that equals the current start, the start is advanced by one character
 * (or the scan stops if start is already at end).
 *
 * `region` is dereferenced after every successful search, so it must not
 * be NULL.
 *
 * Returns the number of matches reported, the callback's value as soon as
 * it returns non-zero, or the onig_search() result when that is neither a
 * match nor ONIG_MISMATCH.
 */
4172 | | extern OnigPosition
4173 | | onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4174 | | OnigRegion* region, OnigOptionType option,
4175 | | int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4176 | | void* callback_arg)
4177 | 0 | {
4178 | 0 | OnigPosition r;
4179 | 0 | OnigPosition n;
4180 | 0 | int rs;
4181 | 0 | const UChar* start;
4182 |

4183 | 0 | n = 0;
4184 | 0 | start = str;
4185 | 0 | while (1) {
4186 | 0 | r = onig_search(reg, str, end, start, end, region, option);
4187 | 0 | if (r >= 0) {
4188 | 0 | rs = scan_callback(n, r, region, callback_arg);
4189 | 0 | n++;
4190 | 0 | if (rs != 0)
4191 | 0 | return rs;
4192 | |
/* Match ended where this search started: step one character so the
 * loop always makes progress. */
4193 | 0 | if (region->end[0] == start - str) {
4194 | 0 | if (start >= end) break;
4195 | 0 | start += enclen(reg->enc, start, end);
4196 | 0 | }
4197 | 0 | else
4198 | 0 | start = str + region->end[0];
4199 | |
4200 | 0 | if (start > end)
4201 | 0 | break;
4202 | 0 | }
4203 | 0 | else if (r == ONIG_MISMATCH) {
4204 | 0 | break;
4205 | 0 | }
4206 | 0 | else { /* error */
4207 | 0 | return r;
4208 | 0 | }
4209 | 0 | }
4210 | |
4211 | 0 | return n;
4212 | 0 | }
4213 | | |
/* onig_get_encoding: returns the enc field of `reg`. */
4214 | | extern OnigEncoding
4215 | | onig_get_encoding(const regex_t* reg)
4216 | 0 | {
4217 | 0 | return reg->enc;
4218 | 0 | }
4219 | | |
/* onig_get_options: returns the options field of `reg`. */
4220 | | extern OnigOptionType
4221 | | onig_get_options(const regex_t* reg)
4222 | 0 | {
4223 | 0 | return reg->options;
4224 | 0 | }
4225 | | |
/* onig_get_case_fold_flag: returns the case_fold_flag field of `reg`. */
4226 | | extern OnigCaseFoldType
4227 | | onig_get_case_fold_flag(const regex_t* reg)
4228 | 0 | {
4229 | 0 | return reg->case_fold_flag;
4230 | 0 | }
4231 | | |
/* onig_get_syntax: returns the syntax pointer stored in `reg`. */
4232 | | extern const OnigSyntaxType*
4233 | | onig_get_syntax(const regex_t* reg)
4234 | 0 | {
4235 | 0 | return reg->syntax;
4236 | 0 | }
4237 | | |
/* onig_number_of_captures: returns the num_mem field of `reg`. */
4238 | | extern int
4239 | | onig_number_of_captures(const regex_t* reg)
4240 | 0 | {
4241 | 0 | return reg->num_mem;
4242 | 0 | }
4243 | | |
/*
 * onig_number_of_capture_histories: when USE_CAPTURE_HISTORY is defined,
 * counts the indices 0..ONIG_MAX_CAPTURE_HISTORY_GROUP (inclusive) whose bit
 * is set in reg->capture_history; otherwise returns 0.
 */
4244 | | extern int
4245 | | onig_number_of_capture_histories(const regex_t* reg)
4246 | 0 | {
4247 | 0 | #ifdef USE_CAPTURE_HISTORY
4248 | 0 | int i, n;
4249 |

4250 | 0 | n = 0;
4251 | 0 | for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4252 | 0 | if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4253 | 0 | n++;
4254 | 0 | }
4255 | 0 | return n;
4256 | | #else
4257 | | return 0;
4258 | | #endif
4259 | 0 | }
4260 | | |
/* onig_copy_encoding: copies the object `from` points to into `*to` by
 * plain assignment. Both pointers are dereferenced without a NULL check. */
4261 | | extern void
4262 | | onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
4263 | 0 | {
4264 | 0 | *to = *from;
4265 | 0 | }