/src/php-src/oniguruma/src/regint.h
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef REGINT_H |
2 | | #define REGINT_H |
3 | | /********************************************************************** |
4 | | regint.h - Oniguruma (regular expression library) |
5 | | **********************************************************************/ |
6 | | /*- |
7 | | * Copyright (c) 2002-2021 K.Kosako |
8 | | * All rights reserved. |
9 | | * |
10 | | * Redistribution and use in source and binary forms, with or without |
11 | | * modification, are permitted provided that the following conditions |
12 | | * are met: |
13 | | * 1. Redistributions of source code must retain the above copyright |
14 | | * notice, this list of conditions and the following disclaimer. |
15 | | * 2. Redistributions in binary form must reproduce the above copyright |
16 | | * notice, this list of conditions and the following disclaimer in the |
17 | | * documentation and/or other materials provided with the distribution. |
18 | | * |
19 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
20 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
23 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 | | * SUCH DAMAGE. |
30 | | */ |
31 | | |
32 | | /* for debug */ |
33 | | /* #define ONIG_DEBUG_PARSE */ |
34 | | /* #define ONIG_DEBUG_COMPILE */ |
35 | | /* #define ONIG_DEBUG_SEARCH */ |
36 | | /* #define ONIG_DEBUG_MATCH */ |
37 | | /* #define ONIG_DEBUG_MATCH_COUNTER */ |
38 | | /* #define ONIG_DEBUG_CALL */ |
39 | | /* #define ONIG_DONT_OPTIMIZE */ |
40 | | |
41 | | /* for byte-code statistical data. */ |
42 | | /* #define ONIG_DEBUG_STATISTICS */ |
43 | | |
44 | | #if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \ |
45 | | defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ |
46 | | defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_CALL) || \ |
47 | | defined(ONIG_DEBUG_STATISTICS) |
48 | | #ifndef ONIG_DEBUG |
49 | | #define ONIG_DEBUG |
50 | | #define DBGFP stderr |
51 | | #endif |
52 | | #endif |
53 | | |
54 | | #ifndef ONIG_DISABLE_DIRECT_THREADING |
55 | | #ifdef __GNUC__ |
56 | | #define USE_GOTO_LABELS_AS_VALUES |
57 | | #endif |
58 | | #endif |
59 | | |
60 | | /* config */ |
61 | | /* spec. config */ |
62 | | #define USE_REGSET |
63 | | #define USE_CALL |
64 | | #define USE_CALLOUT |
65 | | #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ |
66 | | #define USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ |
67 | | #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ |
68 | | #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR |
69 | | #define USE_RETRY_LIMIT |
70 | | #ifdef USE_GOTO_LABELS_AS_VALUES |
71 | | #define USE_THREADED_CODE |
72 | | #define USE_DIRECT_THREADED_CODE |
73 | | #endif |
74 | | |
75 | | /* internal config */ |
76 | | #define USE_CHECK_VALIDITY_OF_STRING_IN_TREE |
77 | | #define USE_OP_PUSH_OR_JUMP_EXACT |
78 | | #define USE_QUANT_PEEK_NEXT |
79 | | #define USE_ST_LIBRARY |
80 | | #define USE_TIMEOFDAY |
81 | | #define USE_STRICT_POINTER_ADDRESS |
82 | | #define USE_STRICT_POINTER_COMPARISON |
83 | | |
84 | | #define USE_WORD_BEGIN_END /* "\<", "\>" */ |
85 | | #define USE_CAPTURE_HISTORY |
86 | | #define USE_VARIABLE_META_CHARS |
87 | | #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE |
88 | | /* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */ |
89 | | |
90 | | /* enabled by configure --enable-posix-api=yes */ |
91 | | /* #define USE_POSIX_API */ |
92 | | |
93 | 0 | #define DEFAULT_PARSE_DEPTH_LIMIT 4096 |
94 | | #define INIT_MATCH_STACK_SIZE 160 |
95 | | #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ |
96 | | #define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 |
97 | | #define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */ |
98 | | #define DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH 0 /* unlimited */ |
99 | | #define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20 |
100 | | |
101 | | |
102 | | #include "regenc.h" |
103 | | |
104 | | #ifndef ONIG_NO_STANDARD_C_HEADERS |
105 | | |
106 | | #include <stddef.h> |
107 | | #include <stdarg.h> |
108 | | #include <limits.h> |
109 | | #include <stdlib.h> |
110 | | #include <string.h> |
111 | | #include <ctype.h> |
112 | | |
113 | | #ifdef HAVE_STDINT_H |
114 | | #include <stdint.h> |
115 | | #endif |
116 | | |
117 | | #if defined(HAVE_ALLOCA_H) && !defined(__GNUC__) |
118 | | #include <alloca.h> |
119 | | #endif |
120 | | |
121 | | #ifdef HAVE_SYS_TYPES_H |
122 | | #ifndef __BORLANDC__ |
123 | | #include <sys/types.h> |
124 | | #endif |
125 | | #endif |
126 | | |
127 | | #ifdef HAVE_INTTYPES_H |
128 | | #include <inttypes.h> |
129 | | #endif |
130 | | |
131 | | #if defined(_WIN32) || defined(__BORLANDC__) |
132 | | #include <malloc.h> |
133 | | #endif |
134 | | |
135 | | #if defined(ONIG_DEBUG) || defined(NEED_TO_INCLUDE_STDIO) |
136 | | # include <stdio.h> |
137 | | #endif |
138 | | |
139 | | #ifdef ONIG_DEBUG_STATISTICS |
140 | | #ifdef USE_TIMEOFDAY |
141 | | |
142 | | #ifdef HAVE_SYS_TIME_H |
143 | | #include <sys/time.h> |
144 | | #endif |
145 | | #ifdef HAVE_UNISTD_H |
146 | | #include <unistd.h> |
147 | | #endif |
148 | | |
149 | | #else /* USE_TIMEOFDAY */ |
150 | | |
151 | | #ifdef HAVE_SYS_TIMES_H |
152 | | #include <sys/times.h> |
153 | | #endif |
154 | | |
155 | | #endif /* USE_TIMEOFDAY */ |
156 | | #endif /* ONIG_DEBUG_STATISTICS */ |
157 | | |
158 | | /* These x... definitions arguably do not need to be inside the |
159 | | ONIG_NO_STANDARD_C_HEADERS guard, but Issue #170 requires them here, |
160 | | and keeping them here causes no problem. |
161 | | */ |
162 | | #ifndef xmemset |
163 | 357k | #define xmemset memset |
164 | | #endif |
165 | | |
166 | | #ifndef xmemcpy |
167 | 329k | #define xmemcpy memcpy |
168 | | #endif |
169 | | |
170 | | #ifndef xmemmove |
171 | 0 | #define xmemmove memmove |
172 | | #endif |
173 | | |
174 | | #endif /* ONIG_NO_STANDARD_C_HEADERS */ |
175 | | |
176 | | |
/* Discard any earlier MIN/MAX definitions so the ones below are in effect. */
#ifdef MIN
#undef MIN
#endif
#ifdef MAX
#undef MAX
#endif

/* NOTE: both macros evaluate their arguments more than once;
   do not pass expressions with side effects. */
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))

/* Null tests that work for any pointer type (the argument is cast to void*). */
#define IS_NULL(p)                    (((void*)(p)) == (void*)0)
#define IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)

/* Early-return helpers: leave the enclosing function when p is null,
   returning NULL or ONIGERR_MEMORY respectively.
   They are wrapped in do { } while (0) so that each expands to exactly one
   statement; a bare `if` would silently capture an `else` that follows the
   macro call in the caller. */
#define CHECK_NULL_RETURN(p)          do { if (IS_NULL(p)) return NULL; } while (0)
#define CHECK_NULL_RETURN_MEMERR(p)   do { if (IS_NULL(p)) return ONIGERR_MEMORY; } while (0)
#define NULL_UCHARP                   ((UChar* )0)

/* p >= q; the strict variant additionally yields false when p is NULL.
   Both expansions are fully parenthesized so the result can be negated or
   combined with other operators safely. */
#ifdef USE_STRICT_POINTER_COMPARISON
#define PTR_GE(p,q)  ((p) != NULL && (p) >= (q))
#else
#define PTR_GE(p,q)  ((p) >= (q))
#endif
198 | | |
199 | | #ifndef ONIG_INT_MAX |
200 | 0 | #define ONIG_INT_MAX INT_MAX |
201 | | #endif |
202 | | |
203 | 0 | #define CHAR_MAP_SIZE 256 |
204 | 1.43M | #define INFINITE_LEN ONIG_INFINITE_DISTANCE |
205 | 0 | #define STEP_BACK_MAX_CHAR_LEN 65535 /* INT_MAX is too big */ |
206 | 0 | #define LOOK_BEHIND_MAX_CHAR_LEN STEP_BACK_MAX_CHAR_LEN |
207 | | |
208 | | /* avoid a collision with another system's UChar definition */ |
209 | | #ifdef ONIG_ESCAPE_UCHAR_COLLISION |
210 | | #undef ONIG_ESCAPE_UCHAR_COLLISION |
211 | | #endif |
212 | | |
213 | 399k | #define xmalloc malloc |
214 | 28.0k | #define xrealloc realloc |
215 | 3.50k | #define xcalloc calloc |
216 | 206k | #define xfree free |
217 | | |
218 | 0 | #define st_init_table onig_st_init_table |
219 | 0 | #define st_init_table_with_size onig_st_init_table_with_size |
220 | | #define st_init_numtable onig_st_init_numtable |
221 | | #define st_init_numtable_with_size onig_st_init_numtable_with_size |
222 | | #define st_init_strtable onig_st_init_strtable |
223 | | #define st_init_strtable_with_size onig_st_init_strtable_with_size |
224 | | #define st_delete onig_st_delete |
225 | | #define st_delete_safe onig_st_delete_safe |
226 | | #define st_insert onig_st_insert |
227 | | #define st_lookup onig_st_lookup |
228 | 0 | #define st_foreach onig_st_foreach |
229 | | #define st_add_direct onig_st_add_direct |
230 | | #define st_free_table onig_st_free_table |
231 | | #define st_cleanup_safe onig_st_cleanup_safe |
232 | | #define st_copy onig_st_copy |
233 | | #define st_nothing_key_clone onig_st_nothing_key_clone |
234 | | #define st_nothing_key_free onig_st_nothing_key_free |
235 | | /* */ |
236 | | #define onig_st_is_member st_is_member |
237 | | |
238 | | |
239 | | #if defined(_WIN32) && !defined(__GNUC__) |
240 | | |
241 | | #ifndef xalloca |
242 | | #define xalloca _alloca |
243 | | #endif |
244 | | #ifndef xvsnprintf |
245 | | #define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args) |
246 | | #endif |
247 | | #ifndef xsnprintf |
248 | | #define xsnprintf sprintf_s |
249 | | #endif |
250 | | #ifndef xstrcat |
251 | | #define xstrcat(dest,src,size) strcat_s(dest,size,src) |
252 | | #endif |
253 | | |
254 | | #else |
255 | | |
256 | | #ifndef xalloca |
257 | 0 | #define xalloca alloca |
258 | | #endif |
259 | | #ifndef xvsnprintf |
260 | 0 | #define xvsnprintf vsnprintf |
261 | | #endif |
262 | | #ifndef xsnprintf |
263 | 0 | #define xsnprintf snprintf |
264 | | #endif |
265 | | #ifndef xstrcat |
266 | 0 | #define xstrcat(dest,src,size) strcat(dest,src) |
267 | | #endif |
268 | | |
269 | | #endif /* defined(_WIN32) && !defined(__GNUC__) */ |
270 | | |
271 | | |
272 | | #ifdef _WIN32 |
273 | | #ifdef _MSC_VER |
274 | | |
275 | | #if _MSC_VER < 1300 |
276 | | typedef int intptr_t; |
277 | | typedef unsigned int uintptr_t; |
278 | | #endif |
279 | | |
280 | | #if _MSC_VER < 1600 |
281 | | typedef __int32 int32_t; |
282 | | typedef unsigned __int32 uint32_t; |
283 | | typedef __int64 int64_t; |
284 | | typedef unsigned __int64 uint64_t; |
285 | | #endif |
286 | | |
287 | | #endif |
288 | | #endif /* _WIN32 */ |
289 | | |
290 | | #if SIZEOF_VOIDP == SIZEOF_LONG |
291 | | typedef unsigned long hash_data_type; |
292 | | #elif SIZEOF_VOIDP == SIZEOF_LONG_LONG |
293 | | typedef unsigned long long hash_data_type; |
294 | | #endif |
295 | | |
296 | | /* strend hash */ |
297 | | typedef void* hash_table_type; |
298 | | |
299 | | |
300 | | #ifdef USE_CALLOUT |
301 | | |
302 | | typedef struct { |
303 | | int flag; |
304 | | OnigCalloutOf of; |
305 | | int in; |
306 | | int name_id; |
307 | | const UChar* tag_start; |
308 | | const UChar* tag_end; |
309 | | OnigCalloutType type; |
310 | | OnigCalloutFunc start_func; |
311 | | OnigCalloutFunc end_func; |
312 | | union { |
313 | | struct { |
314 | | const UChar* start; |
315 | | const UChar* end; |
316 | | } content; |
317 | | struct { |
318 | | int num; |
319 | | int passed_num; |
320 | | OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM]; |
321 | | OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; |
322 | | } arg; |
323 | | } u; |
324 | | } CalloutListEntry; |
325 | | |
326 | | #endif |
327 | | |
328 | | /* stack pop level */ |
329 | | enum StackPopLevel { |
330 | | STACK_POP_LEVEL_FREE = 0, |
331 | | STACK_POP_LEVEL_MEM_START = 1, |
332 | | STACK_POP_LEVEL_ALL = 2 |
333 | | }; |
334 | | |
335 | | /* optimize flags */ |
336 | | enum OptimizeType { |
337 | | OPTIMIZE_NONE = 0, |
338 | | OPTIMIZE_STR, /* Slow Search */ |
339 | | OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */ |
340 | | OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */ |
341 | | OPTIMIZE_MAP /* char map */ |
342 | | }; |
343 | | |
/* bit status */
typedef unsigned int  MemStatusType;

/*
 * MemStatusType is a bit mask indexed by n.
 * An index below MEM_STATUS_BITS_NUM maps to bit n; the non-LIMIT macros
 * fold every larger index onto bit 0, which MEM_STATUS_IS_ALL_ON() tests.
 * A negative n is not checked by any of these macros.
 * Every macro argument is parenthesized in the expansion so that an
 * argument such as `a | b` is shifted as a whole.
 */
#define MEM_STATUS_BITS_NUM          (sizeof(MemStatusType) * 8)
#define MEM_STATUS_CLEAR(stats)      ((stats) = 0)
#define MEM_STATUS_ON_ALL(stats)     ((stats) = ~((MemStatusType )0))

/* Test bit n; for n >= MEM_STATUS_BITS_NUM test bit 0 instead. */
#define MEM_STATUS_AT(stats,n) \
    ((n) < (int )MEM_STATUS_BITS_NUM  ?  ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))
/* Same as MEM_STATUS_AT, but n <= 0 also tests bit 0. */
#define MEM_STATUS_AT0(stats,n) \
    ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM  ?  ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))

#define MEM_STATUS_IS_ALL_ON(stats)  (((stats) & 1) != 0)

/* Set bit n; n == 0 is ignored and n >= MEM_STATUS_BITS_NUM sets bit 0. */
#define MEM_STATUS_ON(stats,n) do {\
  if ((n) < (int )MEM_STATUS_BITS_NUM) {\
    if ((n) != 0)\
      (stats) |= ((MemStatusType )1 << (n));\
  }\
  else\
    (stats) |= 1;\
} while (0)

/* Set bit n (including n == 0); out-of-range n is ignored. */
#define MEM_STATUS_ON_SIMPLE(stats,n) do {\
    if ((n) < (int )MEM_STATUS_BITS_NUM)\
    (stats) |= ((MemStatusType )1 << (n));\
} while (0)

/* Test bit n; out-of-range n yields 0 (no folding onto bit 0). */
#define MEM_STATUS_LIMIT_AT(stats,n) \
    ((n) < (int )MEM_STATUS_BITS_NUM  ?  ((stats) & ((MemStatusType )1 << (n))) : 0)
/* Set bit n; n == 0 and out-of-range n are ignored. */
#define MEM_STATUS_LIMIT_ON(stats,n) do {\
    if ((n) < (int )MEM_STATUS_BITS_NUM && (n) != 0) {\
    (stats) |= ((MemStatusType )1 << (n));\
  }\
} while (0)
378 | | |
379 | | |
380 | | #define IS_CODE_WORD_ASCII(enc,code) \ |
381 | | (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) |
382 | | #define IS_CODE_DIGIT_ASCII(enc, code) \ |
383 | 0 | (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_DIGIT(enc,code)) |
384 | | #define IS_CODE_XDIGIT_ASCII(enc, code) \ |
385 | 0 | (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_XDIGIT(enc,code)) |
386 | | |
387 | 0 | #define DIGITVAL(code) ((code) - '0') |
388 | 0 | #define ODIGITVAL(code) DIGITVAL(code) |
389 | | #define XDIGITVAL(enc,code) \ |
390 | 0 | (IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \ |
391 | 0 | : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) |
392 | | |
393 | | #define OPTON_CALLBACK_EACH_MATCH(option) \ |
394 | 0 | ((option) & ONIG_OPTION_CALLBACK_EACH_MATCH) |
395 | 0 | #define OPTON_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) |
396 | 0 | #define OPTON_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) |
397 | | #define OPTON_NEGATE_SINGLELINE(option) ((option) & \ |
398 | | ONIG_OPTION_NEGATE_SINGLELINE) |
399 | 3.50k | #define OPTON_DONT_CAPTURE_GROUP(option) ((option) & \ |
400 | 3.50k | ONIG_OPTION_DONT_CAPTURE_GROUP) |
401 | 0 | #define OPTON_CAPTURE_GROUP(option) ((option) & ONIG_OPTION_CAPTURE_GROUP) |
402 | 0 | #define OPTON_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) |
403 | 0 | #define OPTON_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) |
404 | 0 | #define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) |
405 | 0 | #define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \ |
406 | 0 | ONIG_OPTION_CHECK_VALIDITY_OF_STRING) |
407 | 0 | #define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING) |
408 | 0 | #define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING) |
409 | 0 | #define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION) |
410 | | |
411 | | |
/* Sentinel repeat count tested by IS_INFINITE_REPEAT().
   The value is parenthesized so the sign stays attached to the literal
   wherever the macro is expanded. */
#define INFINITE_REPEAT  (-1)
#define IS_INFINITE_REPEAT(n)  ((n) == INFINITE_REPEAT)
414 | | |
/* bitset */
#define BITS_PER_BYTE      8
#define SINGLE_BYTE_SIZE   (1 << BITS_PER_BYTE)
#define BITS_IN_ROOM       32   /* 4 * BITS_PER_BYTE */
#define BITSET_REAL_SIZE   (SINGLE_BYTE_SIZE / BITS_IN_ROOM)

/* A BitSet holds SINGLE_BYTE_SIZE bits, stored as BITSET_REAL_SIZE
   "rooms" of BITS_IN_ROOM bits each. */
typedef uint32_t  Bits;
typedef Bits      BitSet[BITSET_REAL_SIZE];
typedef Bits*     BitSetRef;

#define SIZE_BITSET        sizeof(BitSet)

/* Zero every room of bs.
   The loop counter has a deliberately unusual name: the argument is
   expanded inside the loop body, so a counter called `i` would capture
   a caller expression such as sets[i]. */
#define BITSET_CLEAR(bs) do {\
  int bitset_clear_idx_;\
  for (bitset_clear_idx_ = 0; bitset_clear_idx_ < (int )BITSET_REAL_SIZE; bitset_clear_idx_++) {\
    (bs)[bitset_clear_idx_] = 0;\
  }\
} while (0)

/* Room holding bit `pos` (pos / 32) and the mask of that bit inside
   the room (pos % 32); the constants 5 and 0x1f match BITS_IN_ROOM. */
#define BS_ROOM(bs,pos)            ((bs)[(unsigned int )(pos) >> 5])
#define BS_BIT(pos)                (1u << ((unsigned int )(pos) & 0x1f))

/* Single-bit test / set / clear / toggle.  Each expansion is fully
   parenthesized so it behaves as one expression at the call site. */
#define BITSET_AT(bs, pos)         (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos)    (BS_ROOM(bs,pos) |= BS_BIT(pos))
#define BITSET_CLEAR_BIT(bs, pos)  (BS_ROOM(bs,pos) &= ~(BS_BIT(pos)))
#define BITSET_INVERT_BIT(bs, pos) (BS_ROOM(bs,pos) ^= BS_BIT(pos))
439 | | |
440 | | /* has body */ |
441 | 7.01k | #define ANCR_PREC_READ (1<<0) |
442 | 108k | #define ANCR_PREC_READ_NOT (1<<1) |
443 | 10.5k | #define ANCR_LOOK_BEHIND (1<<2) |
444 | 0 | #define ANCR_LOOK_BEHIND_NOT (1<<3) |
445 | | /* no body */ |
446 | 17.5k | #define ANCR_BEGIN_BUF (1<<4) |
447 | 7.01k | #define ANCR_BEGIN_LINE (1<<5) |
448 | 7.01k | #define ANCR_BEGIN_POSITION (1<<6) |
449 | 17.5k | #define ANCR_END_BUF (1<<7) |
450 | 17.5k | #define ANCR_SEMI_END_BUF (1<<8) |
451 | 10.5k | #define ANCR_END_LINE (1<<9) |
452 | 0 | #define ANCR_WORD_BOUNDARY (1<<10) |
453 | 0 | #define ANCR_NO_WORD_BOUNDARY (1<<11) |
454 | 0 | #define ANCR_WORD_BEGIN (1<<12) |
455 | 0 | #define ANCR_WORD_END (1<<13) |
456 | 3.50k | #define ANCR_ANYCHAR_INF (1<<14) |
457 | 3.50k | #define ANCR_ANYCHAR_INF_ML (1<<15) |
458 | 7.01k | #define ANCR_TEXT_SEGMENT_BOUNDARY (1<<16) |
459 | 3.50k | #define ANCR_NO_TEXT_SEGMENT_BOUNDARY (1<<17) |
460 | | |
461 | | |
462 | 3.50k | #define ANCHOR_HAS_BODY(a) ((a)->type < ANCR_BEGIN_BUF) |
463 | | |
464 | | #define IS_WORD_ANCHOR_TYPE(type) \ |
465 | 0 | ((type) == ANCR_WORD_BOUNDARY || (type) == ANCR_NO_WORD_BOUNDARY || \ |
466 | 0 | (type) == ANCR_WORD_BEGIN || (type) == ANCR_WORD_END) |
467 | | |
468 | | /* operation code */ |
469 | | enum OpCode { |
470 | | OP_FINISH = 0, /* matching process terminator (no more alternative) */ |
471 | | OP_END = 1, /* pattern code terminator (success end) */ |
472 | | OP_STR_1 = 2, /* single byte, N = 1 */ |
473 | | OP_STR_2, /* single byte, N = 2 */ |
474 | | OP_STR_3, /* single byte, N = 3 */ |
475 | | OP_STR_4, /* single byte, N = 4 */ |
476 | | OP_STR_5, /* single byte, N = 5 */ |
477 | | OP_STR_N, /* single byte */ |
478 | | OP_STR_MB2N1, /* mb-length = 2 N = 1 */ |
479 | | OP_STR_MB2N2, /* mb-length = 2 N = 2 */ |
480 | | OP_STR_MB2N3, /* mb-length = 2 N = 3 */ |
481 | | OP_STR_MB2N, /* mb-length = 2 */ |
482 | | OP_STR_MB3N, /* mb-length = 3 */ |
483 | | OP_STR_MBN, /* other length */ |
484 | | OP_CCLASS, |
485 | | OP_CCLASS_MB, |
486 | | OP_CCLASS_MIX, |
487 | | OP_CCLASS_NOT, |
488 | | OP_CCLASS_MB_NOT, |
489 | | OP_CCLASS_MIX_NOT, |
490 | | OP_ANYCHAR, /* "." */ |
491 | | OP_ANYCHAR_ML, /* "." multi-line */ |
492 | | OP_ANYCHAR_STAR, /* ".*" */ |
493 | | OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ |
494 | | OP_ANYCHAR_STAR_PEEK_NEXT, |
495 | | OP_ANYCHAR_ML_STAR_PEEK_NEXT, |
496 | | OP_WORD, |
497 | | OP_WORD_ASCII, |
498 | | OP_NO_WORD, |
499 | | OP_NO_WORD_ASCII, |
500 | | OP_WORD_BOUNDARY, |
501 | | OP_NO_WORD_BOUNDARY, |
502 | | OP_WORD_BEGIN, |
503 | | OP_WORD_END, |
504 | | OP_TEXT_SEGMENT_BOUNDARY, |
505 | | OP_BEGIN_BUF, |
506 | | OP_END_BUF, |
507 | | OP_BEGIN_LINE, |
508 | | OP_END_LINE, |
509 | | OP_SEMI_END_BUF, |
510 | | OP_CHECK_POSITION, |
511 | | OP_BACKREF1, |
512 | | OP_BACKREF2, |
513 | | OP_BACKREF_N, |
514 | | OP_BACKREF_N_IC, |
515 | | OP_BACKREF_MULTI, |
516 | | OP_BACKREF_MULTI_IC, |
517 | | #ifdef USE_BACKREF_WITH_LEVEL |
518 | | OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ |
519 | | OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */ |
520 | | #endif |
521 | | OP_BACKREF_CHECK, /* (?(n)), (?('name')) */ |
522 | | #ifdef USE_BACKREF_WITH_LEVEL |
523 | | OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */ |
524 | | #endif |
525 | | OP_MEM_START, |
526 | | OP_MEM_START_PUSH, /* push back-tracker to stack */ |
527 | | OP_MEM_END_PUSH, /* push back-tracker to stack */ |
528 | | #ifdef USE_CALL |
529 | | OP_MEM_END_PUSH_REC, /* push back-tracker to stack */ |
530 | | #endif |
531 | | OP_MEM_END, |
532 | | #ifdef USE_CALL |
533 | | OP_MEM_END_REC, /* push marker to stack */ |
534 | | #endif |
535 | | OP_FAIL, /* pop stack and move */ |
536 | | OP_JUMP, |
537 | | OP_PUSH, |
538 | | OP_PUSH_SUPER, |
539 | | OP_POP, |
540 | | OP_POP_TO_MARK, |
541 | | #ifdef USE_OP_PUSH_OR_JUMP_EXACT |
542 | | OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ |
543 | | #endif |
544 | | OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ |
545 | | OP_REPEAT, /* {n,m} */ |
546 | | OP_REPEAT_NG, /* {n,m}? (non greedy) */ |
547 | | OP_REPEAT_INC, |
548 | | OP_REPEAT_INC_NG, /* non greedy */ |
549 | | OP_EMPTY_CHECK_START, /* null loop checker start */ |
550 | | OP_EMPTY_CHECK_END, /* null loop checker end */ |
551 | | OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ |
552 | | #ifdef USE_CALL |
553 | | OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ |
554 | | #endif |
555 | | OP_MOVE, |
556 | | OP_STEP_BACK_START, |
557 | | OP_STEP_BACK_NEXT, |
558 | | OP_CUT_TO_MARK, |
559 | | OP_MARK, |
560 | | OP_SAVE_VAL, |
561 | | OP_UPDATE_VAR, |
562 | | #ifdef USE_CALL |
563 | | OP_CALL, /* \g<name> */ |
564 | | OP_RETURN, |
565 | | #endif |
566 | | #ifdef USE_CALLOUT |
567 | | OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */ |
568 | | OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */ |
569 | | #endif |
570 | | }; |
571 | | |
572 | | enum SaveType { |
573 | | SAVE_KEEP = 0, /* SAVE S */ |
574 | | SAVE_S = 1, |
575 | | SAVE_RIGHT_RANGE = 2, |
576 | | }; |
577 | | |
578 | | enum UpdateVarType { |
579 | | UPDATE_VAR_KEEP_FROM_STACK_LAST = 0, |
580 | | UPDATE_VAR_S_FROM_STACK = 1, |
581 | | UPDATE_VAR_RIGHT_RANGE_FROM_STACK = 2, |
582 | | UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, |
583 | | UPDATE_VAR_RIGHT_RANGE_TO_S = 4, |
584 | | UPDATE_VAR_RIGHT_RANGE_INIT = 5, |
585 | | }; |
586 | | |
587 | | enum CheckPositionType { |
588 | | CHECK_POSITION_SEARCH_START = 0, |
589 | | CHECK_POSITION_CURRENT_RIGHT_RANGE = 1, |
590 | | }; |
591 | | |
592 | | enum TextSegmentBoundaryType { |
593 | | EXTENDED_GRAPHEME_CLUSTER_BOUNDARY = 0, |
594 | | WORD_BOUNDARY = 1, |
595 | | }; |
596 | | |
597 | | typedef int RelAddrType; |
598 | | typedef int AbsAddrType; |
599 | | typedef int LengthType; |
600 | | typedef int RelPositionType; |
601 | | typedef int RepeatNumType; |
602 | | typedef int MemNumType; |
603 | | typedef void* PointerType; |
604 | | typedef int SaveType; |
605 | | typedef int UpdateVarType; |
606 | | typedef int ModeType; |
607 | | |
608 | 0 | #define SIZE_OPCODE 1 |
609 | | #define SIZE_RELADDR sizeof(RelAddrType) |
610 | | #define SIZE_ABSADDR sizeof(AbsAddrType) |
611 | | #define SIZE_LENGTH sizeof(LengthType) |
612 | | #define SIZE_MEMNUM sizeof(MemNumType) |
613 | | #define SIZE_REPEATNUM sizeof(RepeatNumType) |
614 | | #define SIZE_OPTION sizeof(OnigOptionType) |
615 | 0 | #define SIZE_CODE_POINT sizeof(OnigCodePoint) |
616 | | #define SIZE_POINTER sizeof(PointerType) |
617 | | #define SIZE_SAVE_TYPE sizeof(SaveType) |
618 | | #define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType) |
619 | | #define SIZE_MODE sizeof(ModeType) |
620 | | |
621 | | /* code point's address must be aligned address. */ |
622 | 0 | #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) |
623 | | |
624 | | |
625 | | /* op-code + arg size */ |
626 | | |
627 | | /* for relative address increment to go next op. */ |
628 | 3.50k | #define SIZE_INC 1 |
629 | | |
630 | 0 | #define OPSIZE_ANYCHAR_STAR 1 |
631 | 0 | #define OPSIZE_ANYCHAR_STAR_PEEK_NEXT 1 |
632 | 7.01k | #define OPSIZE_JUMP 1 |
633 | 3.50k | #define OPSIZE_PUSH 1 |
634 | | #define OPSIZE_PUSH_SUPER 1 |
635 | 0 | #define OPSIZE_POP 1 |
636 | 0 | #define OPSIZE_POP_TO_MARK 1 |
637 | | #ifdef USE_OP_PUSH_OR_JUMP_EXACT |
638 | 0 | #define OPSIZE_PUSH_OR_JUMP_EXACT1 1 |
639 | | #endif |
640 | 0 | #define OPSIZE_PUSH_IF_PEEK_NEXT 1 |
641 | 0 | #define OPSIZE_REPEAT 1 |
642 | 0 | #define OPSIZE_REPEAT_INC 1 |
643 | | #define OPSIZE_REPEAT_INC_NG 1 |
644 | 0 | #define OPSIZE_WORD_BOUNDARY 1 |
645 | 0 | #define OPSIZE_BACKREF 1 |
646 | 0 | #define OPSIZE_FAIL 1 |
647 | 0 | #define OPSIZE_MEM_START 1 |
648 | 0 | #define OPSIZE_MEM_START_PUSH 1 |
649 | 0 | #define OPSIZE_MEM_END_PUSH 1 |
650 | 0 | #define OPSIZE_MEM_END_PUSH_REC 1 |
651 | 0 | #define OPSIZE_MEM_END 1 |
652 | 0 | #define OPSIZE_MEM_END_REC 1 |
653 | 0 | #define OPSIZE_EMPTY_CHECK_START 1 |
654 | 0 | #define OPSIZE_EMPTY_CHECK_END 1 |
655 | 0 | #define OPSIZE_CHECK_POSITION 1 |
656 | 0 | #define OPSIZE_CALL 1 |
657 | 0 | #define OPSIZE_RETURN 1 |
658 | 0 | #define OPSIZE_MOVE 1 |
659 | 0 | #define OPSIZE_STEP_BACK_START 1 |
660 | 0 | #define OPSIZE_STEP_BACK_NEXT 1 |
661 | 0 | #define OPSIZE_CUT_TO_MARK 1 |
662 | 0 | #define OPSIZE_MARK 1 |
663 | 0 | #define OPSIZE_SAVE_VAL 1 |
664 | 0 | #define OPSIZE_UPDATE_VAR 1 |
665 | | |
666 | | #ifdef USE_CALLOUT |
667 | 0 | #define OPSIZE_CALLOUT_CONTENTS 1 |
668 | 0 | #define OPSIZE_CALLOUT_NAME 1 |
669 | | #endif |
670 | | |
671 | | |
672 | 210k | #define MC_ESC(syn) (syn)->meta_char_table.esc |
673 | 0 | #define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar |
674 | 0 | #define MC_ANYTIME(syn) (syn)->meta_char_table.anytime |
675 | 0 | #define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time |
676 | 0 | #define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time |
677 | 0 | #define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime |
678 | | |
679 | | #define IS_MC_ESC_CODE(code, syn) \ |
680 | 105k | ((code) == MC_ESC(syn) && \ |
681 | 3.50k | !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) |
682 | | |
683 | | |
684 | | #define SYN_POSIX_COMMON_OP \ |
685 | | ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ |
686 | | ONIG_SYN_OP_DECIMAL_BACKREF | \ |
687 | | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ |
688 | | ONIG_SYN_OP_LINE_ANCHOR | \ |
689 | | ONIG_SYN_OP_ESC_CONTROL_CHARS ) |
690 | | |
691 | | #define SYN_GNU_REGEX_OP \ |
692 | | ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ |
693 | | ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ |
694 | | ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ |
695 | | ONIG_SYN_OP_VBAR_ALT | \ |
696 | | ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ |
697 | | ONIG_SYN_OP_QMARK_ZERO_ONE | \ |
698 | | ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ |
699 | | ONIG_SYN_OP_ESC_W_WORD | \ |
700 | | ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ |
701 | | ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ |
702 | | ONIG_SYN_OP_LINE_ANCHOR ) |
703 | | |
704 | | #define SYN_GNU_REGEX_BV \ |
705 | | ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ |
706 | | ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ |
707 | | ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) |
708 | | |
709 | | |
710 | 20.7M | #define NCCLASS_FLAGS(cc) ((cc)->flags) |
711 | 0 | #define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) |
712 | 0 | #define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) |
713 | 20.7M | #define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) |
714 | | |
715 | | /* cclass node */ |
716 | | #define FLAG_NCCLASS_NOT (1<<0) |
717 | | #define FLAG_NCCLASS_SHARE (1<<1) |
718 | | |
719 | 0 | #define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) |
720 | 0 | #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) |
721 | 20.7M | #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) |
722 | | |
723 | | |
724 | | typedef struct { |
725 | | #ifdef USE_DIRECT_THREADED_CODE |
726 | | const void* opaddr; |
727 | | #else |
728 | | enum OpCode opcode; |
729 | | #endif |
730 | | union { |
731 | | struct { |
732 | | UChar s[16]; /* Now used first 7 bytes only. */ |
733 | | } exact; |
734 | | struct { |
735 | | UChar* s; |
736 | | LengthType n; /* number of chars */ |
737 | | } exact_n; /* EXACTN, EXACTN_IC, EXACTMB2N, EXACTMB3N */ |
738 | | struct { |
739 | | UChar* s; |
740 | | LengthType n; /* number of chars */ |
741 | | LengthType len; /* char byte length */ |
742 | | } exact_len_n; /* EXACTMBN */ |
743 | | struct { |
744 | | BitSetRef bsp; |
745 | | } cclass; |
746 | | struct { |
747 | | void* mb; |
748 | | } cclass_mb; |
749 | | struct { |
750 | | void* mb; /* mb must be same position with cclass_mb for match_at(). */ |
751 | | BitSetRef bsp; |
752 | | } cclass_mix; |
753 | | struct { |
754 | | UChar c; |
755 | | } anychar_star_peek_next; |
756 | | struct { |
757 | | ModeType mode; |
758 | | } word_boundary; /* OP_WORD_BOUNDARY, OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END */ |
759 | | struct { |
760 | | enum TextSegmentBoundaryType type; |
761 | | int not; |
762 | | } text_segment_boundary; |
763 | | struct { |
764 | | enum CheckPositionType type; |
765 | | } check_position; |
766 | | struct { |
767 | | union { |
768 | | MemNumType n1; /* num == 1 */ |
769 | | MemNumType* ns; /* num > 1 */ |
770 | | }; |
771 | | int num; |
772 | | int nest_level; |
773 | | } backref_general; /* BACKREF_MULTI, BACKREF_MULTI_IC, BACKREF_WITH_LEVEL, BACKREF_CHECK, BACKREF_CHECK_WITH_LEVEL, */ |
774 | | struct { |
775 | | MemNumType n1; |
776 | | } backref_n; /* BACKREF_N, BACKREF_N_IC */ |
777 | | struct { |
778 | | MemNumType num; |
779 | | } memory_start; /* MEMORY_START, MEMORY_START_PUSH */ |
780 | | struct { |
781 | | MemNumType num; |
782 | | } memory_end; /* MEMORY_END, MEMORY_END_REC, MEMORY_END_PUSH, MEMORY_END_PUSH_REC */ |
783 | | struct { |
784 | | RelAddrType addr; |
785 | | } jump; |
786 | | struct { |
787 | | RelAddrType addr; |
788 | | } push; |
789 | | struct { |
790 | | RelAddrType addr; |
791 | | UChar c; |
792 | | } push_or_jump_exact1; |
793 | | struct { |
794 | | RelAddrType addr; |
795 | | UChar c; |
796 | | } push_if_peek_next; |
797 | | struct { |
798 | | MemNumType id; |
799 | | } pop_to_mark; |
800 | | struct { |
801 | | MemNumType id; |
802 | | RelAddrType addr; |
803 | | } repeat; /* REPEAT, REPEAT_NG */ |
804 | | struct { |
805 | | MemNumType id; |
806 | | } repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */ |
807 | | struct { |
808 | | MemNumType mem; |
809 | | } empty_check_start; |
810 | | struct { |
811 | | MemNumType mem; |
812 | | MemStatusType empty_status_mem; |
813 | | } empty_check_end; /* EMPTY_CHECK_END, EMPTY_CHECK_END_MEMST, EMPTY_CHECK_END_MEMST_PUSH */ |
814 | | struct { |
815 | | RelAddrType addr; |
816 | | } prec_read_not_start; |
817 | | struct { |
818 | | LengthType len; |
819 | | } look_behind; |
820 | | struct { |
821 | | LengthType len; |
822 | | RelAddrType addr; |
823 | | } look_behind_not_start; |
824 | | struct { |
825 | | RelPositionType n; /* char relative position */ |
826 | | } move; |
827 | | struct { |
828 | | LengthType initial; /* char length */ |
829 | | LengthType remaining; /* char length */ |
830 | | RelAddrType addr; |
831 | | } step_back_start; |
832 | | struct { |
833 | | MemNumType id; |
834 | | int restore_pos; /* flag: restore current string position */ |
835 | | } cut_to_mark; |
836 | | struct { |
837 | | MemNumType id; |
838 | | int save_pos; /* flag: save current string position */ |
839 | | } mark; |
840 | | struct { |
841 | | SaveType type; |
842 | | MemNumType id; |
843 | | } save_val; |
844 | | struct { |
845 | | UpdateVarType type; |
846 | | MemNumType id; |
847 | | int clear; /* UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK or UPDATE_VAR_RIGHT_RANGE_FROM_STACK */ |
848 | | } update_var; |
849 | | struct { |
850 | | AbsAddrType addr; |
851 | | #if defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_CALL) |
852 | | MemNumType called_mem; |
853 | | #endif |
854 | | } call; |
855 | | #ifdef USE_CALLOUT |
856 | | struct { |
857 | | MemNumType num; |
858 | | } callout_contents; |
859 | | struct { |
860 | | MemNumType num; |
861 | | MemNumType id; |
862 | | } callout_name; |
863 | | #endif |
864 | | }; |
865 | | } Operation; |
866 | | |
867 | | typedef struct { /* extension data of a regex; re_pattern_buffer.extp points to one of these */ |
868 | | const UChar* pattern; /* NOTE(review): presumably the start of the pattern text kept by onig_ext_set_pattern() -- confirm */ |
869 | | const UChar* pattern_end; /* NOTE(review): presumably the end of that pattern text -- confirm */ |
870 | | #ifdef USE_CALLOUT |
871 | | void* tag_table; /* opaque callout tag table; presumably what onig_callout_tag_table_free() releases -- confirm */ |
872 | | int callout_num; /* NOTE(review): presumably the number of callouts registered in callout_list -- confirm */ |
873 | | int callout_list_alloc; /* NOTE(review): presumably the allocated entry count of callout_list -- confirm */ |
874 | | CalloutListEntry* callout_list; /* index: callout num */ |
875 | | #endif |
876 | | } RegexExt; |
877 | | |
878 | | typedef struct { /* bounds of one repeat plus the location of its body; re_pattern_buffer.repeat_range points to these */ |
879 | | int lower; /* NOTE(review): presumably the minimum repeat count -- confirm */ |
880 | | int upper; /* NOTE(review): presumably the maximum repeat count -- confirm */ |
881 | | union { |
882 | | Operation* pcode; /* address of repeated body */ |
883 | | int offset; /* NOTE(review): presumably the body location as an offset, used before pcode is resolved -- confirm */ |
884 | | } u; |
885 | | } RepeatRange; |
886 | | |
887 | | struct re_pattern_buffer { /* regex object state: operation buffer, counters, encoding/syntax settings and search-optimization info */ |
888 | | /* common members of BBuf(bytes-buffer) */ |
889 | | Operation* ops; /* start of the operation array; base address used by COP_CURR_OFFSET_BYTES() */ |
890 | | #ifdef USE_DIRECT_THREADED_CODE |
891 | | enum OpCode* ocs; /* opcode array, present only with USE_DIRECT_THREADED_CODE; presumably parallel to ops -- confirm */ |
892 | | #endif |
893 | | Operation* ops_curr; /* current operation; this is what COP() yields */ |
894 | | unsigned int ops_used; /* used space for ops */ |
895 | | unsigned int ops_alloc; /* allocated space for ops */ |
896 | | unsigned char* string_pool; /* NOTE(review): presumably start of storage for string data referenced by ops -- confirm */ |
897 | | unsigned char* string_pool_end; /* NOTE(review): presumably end of string_pool -- confirm */ |
898 | |
899 | | int num_mem; /* number of memories (...) in use; numbering starts at 1 */ |
900 | | int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id counter */ |
901 | | int num_empty_check; /* OP_EMPTY_CHECK_START/END id counter */ |
902 | | int num_call; /* number of subexp calls */ |
903 | | MemStatusType capture_history; /* (?@...) flag (1-31) */ |
904 | | MemStatusType push_mem_start; /* need backtrack flag */ |
905 | | MemStatusType push_mem_end; /* need backtrack flag */ |
906 | | int stack_pop_level; |
907 | | int repeat_range_alloc; /* NOTE(review): presumably the allocated element count of repeat_range -- confirm */ |
908 | | RepeatRange* repeat_range; /* NOTE(review): presumably indexed by repeat id (see num_repeat) -- confirm */ |
909 | |
910 | | OnigEncoding enc; /* character encoding */ |
911 | | OnigOptionType options; |
912 | | OnigSyntaxType* syntax; |
913 | | OnigCaseFoldType case_fold_flag; |
914 | | void* name_table; /* opaque; NOTE(review): presumably the table of group names -- confirm */ |
915 | |
916 | | /* optimization info (string search, char-map and anchors) */ |
917 | | int optimize; /* optimize flag */ |
918 | | int threshold_len; /* search string length threshold for applying the optimization */ |
919 | | int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ |
920 | | OnigLen anc_dist_min; /* (SEMI_)END_BUF anchor distance, minimum */ |
921 | | OnigLen anc_dist_max; /* (SEMI_)END_BUF anchor distance, maximum */ |
922 | | int sub_anchor; /* start-anchor for exact or map */ |
923 | | unsigned char *exact; /* NOTE(review): presumably start of the exact string used by the string search -- confirm */ |
924 | | unsigned char *exact_end; /* NOTE(review): presumably end of the exact string -- confirm */ |
925 | | unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */ |
926 | | int map_offset; |
927 | | OnigLen dist_min; /* min-distance of exact or map */ |
928 | | OnigLen dist_max; /* max-distance of exact or map */ |
929 | | RegexExt* extp; /* extension data (RegexExt) */ |
930 | | }; |
931 | | |
932 | 287k | #define COP(reg) ((reg)->ops_curr) /* current operation of reg */ |
933 | 7.01k | #define COP_CURR_OFFSET(reg) ((reg)->ops_used - 1) /* ops_used minus one; unsigned arithmetic, so it wraps if ops_used is 0 */ |
934 | | #define COP_CURR_OFFSET_BYTES(reg, p) /* byte distance from (reg)->ops to member p of the current operation, as int */ \ |
935 | 0 | ((int )((char* )(&((reg)->ops_curr->p)) - (char* )((reg)->ops))) |
936 | | |
937 | | |
938 | | extern void onig_add_end_call(void (*func)(void)); /* registers func; NOTE(review): presumably invoked at library shutdown -- confirm */ |
939 | | extern void onig_warning(const char* s); |
940 | | extern UChar* onig_error_code_to_format P_((int code)); /* NOTE(review): presumably maps an error code to its message format string -- confirm */ |
941 | | extern void ONIG_VARIADIC_FUNC_ATTR onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); |
942 | | extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); /* NOTE(review): presumably compiles the pattern range into reg and reports details through einfo -- confirm */ |
943 | | extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); /* cc is passed as void* but is a CClassNode* */ |
944 | | extern RegexExt* onig_get_regex_ext(regex_t* reg); |
945 | | extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); |
946 | | extern int onig_positive_int_multiply(int x, int y); /* NOTE(review): presumably an overflow-aware multiply of positive ints -- confirm the failure value */ |
947 | | extern hash_table_type onig_st_init_strend_table_with_size P_((int size)); /* hash table whose keys are given as a str_key/end_key pointer pair */ |
948 | | extern int onig_st_lookup_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); /* result is written through value */ |
949 | | extern int onig_st_insert_strend P_((hash_table_type table, const UChar* str_key, const UChar* end_key, hash_data_type value)); |
950 | | |
951 | | #ifdef ONIG_DEBUG /* debug-only declarations; each one also needs its own ONIG_DEBUG_* switch */ |
952 | |
953 | | #ifdef ONIG_DEBUG_COMPILE |
954 | | extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); /* output goes to f */ |
955 | | #endif |
956 | |
957 | | #ifdef ONIG_DEBUG_STATISTICS |
958 | | extern void onig_statistics_init P_((void)); |
959 | | extern int onig_print_statistics P_((FILE* f)); /* output goes to f */ |
960 | | #endif |
961 | |
962 | | #endif /* ONIG_DEBUG */ |
963 | | |
964 | | #ifdef USE_CALLOUT |
965 | | |
966 | | extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id); /* lookups keyed by a callout name id */ |
967 | | extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id); |
968 | | extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id); |
969 | | extern int onig_callout_tag_table_free(void* table); /* table is opaque; NOTE(review): presumably a RegexExt.tag_table -- confirm */ |
970 | | extern void onig_free_reg_callout_list(int n, CalloutListEntry* list); /* NOTE(review): presumably n is the number of entries in list -- confirm */ |
971 | | extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num); /* NOTE(review): presumably num is a callout num as used to index RegexExt.callout_list -- confirm */ |
972 | | extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num); |
973 | | |
974 | | /* for definition of builtin callout */ |
975 | 7.01k | #define BC0_P(name, func) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS, remaining arguments 0; needs `enc` and `id` in the caller's scope */\ |
976 | 7.01k | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
977 | 7.01k | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
978 | 7.01k | (UChar* )(name), (UChar* )((name) + len),\ |
979 | 7.01k | ONIG_CALLOUT_IN_PROGRESS,\ |
980 | 7.01k | onig_builtin_ ## func, 0, 0, 0, 0, 0);\ |
981 | 7.01k | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
982 | 7.01k | } while(0) |
983 | | |
984 | | #define BC0_R(name, func) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_RETRACTION, remaining arguments 0; needs `enc` and `id` in the caller's scope */\ |
985 | | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
986 | | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
987 | | (UChar* )(name), (UChar* )((name) + len),\ |
988 | | ONIG_CALLOUT_IN_RETRACTION,\ |
989 | | onig_builtin_ ## func, 0, 0, 0, 0, 0);\ |
990 | | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
991 | | } while(0) |
992 | | |
993 | | #define BC0_B(name, func) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH, remaining arguments 0; needs `enc` and `id` in the caller's scope */\ |
994 | | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
995 | | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
996 | | (UChar* )(name), (UChar* )((name) + len),\ |
997 | | ONIG_CALLOUT_IN_BOTH,\ |
998 | | onig_builtin_ ## func, 0, 0, 0, 0, 0);\ |
999 | | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
1000 | | } while(0) |
1001 | | |
1002 | 3.50k | #define BC_P(name, func, na, ts) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS, forwarding na and ts; needs `enc` and `id` in the caller's scope */\ |
1003 | 3.50k | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
1004 | 3.50k | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
1005 | 3.50k | (UChar* )(name), (UChar* )((name) + len),\ |
1006 | 3.50k | ONIG_CALLOUT_IN_PROGRESS,\ |
1007 | 3.50k | onig_builtin_ ## func, 0, (na), (ts), 0, 0); \ |
1008 | 3.50k | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
1009 | 3.50k | } while(0) |
1010 | | |
1011 | 3.50k | #define BC_P_O(name, func, nts, ts, nopts, opts) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS, forwarding nts, ts, nopts and opts; needs `enc` and `id` in the caller's scope */\ |
1012 | 3.50k | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
1013 | 3.50k | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
1014 | 3.50k | (UChar* )(name), (UChar* )((name) + len),\ |
1015 | 3.50k | ONIG_CALLOUT_IN_PROGRESS,\ |
1016 | 3.50k | onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ |
1017 | 3.50k | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
1018 | 3.50k | } while(0) |
1019 | | |
1020 | | #define BC_B(name, func, na, ts) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH, forwarding na and ts; needs `enc` and `id` in the caller's scope */\ |
1021 | | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
1022 | | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
1023 | | (UChar* )(name), (UChar* )((name) + len),\ |
1024 | | ONIG_CALLOUT_IN_BOTH,\ |
1025 | | onig_builtin_ ## func, 0, (na), (ts), 0, 0);\ |
1026 | | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
1027 | | } while(0) |
1028 | | |
1029 | 10.5k | #define BC_B_O(name, func, nts, ts, nopts, opts) do { /* register builtin callout `name` -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH, forwarding nts, ts, nopts and opts; needs `enc` and `id` in the caller's scope */\ |
1030 | 10.5k | int len = onigenc_str_bytelen_null(enc, (UChar* )(name)); /* name parenthesized so the cast covers the whole argument */\ |
1031 | 10.5k | id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ |
1032 | 10.5k | (UChar* )(name), (UChar* )((name) + len),\ |
1033 | 10.5k | ONIG_CALLOUT_IN_BOTH,\ |
1034 | 10.5k | onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ |
1035 | 10.5k | if (id < 0) return id; /* a negative id returns from the ENCLOSING function */\ |
1036 | 10.5k | } while(0) |
1037 | | |
1038 | | #endif /* USE_CALLOUT */ |
1039 | | |
1040 | | |
1041 | | typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); /* pointer to a no-argument function returning int; NOTE(review): presumably a property-list initializer -- confirm the meaning of the return value */ |
1042 | | |
1043 | | #endif /* REGINT_H */ |