/src/fluent-bit/lib/onigmo/regparse.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /**********************************************************************  | 
2  |  |   regparse.c -  Onigmo (Oniguruma-mod) (regular expression library)  | 
3  |  | **********************************************************************/  | 
4  |  | /*-  | 
5  |  |  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>  | 
6  |  |  * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>  | 
7  |  |  * All rights reserved.  | 
8  |  |  *  | 
9  |  |  * Redistribution and use in source and binary forms, with or without  | 
10  |  |  * modification, are permitted provided that the following conditions  | 
11  |  |  * are met:  | 
12  |  |  * 1. Redistributions of source code must retain the above copyright  | 
13  |  |  *    notice, this list of conditions and the following disclaimer.  | 
14  |  |  * 2. Redistributions in binary form must reproduce the above copyright  | 
15  |  |  *    notice, this list of conditions and the following disclaimer in the  | 
16  |  |  *    documentation and/or other materials provided with the distribution.  | 
17  |  |  *  | 
18  |  |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND  | 
19  |  |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE  | 
20  |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE  | 
21  |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE  | 
22  |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  | 
23  |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  | 
24  |  |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  | 
25  |  |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  | 
26  |  |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  | 
27  |  |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  | 
28  |  |  * SUCH DAMAGE.  | 
29  |  |  */  | 
30  |  |  | 
31  |  | #include "regparse.h"  | 
32  |  | #include <stdarg.h>  | 
33  |  |  | 
34  | 0  | #define WARN_BUFSIZE    256  | 
35  |  |  | 
36  |  | #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS  | 
37  |  |  | 
38  |  |  | 
39  |  | const OnigSyntaxType OnigSyntaxRuby = { | 
40  |  |   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |  | 
41  |  |      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |  | 
42  |  |      ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |  | 
43  |  |      ONIG_SYN_OP_ESC_C_CONTROL )  | 
44  |  |    & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )  | 
45  |  |   , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |  | 
46  |  |       ONIG_SYN_OP2_OPTION_RUBY |  | 
47  |  |       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |  | 
48  |  |       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |  | 
49  |  |       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |  | 
50  |  |       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |  | 
51  |  |       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |  | 
52  |  |       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |  | 
53  |  |       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |  | 
54  |  |       ONIG_SYN_OP2_ESC_H_XDIGIT |  | 
55  |  | #ifndef RUBY  | 
56  |  |       ONIG_SYN_OP2_ESC_U_HEX4 |  | 
57  |  | #endif  | 
58  |  |       ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |  | 
59  |  |       ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |  | 
60  |  |       ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |  | 
61  |  |       ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |  | 
62  |  |       ONIG_SYN_OP2_QMARK_TILDE_ABSENT )  | 
63  |  |   , ( SYN_GNU_REGEX_BV |  | 
64  |  |       ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |  | 
65  |  |       ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |  | 
66  |  |       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |  | 
67  |  |       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |  | 
68  |  |       ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |  | 
69  |  |       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |  | 
70  |  |       ONIG_SYN_WARN_CC_DUP |  | 
71  |  |       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )  | 
72  |  |   , ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE |  | 
73  |  |       ONIG_OPTION_WORD_BOUND_ALL_RANGE )  | 
74  |  |   ,  | 
75  |  |   { | 
76  |  |       (OnigCodePoint )'\\'                       /* esc */  | 
77  |  |     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */  | 
78  |  |     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */  | 
79  |  |     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */  | 
80  |  |     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */  | 
81  |  |     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */  | 
82  |  |   }  | 
83  |  | };  | 
84  |  |  | 
85  |  | const OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;  | 
86  |  |  | 
87  | 0  | extern void onig_null_warn(const char* s ARG_UNUSED) { } | 
88  |  |  | 
89  |  | #ifdef DEFAULT_WARN_FUNCTION  | 
90  |  | static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;  | 
91  |  | #else  | 
92  |  | static OnigWarnFunc onig_warn = onig_null_warn;  | 
93  |  | #endif  | 
94  |  |  | 
95  |  | #ifdef DEFAULT_VERB_WARN_FUNCTION  | 
96  |  | static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;  | 
97  |  | #else  | 
98  |  | static OnigWarnFunc onig_verb_warn = onig_null_warn;  | 
99  |  | #endif  | 
100  |  |  | 
101  |  | extern void onig_set_warn_func(OnigWarnFunc f)  | 
102  | 0  | { | 
103  | 0  |   onig_warn = f;  | 
104  | 0  | }  | 
105  |  |  | 
106  |  | extern void onig_set_verb_warn_func(OnigWarnFunc f)  | 
107  | 0  | { | 
108  | 0  |   onig_verb_warn = f;  | 
109  | 0  | }  | 
110  |  |  | 
111  |  | static void CC_DUP_WARN(ScanEnv *env, OnigCodePoint from, OnigCodePoint to);  | 
112  |  |  | 
113  |  |  | 
114  |  | static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;  | 
115  |  |  | 
116  |  | extern unsigned int  | 
117  |  | onig_get_parse_depth_limit(void)  | 
118  | 0  | { | 
119  | 0  |   return ParseDepthLimit;  | 
120  | 0  | }  | 
121  |  |  | 
122  |  | extern int  | 
123  |  | onig_set_parse_depth_limit(unsigned int depth)  | 
124  | 0  | { | 
125  | 0  |   if (depth == 0)  | 
126  | 0  |     ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;  | 
127  | 0  |   else  | 
128  | 0  |     ParseDepthLimit = depth;  | 
129  | 0  |   return 0;  | 
130  | 0  | }  | 
131  |  |  | 
132  |  |  | 
133  |  | static void  | 
134  |  | bbuf_free(BBuf* bbuf)  | 
135  | 10.6k  | { | 
136  | 10.6k  |   if (IS_NOT_NULL(bbuf)) { | 
137  | 5.34k  |     if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);  | 
138  | 5.34k  |     xfree(bbuf);  | 
139  | 5.34k  |   }  | 
140  | 10.6k  | }  | 
141  |  |  | 
142  |  | static int  | 
143  |  | bbuf_clone(BBuf** rto, BBuf* from)  | 
144  | 0  | { | 
145  | 0  |   int r;  | 
146  | 0  |   BBuf *to;  | 
147  |  | 
  | 
148  | 0  |   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));  | 
149  | 0  |   CHECK_NULL_RETURN_MEMERR(to);  | 
150  | 0  |   r = BBUF_INIT(to, from->alloc);  | 
151  | 0  |   if (r != 0) return r;  | 
152  | 0  |   to->used = from->used;  | 
153  | 0  |   xmemcpy(to->p, from->p, from->used);  | 
154  | 0  |   return 0;  | 
155  | 0  | }  | 
156  |  |  | 
/* Absolute group number for a relative back-reference number. */
#define BACKREF_REL_TO_ABS(rel_no, env) \
  ((env)->num_mem + 1 + (rel_no))

/*
 * Clear flag bits f in v when `negative` is non-zero, set them otherwise.
 * The whole expansion is parenthesised so it stays a single expression
 * wherever the macro is used.
 */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
161  |  |  | 
/* First code point of the multi-byte range: 0 when the encoding's
   minimum character length exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  ((OnigCodePoint )((ONIGENC_MBC_MINLEN(enc) > 1) ? 0 : 0x80))

/* Add [MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT] to pbuf.
   Relies on a variable named `env` at the expansion site. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)

/* For encodings that are not single-byte, add the whole multi-byte range
   to mbuf.  Assigns to `r` and returns it from the enclosing function
   when it is non-zero. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
174  |  |  | 
175  |  |  | 
/* Set bit `pos` in bs; if it was already set, report it through
   CC_DUP_WARN first.  Relies on `env` at the expansion site. */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
  if (BITSET_AT(bs, pos)) { \
    CC_DUP_WARN(env, pos, pos); \
  } \
  BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)

/* Store 1 in `empty` when every word of bs is zero, 0 otherwise. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i = 0;\
  empty = 1;\
  while (i < BITSET_SIZE) {\
    if ((bs)[i] != 0) {\
      empty = 0;\
      break;\
    }\
    i++;\
  }\
} while (0)
190  |  |  | 
191  |  | static void  | 
192  |  | bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)  | 
193  | 5.34k  | { | 
194  | 5.34k  |   int i;  | 
195  | 689k  |   for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { | 
196  | 683k  |     BITSET_SET_BIT_CHKDUP(bs, i);  | 
197  | 683k  |   }  | 
198  | 5.34k  | }  | 
199  |  |  | 
#if 0
/* Set every bit of bs (compiled out). */
static void
bitset_set_all(BitSetRef bs)
{
  int idx = 0;

  while (idx < BITSET_SIZE) {
    bs[idx] = ~((Bits )0);
    idx++;
  }
}
#endif
208  |  |  | 
209  |  | static void  | 
210  |  | bitset_invert(BitSetRef bs)  | 
211  | 0  | { | 
212  | 0  |   int i;  | 
213  | 0  |   for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } | 
214  | 0  | }  | 
215  |  |  | 
216  |  | static void  | 
217  |  | bitset_invert_to(BitSetRef from, BitSetRef to)  | 
218  | 0  | { | 
219  | 0  |   int i;  | 
220  | 0  |   for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); } | 
221  | 0  | }  | 
222  |  |  | 
223  |  | static void  | 
224  |  | bitset_and(BitSetRef dest, BitSetRef bs)  | 
225  | 5.34k  | { | 
226  | 5.34k  |   int i;  | 
227  | 48.0k  |   for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; } | 
228  | 5.34k  | }  | 
229  |  |  | 
230  |  | static void  | 
231  |  | bitset_or(BitSetRef dest, BitSetRef bs)  | 
232  | 5.34k  | { | 
233  | 5.34k  |   int i;  | 
234  | 48.0k  |   for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; } | 
235  | 5.34k  | }  | 
236  |  |  | 
237  |  | static void  | 
238  |  | bitset_copy(BitSetRef dest, BitSetRef bs)  | 
239  | 0  | { | 
240  | 0  |   int i;  | 
241  | 0  |   for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; } | 
242  | 0  | }  | 
243  |  |  | 
244  |  | #if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)  | 
245  |  | extern int  | 
246  |  | onig_strncmp(const UChar* s1, const UChar* s2, int n)  | 
247  |  | { | 
248  |  |   int x;  | 
249  |  |  | 
250  |  |   while (n-- > 0) { | 
251  |  |     x = *s2++ - *s1++;  | 
252  |  |     if (x) return x;  | 
253  |  |   }  | 
254  |  |   return 0;  | 
255  |  | }  | 
256  |  | #endif  | 
257  |  |  | 
258  |  | extern void  | 
259  |  | onig_strcpy(UChar* dest, const UChar* src, const UChar* end)  | 
260  | 152k  | { | 
261  | 152k  |   ptrdiff_t len = end - src;  | 
262  | 152k  |   if (len > 0) { | 
263  | 152k  |     xmemcpy(dest, src, len);  | 
264  | 152k  |     dest[len] = (UChar )0;  | 
265  | 152k  |   }  | 
266  | 152k  | }  | 
267  |  |  | 
268  |  | #ifdef USE_NAMED_GROUP  | 
269  |  | static UChar*  | 
270  |  | strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)  | 
271  | 1.64k  | { | 
272  | 1.64k  |   ptrdiff_t slen;  | 
273  | 1.64k  |   int term_len, i;  | 
274  | 1.64k  |   UChar *r;  | 
275  |  |  | 
276  | 1.64k  |   slen = end - s;  | 
277  | 1.64k  |   term_len = ONIGENC_MBC_MINLEN(enc);  | 
278  |  |  | 
279  | 1.64k  |   r = (UChar* )xmalloc(slen + term_len);  | 
280  | 1.64k  |   CHECK_NULL_RETURN(r);  | 
281  | 1.64k  |   xmemcpy(r, s, slen);  | 
282  |  |  | 
283  | 3.28k  |   for (i = 0; i < term_len; i++)  | 
284  | 1.64k  |     r[slen + i] = (UChar )0;  | 
285  |  |  | 
286  | 1.64k  |   return r;  | 
287  | 1.64k  | }  | 
288  |  | #endif  | 
289  |  |  | 
/*
 * Pattern scanning helpers.  They all operate on variables named `p`,
 * `end` and `enc` at the expansion site; those using pfetch_prev also
 * require PFETCH_READY in the same scope.
 */

/* value produced by PPEEK when no input is left */
#define PEND_VALUE   0

#ifdef __GNUC__
/* get rid of Wunused-but-set-variable and Wuninitialized */
# define PFETCH_READY  UChar* pfetch_prev = NULL; (void)pfetch_prev
#else
# define PFETCH_READY  UChar* pfetch_prev
#endif
/* 1 when p has reached end, 0 otherwise */
#define PEND         (p < end ?  0 : 1)
/* step back to the position saved by the last PFETCH / PINC */
#define PUNFETCH     p = pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)
/* read one character into c and advance, remembering where it started */
#define PFETCH(c)  do { \
  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)

/* as PINC, without recording the previous position */
#define PINC_S     do { \
  p += enclen(enc, p, end); \
} while (0)
/* as PFETCH, without recording the previous position */
#define PFETCH_S(c) do { \
  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
  p += enclen(enc, p, end); \
} while (0)

/* code point at p without advancing, or PEND_VALUE at end of input */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* the argument is parenthesised so the cast applies to the whole expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
321  |  |  | 
322  |  | static UChar*  | 
323  |  | strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,  | 
324  |  |         size_t capa)  | 
325  | 23.0k  | { | 
326  | 23.0k  |   UChar* r;  | 
327  |  |  | 
328  | 23.0k  |   if (dest)  | 
329  | 23.0k  |     r = (UChar* )xrealloc(dest, capa + 1);  | 
330  | 0  |   else  | 
331  | 0  |     r = (UChar* )xmalloc(capa + 1);  | 
332  |  |  | 
333  | 23.0k  |   CHECK_NULL_RETURN(r);  | 
334  | 23.0k  |   onig_strcpy(r + (dest_end - dest), src, src_end);  | 
335  | 23.0k  |   return r;  | 
336  | 23.0k  | }  | 
337  |  |  | 
338  |  | /* dest on static area */  | 
339  |  | static UChar*  | 
340  |  | strcat_capa_from_static(UChar* dest, UChar* dest_end,  | 
341  |  |       const UChar* src, const UChar* src_end, size_t capa)  | 
342  | 1.64k  | { | 
343  | 1.64k  |   UChar* r;  | 
344  |  |  | 
345  | 1.64k  |   r = (UChar* )xmalloc(capa + 1);  | 
346  | 1.64k  |   CHECK_NULL_RETURN(r);  | 
347  | 1.64k  |   onig_strcpy(r, dest, dest_end);  | 
348  | 1.64k  |   onig_strcpy(r + (dest_end - dest), src, src_end);  | 
349  | 1.64k  |   return r;  | 
350  | 1.64k  | }  | 
351  |  |  | 
352  |  |  | 
353  |  | #ifdef USE_ST_LIBRARY  | 
354  |  |  | 
355  |  | # ifdef RUBY  | 
356  |  | #  include "ruby/st.h"  | 
357  |  | # else  | 
358  |  | #  include "st.h"  | 
359  |  | # endif  | 
360  |  |  | 
361  |  | typedef struct { | 
362  |  |   const UChar* s;  | 
363  |  |   const UChar* end;  | 
364  |  | } st_str_end_key;  | 
365  |  |  | 
366  |  | static int  | 
367  |  | str_end_cmp(st_data_t xp, st_data_t yp)  | 
368  | 0  | { | 
369  | 0  |   const st_str_end_key *x, *y;  | 
370  | 0  |   const UChar *p, *q;  | 
371  | 0  |   int c;  | 
372  |  | 
  | 
373  | 0  |   x = (const st_str_end_key *)xp;  | 
374  | 0  |   y = (const st_str_end_key *)yp;  | 
375  | 0  |   if ((x->end - x->s) != (y->end - y->s))  | 
376  | 0  |     return 1;  | 
377  |  |  | 
378  | 0  |   p = x->s;  | 
379  | 0  |   q = y->s;  | 
380  | 0  |   while (p < x->end) { | 
381  | 0  |     c = (int )*p - (int )*q;  | 
382  | 0  |     if (c != 0) return c;  | 
383  |  |  | 
384  | 0  |     p++; q++;  | 
385  | 0  |   }  | 
386  |  |  | 
387  | 0  |   return 0;  | 
388  | 0  | }  | 
389  |  |  | 
390  |  | static st_index_t  | 
391  |  | str_end_hash(st_data_t xp)  | 
392  | 2.87k  | { | 
393  | 2.87k  |   const st_str_end_key *x = (const st_str_end_key *)xp;  | 
394  | 2.87k  |   const UChar *p;  | 
395  | 2.87k  |   st_index_t val = 0;  | 
396  |  |  | 
397  | 2.87k  |   p = x->s;  | 
398  | 13.5k  |   while (p < x->end) { | 
399  | 10.6k  |     val = val * 997 + (int )*p++;  | 
400  | 10.6k  |   }  | 
401  |  |  | 
402  | 2.87k  |   return val + (val >> 5);  | 
403  | 2.87k  | }  | 
404  |  |  | 
405  |  | extern hash_table_type*  | 
406  |  | onig_st_init_strend_table_with_size(st_index_t size)  | 
407  | 411  | { | 
408  | 411  |   static const struct st_hash_type hashType = { | 
409  | 411  |     str_end_cmp,  | 
410  | 411  |     str_end_hash,  | 
411  | 411  |   };  | 
412  |  |  | 
413  | 411  |   return (hash_table_type* )  | 
414  | 411  |            onig_st_init_table_with_size(&hashType, size);  | 
415  | 411  | }  | 
416  |  |  | 
417  |  | extern int  | 
418  |  | onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,  | 
419  |  |           const UChar* end_key, hash_data_type *value)  | 
420  | 1.23k  | { | 
421  | 1.23k  |   st_str_end_key key;  | 
422  |  |  | 
423  | 1.23k  |   key.s   = (UChar* )str_key;  | 
424  | 1.23k  |   key.end = (UChar* )end_key;  | 
425  |  |  | 
426  | 1.23k  |   return onig_st_lookup(table, (st_data_t )(&key), value);  | 
427  | 1.23k  | }  | 
428  |  |  | 
429  |  | extern int  | 
430  |  | onig_st_insert_strend(hash_table_type* table, const UChar* str_key,  | 
431  |  |           const UChar* end_key, hash_data_type value)  | 
432  | 1.64k  | { | 
433  | 1.64k  |   st_str_end_key* key;  | 
434  | 1.64k  |   int result;  | 
435  |  |  | 
436  | 1.64k  |   key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));  | 
437  | 1.64k  |   key->s   = (UChar* )str_key;  | 
438  | 1.64k  |   key->end = (UChar* )end_key;  | 
439  | 1.64k  |   result = onig_st_insert(table, (st_data_t )key, value);  | 
440  | 1.64k  |   if (result) { | 
441  | 0  |     xfree(key);  | 
442  | 0  |   }  | 
443  | 1.64k  |   return result;  | 
444  | 1.64k  | }  | 
445  |  |  | 
446  |  | #endif /* USE_ST_LIBRARY */  | 
447  |  |  | 
448  |  |  | 
449  |  | #ifdef USE_NAMED_GROUP  | 
450  |  |  | 
451  | 0  | # define INIT_NAME_BACKREFS_ALLOC_NUM   8  | 
452  |  |  | 
453  |  | typedef struct { | 
454  |  |   UChar* name;  | 
455  |  |   size_t name_len;   /* byte length */  | 
456  |  |   int    back_num;   /* number of backrefs */  | 
457  |  |   int    back_alloc;  | 
458  |  |   int    back_ref1;  | 
459  |  |   int*   back_refs;  | 
460  |  | } NameEntry;  | 
461  |  |  | 
462  |  | # ifdef USE_ST_LIBRARY  | 
463  |  |  | 
464  |  | typedef st_table  NameTable;  | 
465  |  | typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */  | 
466  |  |  | 
467  |  | #  ifdef ONIG_DEBUG  | 
468  |  | static int  | 
469  |  | i_print_name_entry(UChar* key, NameEntry* e, void* arg)  | 
470  |  | { | 
471  |  |   int i;  | 
472  |  |   FILE* fp = (FILE* )arg;  | 
473  |  |  | 
474  |  |   fprintf(fp, "%s: ", e->name);  | 
475  |  |   if (e->back_num == 0)  | 
476  |  |     fputs("-", fp); | 
477  |  |   else if (e->back_num == 1)  | 
478  |  |     fprintf(fp, "%d", e->back_ref1);  | 
479  |  |   else { | 
480  |  |     for (i = 0; i < e->back_num; i++) { | 
481  |  |       if (i > 0) fprintf(fp, ", ");  | 
482  |  |       fprintf(fp, "%d", e->back_refs[i]);  | 
483  |  |     }  | 
484  |  |   }  | 
485  |  |   fputs("\n", fp); | 
486  |  |   return ST_CONTINUE;  | 
487  |  | }  | 
488  |  |  | 
489  |  | extern int  | 
490  |  | onig_print_names(FILE* fp, regex_t* reg)  | 
491  |  | { | 
492  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
493  |  |  | 
494  |  |   if (IS_NOT_NULL(t)) { | 
495  |  |     fprintf(fp, "name table\n");  | 
496  |  |     onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);  | 
497  |  |     fputs("\n", fp); | 
498  |  |   }  | 
499  |  |   return 0;  | 
500  |  | }  | 
501  |  | #  endif /* ONIG_DEBUG */  | 
502  |  |  | 
503  |  | static int  | 
504  |  | i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)  | 
505  | 1.64k  | { | 
506  | 1.64k  |   xfree(e->name);  | 
507  | 1.64k  |   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);  | 
508  | 1.64k  |   xfree(key);  | 
509  | 1.64k  |   xfree(e);  | 
510  | 1.64k  |   return ST_DELETE;  | 
511  | 1.64k  | }  | 
512  |  |  | 
513  |  | static int  | 
514  |  | names_clear(regex_t* reg)  | 
515  | 18.9k  | { | 
516  | 18.9k  |   NameTable* t = (NameTable* )reg->name_table;  | 
517  |  |  | 
518  | 18.9k  |   if (IS_NOT_NULL(t)) { | 
519  | 411  |     onig_st_foreach(t, i_free_name_entry, 0);  | 
520  | 411  |   }  | 
521  | 18.9k  |   return 0;  | 
522  | 18.9k  | }  | 
523  |  |  | 
524  |  | extern int  | 
525  |  | onig_names_free(regex_t* reg)  | 
526  | 9.45k  | { | 
527  | 9.45k  |   int r;  | 
528  | 9.45k  |   NameTable* t;  | 
529  |  |  | 
530  | 9.45k  |   r = names_clear(reg);  | 
531  | 9.45k  |   if (r) return r;  | 
532  |  |  | 
533  | 9.45k  |   t = (NameTable* )reg->name_table;  | 
534  | 9.45k  |   if (IS_NOT_NULL(t)) onig_st_free_table(t);  | 
535  | 9.45k  |   reg->name_table = (void* )NULL;  | 
536  | 9.45k  |   return 0;  | 
537  | 9.45k  | }  | 
538  |  |  | 
539  |  | static NameEntry*  | 
540  |  | name_find(regex_t* reg, const UChar* name, const UChar* name_end)  | 
541  | 1.64k  | { | 
542  | 1.64k  |   NameEntry* e;  | 
543  | 1.64k  |   NameTable* t = (NameTable* )reg->name_table;  | 
544  |  |  | 
545  | 1.64k  |   e = (NameEntry* )NULL;  | 
546  | 1.64k  |   if (IS_NOT_NULL(t)) { | 
547  | 1.23k  |     onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));  | 
548  | 1.23k  |   }  | 
549  | 1.64k  |   return e;  | 
550  | 1.64k  | }  | 
551  |  |  | 
552  |  | typedef struct { | 
553  |  |   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);  | 
554  |  |   regex_t* reg;  | 
555  |  |   void* arg;  | 
556  |  |   int ret;  | 
557  |  |   OnigEncoding enc;  | 
558  |  | } INamesArg;  | 
559  |  |  | 
560  |  | static int  | 
561  |  | i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)  | 
562  | 0  | { | 
563  | 0  |   int r = (*(arg->func))(e->name,  | 
564  | 0  |        e->name + e->name_len,  | 
565  | 0  |        e->back_num,  | 
566  | 0  |        (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),  | 
567  | 0  |        arg->reg, arg->arg);  | 
568  | 0  |   if (r != 0) { | 
569  | 0  |     arg->ret = r;  | 
570  | 0  |     return ST_STOP;  | 
571  | 0  |   }  | 
572  | 0  |   return ST_CONTINUE;  | 
573  | 0  | }  | 
574  |  |  | 
575  |  | extern int  | 
576  |  | onig_foreach_name(regex_t* reg,  | 
577  |  |   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)  | 
578  | 0  | { | 
579  | 0  |   INamesArg narg;  | 
580  | 0  |   NameTable* t = (NameTable* )reg->name_table;  | 
581  |  | 
  | 
582  | 0  |   narg.ret = 0;  | 
583  | 0  |   if (IS_NOT_NULL(t)) { | 
584  | 0  |     narg.func = func;  | 
585  | 0  |     narg.reg  = reg;  | 
586  | 0  |     narg.arg  = arg;  | 
587  | 0  |     narg.enc  = reg->enc; /* should be pattern encoding. */  | 
588  | 0  |     onig_st_foreach(t, i_names, (HashDataType )&narg);  | 
589  | 0  |   }  | 
590  | 0  |   return narg.ret;  | 
591  | 0  | }  | 
592  |  |  | 
593  |  | static int  | 
594  |  | i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)  | 
595  | 0  | { | 
596  | 0  |   int i;  | 
597  |  | 
  | 
598  | 0  |   if (e->back_num > 1) { | 
599  | 0  |     for (i = 0; i < e->back_num; i++) { | 
600  | 0  |       e->back_refs[i] = map[e->back_refs[i]].new_val;  | 
601  | 0  |     }  | 
602  | 0  |   }  | 
603  | 0  |   else if (e->back_num == 1) { | 
604  | 0  |     e->back_ref1 = map[e->back_ref1].new_val;  | 
605  | 0  |   }  | 
606  |  | 
  | 
607  | 0  |   return ST_CONTINUE;  | 
608  | 0  | }  | 
609  |  |  | 
610  |  | extern int  | 
611  |  | onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)  | 
612  | 0  | { | 
613  | 0  |   NameTable* t = (NameTable* )reg->name_table;  | 
614  |  | 
  | 
615  | 0  |   if (IS_NOT_NULL(t)) { | 
616  | 0  |     onig_st_foreach(t, i_renumber_name, (HashDataType )map);  | 
617  | 0  |   }  | 
618  | 0  |   return 0;  | 
619  | 0  | }  | 
620  |  |  | 
621  |  |  | 
622  |  | extern int  | 
623  |  | onig_number_of_names(const regex_t* reg)  | 
624  | 0  | { | 
625  | 0  |   NameTable* t = (NameTable* )reg->name_table;  | 
626  |  | 
  | 
627  | 0  |   if (IS_NOT_NULL(t))  | 
628  | 0  |     return (int )t->num_entries;  | 
629  | 0  |   else  | 
630  | 0  |     return 0;  | 
631  | 0  | }  | 
632  |  |  | 
633  |  | # else  /* USE_ST_LIBRARY */  | 
634  |  |  | 
635  |  | #  define INIT_NAMES_ALLOC_NUM    8  | 
636  |  |  | 
637  |  | typedef struct { | 
638  |  |   NameEntry* e;  | 
639  |  |   int        num;  | 
640  |  |   int        alloc;  | 
641  |  | } NameTable;  | 
642  |  |  | 
643  |  | #  ifdef ONIG_DEBUG  | 
644  |  | extern int  | 
645  |  | onig_print_names(FILE* fp, regex_t* reg)  | 
646  |  | { | 
647  |  |   int i, j;  | 
648  |  |   NameEntry* e;  | 
649  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
650  |  |  | 
651  |  |   if (IS_NOT_NULL(t) && t->num > 0) { | 
652  |  |     fprintf(fp, "name table\n");  | 
653  |  |     for (i = 0; i < t->num; i++) { | 
654  |  |       e = &(t->e[i]);  | 
655  |  |       fprintf(fp, "%s: ", e->name);  | 
656  |  |       if (e->back_num == 0) { | 
657  |  |   fputs("-", fp); | 
658  |  |       }  | 
659  |  |       else if (e->back_num == 1) { | 
660  |  |   fprintf(fp, "%d", e->back_ref1);  | 
661  |  |       }  | 
662  |  |       else { | 
663  |  |   for (j = 0; j < e->back_num; j++) { | 
664  |  |     if (j > 0) fprintf(fp, ", ");  | 
665  |  |     fprintf(fp, "%d", e->back_refs[j]);  | 
666  |  |   }  | 
667  |  |       }  | 
668  |  |       fputs("\n", fp); | 
669  |  |     }  | 
670  |  |     fputs("\n", fp); | 
671  |  |   }  | 
672  |  |   return 0;  | 
673  |  | }  | 
674  |  | #  endif  | 
675  |  |  | 
676  |  | static int  | 
677  |  | names_clear(regex_t* reg)  | 
678  |  | { | 
679  |  |   int i;  | 
680  |  |   NameEntry* e;  | 
681  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
682  |  |  | 
683  |  |   if (IS_NOT_NULL(t)) { | 
684  |  |     for (i = 0; i < t->num; i++) { | 
685  |  |       e = &(t->e[i]);  | 
686  |  |       if (IS_NOT_NULL(e->name)) { | 
687  |  |   xfree(e->name);  | 
688  |  |   e->name       = NULL;  | 
689  |  |   e->name_len   = 0;  | 
690  |  |   e->back_num   = 0;  | 
691  |  |   e->back_alloc = 0;  | 
692  |  |   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);  | 
693  |  |   e->back_refs = (int* )NULL;  | 
694  |  |       }  | 
695  |  |     }  | 
696  |  |     if (IS_NOT_NULL(t->e)) { | 
697  |  |       xfree(t->e);  | 
698  |  |       t->e = NULL;  | 
699  |  |     }  | 
700  |  |     t->num = 0;  | 
701  |  |   }  | 
702  |  |   return 0;  | 
703  |  | }  | 
704  |  |  | 
705  |  | extern int  | 
706  |  | onig_names_free(regex_t* reg)  | 
707  |  | { | 
708  |  |   int r;  | 
709  |  |   NameTable* t;  | 
710  |  |  | 
711  |  |   r = names_clear(reg);  | 
712  |  |   if (r) return r;  | 
713  |  |  | 
714  |  |   t = (NameTable* )reg->name_table;  | 
715  |  |   if (IS_NOT_NULL(t)) xfree(t);  | 
716  |  |   reg->name_table = NULL;  | 
717  |  |   return 0;  | 
718  |  | }  | 
719  |  |  | 
720  |  | static NameEntry*  | 
721  |  | name_find(regex_t* reg, const UChar* name, const UChar* name_end)  | 
722  |  | { | 
723  |  |   int i, len;  | 
724  |  |   NameEntry* e;  | 
725  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
726  |  |  | 
727  |  |   if (IS_NOT_NULL(t)) { | 
728  |  |     len = name_end - name;  | 
729  |  |     for (i = 0; i < t->num; i++) { | 
730  |  |       e = &(t->e[i]);  | 
731  |  |       if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)  | 
732  |  |   return e;  | 
733  |  |     }  | 
734  |  |   }  | 
735  |  |   return (NameEntry* )NULL;  | 
736  |  | }  | 
737  |  |  | 
738  |  | extern int  | 
739  |  | onig_foreach_name(regex_t* reg,  | 
740  |  |   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)  | 
741  |  | { | 
742  |  |   int i, r;  | 
743  |  |   NameEntry* e;  | 
744  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
745  |  |  | 
746  |  |   if (IS_NOT_NULL(t)) { | 
747  |  |     for (i = 0; i < t->num; i++) { | 
748  |  |       e = &(t->e[i]);  | 
749  |  |       r = (*func)(e->name, e->name + e->name_len, e->back_num,  | 
750  |  |       (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),  | 
751  |  |       reg, arg);  | 
752  |  |       if (r != 0) return r;  | 
753  |  |     }  | 
754  |  |   }  | 
755  |  |   return 0;  | 
756  |  | }  | 
757  |  |  | 
758  |  | extern int  | 
759  |  | onig_number_of_names(const regex_t* reg)  | 
760  |  | { | 
761  |  |   NameTable* t = (NameTable* )reg->name_table;  | 
762  |  |  | 
763  |  |   if (IS_NOT_NULL(t))  | 
764  |  |     return t->num;  | 
765  |  |   else  | 
766  |  |     return 0;  | 
767  |  | }  | 
768  |  |  | 
769  |  | # endif /* else USE_ST_LIBRARY */  | 
770  |  |  | 
771  |  | static int  | 
772  |  | name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)  | 
773  | 1.64k  | { | 
774  | 1.64k  |   int alloc;  | 
775  | 1.64k  |   NameEntry* e;  | 
776  | 1.64k  |   NameTable* t = (NameTable* )reg->name_table;  | 
777  |  |  | 
778  | 1.64k  |   if (name_end - name <= 0)  | 
779  | 0  |     return ONIGERR_EMPTY_GROUP_NAME;  | 
780  |  |  | 
781  | 1.64k  |   e = name_find(reg, name, name_end);  | 
782  | 1.64k  |   if (IS_NULL(e)) { | 
783  | 1.64k  | # ifdef USE_ST_LIBRARY  | 
784  | 1.64k  |     if (IS_NULL(t)) { | 
785  | 411  |       t = onig_st_init_strend_table_with_size(5);  | 
786  | 411  |       reg->name_table = (void* )t;  | 
787  | 411  |     }  | 
788  | 1.64k  |     e = (NameEntry* )xmalloc(sizeof(NameEntry));  | 
789  | 1.64k  |     CHECK_NULL_RETURN_MEMERR(e);  | 
790  |  |  | 
791  | 1.64k  |     e->name = strdup_with_null(reg->enc, name, name_end);  | 
792  | 1.64k  |     if (IS_NULL(e->name)) { | 
793  | 0  |       xfree(e);  | 
794  | 0  |       return ONIGERR_MEMORY;  | 
795  | 0  |     }  | 
796  | 1.64k  |     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),  | 
797  | 1.64k  |                           (HashDataType )e);  | 
798  |  |  | 
799  | 1.64k  |     e->name_len   = name_end - name;  | 
800  | 1.64k  |     e->back_num   = 0;  | 
801  | 1.64k  |     e->back_alloc = 0;  | 
802  | 1.64k  |     e->back_refs  = (int* )NULL;  | 
803  |  |  | 
804  |  | # else  | 
805  |  |  | 
806  |  |     if (IS_NULL(t)) { | 
807  |  |       alloc = INIT_NAMES_ALLOC_NUM;  | 
808  |  |       t = (NameTable* )xmalloc(sizeof(NameTable));  | 
809  |  |       CHECK_NULL_RETURN_MEMERR(t);  | 
810  |  |       t->e     = NULL;  | 
811  |  |       t->alloc = 0;  | 
812  |  |       t->num   = 0;  | 
813  |  |  | 
814  |  |       t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);  | 
815  |  |       if (IS_NULL(t->e)) { | 
816  |  |   xfree(t);  | 
817  |  |   return ONIGERR_MEMORY;  | 
818  |  |       }  | 
819  |  |       t->alloc = alloc;  | 
820  |  |       reg->name_table = t;  | 
821  |  |       goto clear;  | 
822  |  |     }  | 
823  |  |     else if (t->num == t->alloc) { | 
824  |  |       int i;  | 
825  |  |       NameEntry* p;  | 
826  |  |  | 
827  |  |       alloc = t->alloc * 2;  | 
828  |  |       p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);  | 
829  |  |       CHECK_NULL_RETURN_MEMERR(p);  | 
830  |  |       t->e = p;  | 
831  |  |       t->alloc = alloc;  | 
832  |  |  | 
833  |  |     clear:  | 
834  |  |       for (i = t->num; i < t->alloc; i++) { | 
835  |  |   t->e[i].name       = NULL;  | 
836  |  |   t->e[i].name_len   = 0;  | 
837  |  |   t->e[i].back_num   = 0;  | 
838  |  |   t->e[i].back_alloc = 0;  | 
839  |  |   t->e[i].back_refs  = (int* )NULL;  | 
840  |  |       }  | 
841  |  |     }  | 
842  |  |     e = &(t->e[t->num]);  | 
843  |  |     t->num++;  | 
844  |  |     e->name = strdup_with_null(reg->enc, name, name_end);  | 
845  |  |     if (IS_NULL(e->name)) return ONIGERR_MEMORY;  | 
846  |  |     e->name_len = name_end - name;  | 
847  |  | # endif  | 
848  | 1.64k  |   }  | 
849  |  |  | 
850  | 1.64k  |   if (e->back_num >= 1 &&  | 
851  | 1.64k  |       ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { | 
852  | 0  |     onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,  | 
853  | 0  |             name, name_end);  | 
854  | 0  |     return ONIGERR_MULTIPLEX_DEFINED_NAME;  | 
855  | 0  |   }  | 
856  |  |  | 
857  | 1.64k  |   e->back_num++;  | 
858  | 1.64k  |   if (e->back_num == 1) { | 
859  | 1.64k  |     e->back_ref1 = backref;  | 
860  | 1.64k  |   }  | 
861  | 0  |   else { | 
862  | 0  |     if (e->back_num == 2) { | 
863  | 0  |       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;  | 
864  | 0  |       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);  | 
865  | 0  |       CHECK_NULL_RETURN_MEMERR(e->back_refs);  | 
866  | 0  |       e->back_alloc = alloc;  | 
867  | 0  |       e->back_refs[0] = e->back_ref1;  | 
868  | 0  |       e->back_refs[1] = backref;  | 
869  | 0  |     }  | 
870  | 0  |     else { | 
871  | 0  |       if (e->back_num > e->back_alloc) { | 
872  | 0  |   int* p;  | 
873  | 0  |   alloc = e->back_alloc * 2;  | 
874  | 0  |   p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);  | 
875  | 0  |   CHECK_NULL_RETURN_MEMERR(p);  | 
876  | 0  |   e->back_refs = p;  | 
877  | 0  |   e->back_alloc = alloc;  | 
878  | 0  |       }  | 
879  | 0  |       e->back_refs[e->back_num - 1] = backref;  | 
880  | 0  |     }  | 
881  | 0  |   }  | 
882  |  |  | 
883  | 1.64k  |   return 0;  | 
884  | 1.64k  | }  | 
885  |  |  | 
886  |  | extern int  | 
887  |  | onig_name_to_group_numbers(regex_t* reg, const UChar* name,  | 
888  |  |          const UChar* name_end, int** nums)  | 
889  | 0  | { | 
890  | 0  |   NameEntry* e = name_find(reg, name, name_end);  | 
891  |  | 
  | 
892  | 0  |   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;  | 
893  |  |  | 
894  | 0  |   switch (e->back_num) { | 
895  | 0  |   case 0:  | 
896  | 0  |     *nums = 0;  | 
897  | 0  |     break;  | 
898  | 0  |   case 1:  | 
899  | 0  |     *nums = &(e->back_ref1);  | 
900  | 0  |     break;  | 
901  | 0  |   default:  | 
902  | 0  |     *nums = e->back_refs;  | 
903  | 0  |     break;  | 
904  | 0  |   }  | 
905  | 0  |   return e->back_num;  | 
906  | 0  | }  | 
907  |  |  | 
908  |  | extern int  | 
909  |  | onig_name_to_backref_number(regex_t* reg, const UChar* name,  | 
910  |  |           const UChar* name_end, const OnigRegion *region)  | 
911  | 0  | { | 
912  | 0  |   int i, n, *nums;  | 
913  |  | 
  | 
914  | 0  |   n = onig_name_to_group_numbers(reg, name, name_end, &nums);  | 
915  | 0  |   if (n < 0)  | 
916  | 0  |     return n;  | 
917  | 0  |   else if (n == 0)  | 
918  | 0  |     return ONIGERR_PARSER_BUG;  | 
919  | 0  |   else if (n == 1)  | 
920  | 0  |     return nums[0];  | 
921  | 0  |   else { | 
922  | 0  |     if (IS_NOT_NULL(region)) { | 
923  | 0  |       for (i = n - 1; i >= 0; i--) { | 
924  | 0  |   if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)  | 
925  | 0  |     return nums[i];  | 
926  | 0  |       }  | 
927  | 0  |     }  | 
928  | 0  |     return nums[n - 1];  | 
929  | 0  |   }  | 
930  | 0  | }  | 
931  |  |  | 
932  |  | #else /* USE_NAMED_GROUP */  | 
933  |  |  | 
934  |  | extern int  | 
935  |  | onig_name_to_group_numbers(regex_t* reg, const UChar* name,  | 
936  |  |          const UChar* name_end, int** nums)  | 
937  |  | { | 
938  |  |   return ONIG_NO_SUPPORT_CONFIG;  | 
939  |  | }  | 
940  |  |  | 
941  |  | extern int  | 
942  |  | onig_name_to_backref_number(regex_t* reg, const UChar* name,  | 
943  |  |           const UChar* name_end, const OnigRegion* region)  | 
944  |  | { | 
945  |  |   return ONIG_NO_SUPPORT_CONFIG;  | 
946  |  | }  | 
947  |  |  | 
948  |  | extern int  | 
949  |  | onig_foreach_name(regex_t* reg,  | 
950  |  |   int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)  | 
951  |  | { | 
952  |  |   return ONIG_NO_SUPPORT_CONFIG;  | 
953  |  | }  | 
954  |  |  | 
955  |  | extern int  | 
956  |  | onig_number_of_names(const regex_t* reg)  | 
957  |  | { | 
958  |  |   return 0;  | 
959  |  | }  | 
960  |  | #endif /* else USE_NAMED_GROUP */  | 
961  |  |  | 
962  |  | extern int  | 
963  |  | onig_noname_group_capture_is_active(const regex_t* reg)  | 
964  | 0  | { | 
965  | 0  |   if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))  | 
966  | 0  |     return 0;  | 
967  |  |  | 
968  | 0  | #ifdef USE_NAMED_GROUP  | 
969  | 0  |   if (onig_number_of_names(reg) > 0 &&  | 
970  | 0  |       IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&  | 
971  | 0  |       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { | 
972  | 0  |     return 0;  | 
973  | 0  |   }  | 
974  | 0  | #endif  | 
975  |  |  | 
976  | 0  |   return 1;  | 
977  | 0  | }  | 
978  |  |  | 
979  |  |  | 
980  | 0  | #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16  | 
981  |  |  | 
982  |  | static void  | 
983  |  | scan_env_clear(ScanEnv* env)  | 
984  | 9.45k  | { | 
985  | 9.45k  |   int i;  | 
986  |  |  | 
987  | 9.45k  |   BIT_STATUS_CLEAR(env->capture_history);  | 
988  | 9.45k  |   BIT_STATUS_CLEAR(env->bt_mem_start);  | 
989  | 9.45k  |   BIT_STATUS_CLEAR(env->bt_mem_end);  | 
990  | 9.45k  |   BIT_STATUS_CLEAR(env->backrefed_mem);  | 
991  | 9.45k  |   env->error      = (UChar* )NULL;  | 
992  | 9.45k  |   env->error_end  = (UChar* )NULL;  | 
993  | 9.45k  |   env->num_call   = 0;  | 
994  | 9.45k  |   env->num_mem    = 0;  | 
995  | 9.45k  | #ifdef USE_NAMED_GROUP  | 
996  | 9.45k  |   env->num_named  = 0;  | 
997  | 9.45k  | #endif  | 
998  | 9.45k  |   env->mem_alloc         = 0;  | 
999  | 9.45k  |   env->mem_nodes_dynamic = (Node** )NULL;  | 
1000  |  |  | 
1001  | 85.0k  |   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)  | 
1002  | 75.6k  |     env->mem_nodes_static[i] = NULL_NODE;  | 
1003  |  |  | 
1004  |  | #ifdef USE_COMBINATION_EXPLOSION_CHECK  | 
1005  |  |   env->num_comb_exp_check  = 0;  | 
1006  |  |   env->comb_exp_max_regnum = 0;  | 
1007  |  |   env->curr_max_regnum     = 0;  | 
1008  |  |   env->has_recursion       = 0;  | 
1009  |  | #endif  | 
1010  | 9.45k  |   env->parse_depth         = 0;  | 
1011  | 9.45k  |   env->warnings_flag       = 0;  | 
1012  | 9.45k  | }  | 
1013  |  |  | 
1014  |  | static int  | 
1015  |  | scan_env_add_mem_entry(ScanEnv* env)  | 
1016  | 2.05k  | { | 
1017  | 2.05k  |   int i, need, alloc;  | 
1018  | 2.05k  |   Node** p;  | 
1019  |  |  | 
1020  | 2.05k  |   need = env->num_mem + 1;  | 
1021  | 2.05k  |   if (need > ONIG_MAX_CAPTURE_GROUP_NUM)  | 
1022  | 0  |     return ONIGERR_TOO_MANY_CAPTURE_GROUPS;  | 
1023  | 2.05k  |   if (need >= SCANENV_MEMNODES_SIZE) { | 
1024  | 0  |     if (env->mem_alloc <= need) { | 
1025  | 0  |       if (IS_NULL(env->mem_nodes_dynamic)) { | 
1026  | 0  |   alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;  | 
1027  | 0  |   p = (Node** )xmalloc(sizeof(Node*) * alloc);  | 
1028  | 0  |   CHECK_NULL_RETURN_MEMERR(p);  | 
1029  | 0  |   xmemcpy(p, env->mem_nodes_static,  | 
1030  | 0  |     sizeof(Node*) * SCANENV_MEMNODES_SIZE);  | 
1031  | 0  |       }  | 
1032  | 0  |       else { | 
1033  | 0  |   alloc = env->mem_alloc * 2;  | 
1034  | 0  |   p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);  | 
1035  | 0  |   CHECK_NULL_RETURN_MEMERR(p);  | 
1036  | 0  |       }  | 
1037  |  |  | 
1038  | 0  |       for (i = env->num_mem + 1; i < alloc; i++)  | 
1039  | 0  |   p[i] = NULL_NODE;  | 
1040  |  | 
  | 
1041  | 0  |       env->mem_nodes_dynamic = p;  | 
1042  | 0  |       env->mem_alloc = alloc;  | 
1043  | 0  |     }  | 
1044  | 0  |   }  | 
1045  |  |  | 
1046  | 2.05k  |   env->num_mem++;  | 
1047  | 2.05k  |   return env->num_mem;  | 
1048  | 2.05k  | }  | 
1049  |  |  | 
1050  |  | static int  | 
1051  |  | scan_env_set_mem_node(ScanEnv* env, int num, Node* node)  | 
1052  | 2.05k  | { | 
1053  | 2.05k  |   if (env->num_mem >= num)  | 
1054  | 2.05k  |     SCANENV_MEM_NODES(env)[num] = node;  | 
1055  | 0  |   else  | 
1056  | 0  |     return ONIGERR_PARSER_BUG;  | 
1057  | 2.05k  |   return 0;  | 
1058  | 2.05k  | }  | 
1059  |  |  | 
1060  |  |  | 
1061  |  | extern void  | 
1062  |  | onig_node_free(Node* node)  | 
1063  | 82.2k  | { | 
1064  | 128k  |  start:  | 
1065  | 128k  |   if (IS_NULL(node)) return ;  | 
1066  |  |  | 
1067  | 110k  |   switch (NTYPE(node)) { | 
1068  | 17.6k  |   case NT_STR:  | 
1069  | 17.6k  |     if (NSTR(node)->capa != 0 &&  | 
1070  | 17.6k  |   IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { | 
1071  | 1.64k  |       xfree(NSTR(node)->s);  | 
1072  | 1.64k  |     }  | 
1073  | 17.6k  |     break;  | 
1074  |  |  | 
1075  | 40.6k  |   case NT_LIST:  | 
1076  | 46.4k  |   case NT_ALT:  | 
1077  | 46.4k  |     onig_node_free(NCAR(node));  | 
1078  | 46.4k  |     { | 
1079  | 46.4k  |       Node* next_node = NCDR(node);  | 
1080  |  |  | 
1081  | 46.4k  |       xfree(node);  | 
1082  | 46.4k  |       node = next_node;  | 
1083  | 46.4k  |       goto start;  | 
1084  | 40.6k  |     }  | 
1085  | 0  |     break;  | 
1086  |  |  | 
1087  | 9.86k  |   case NT_CCLASS:  | 
1088  | 9.86k  |     { | 
1089  | 9.86k  |       CClassNode* cc = NCCLASS(node);  | 
1090  |  |  | 
1091  | 9.86k  |       if (cc->mbuf)  | 
1092  | 0  |   bbuf_free(cc->mbuf);  | 
1093  | 9.86k  |     }  | 
1094  | 9.86k  |     break;  | 
1095  |  |  | 
1096  | 12.3k  |   case NT_QTFR:  | 
1097  | 12.3k  |     if (NQTFR(node)->target)  | 
1098  | 12.3k  |       onig_node_free(NQTFR(node)->target);  | 
1099  | 12.3k  |     break;  | 
1100  |  |  | 
1101  | 7.80k  |   case NT_ENCLOSE:  | 
1102  | 7.80k  |     if (NENCLOSE(node)->target)  | 
1103  | 7.80k  |       onig_node_free(NENCLOSE(node)->target);  | 
1104  | 7.80k  |     break;  | 
1105  |  |  | 
1106  | 0  |   case NT_BREF:  | 
1107  | 0  |     if (IS_NOT_NULL(NBREF(node)->back_dynamic))  | 
1108  | 0  |       xfree(NBREF(node)->back_dynamic);  | 
1109  | 0  |     break;  | 
1110  |  |  | 
1111  | 11.9k  |   case NT_ANCHOR:  | 
1112  | 11.9k  |     if (NANCHOR(node)->target)  | 
1113  | 0  |       onig_node_free(NANCHOR(node)->target);  | 
1114  | 11.9k  |     break;  | 
1115  | 110k  |   }  | 
1116  |  |  | 
1117  | 63.7k  |   xfree(node);  | 
1118  | 63.7k  | }  | 
1119  |  |  | 
1120  |  | static Node*  | 
1121  |  | node_new(void)  | 
1122  | 110k  | { | 
1123  | 110k  |   Node* node;  | 
1124  |  |  | 
1125  | 110k  |   node = (Node* )xmalloc(sizeof(Node));  | 
1126  |  |   /* xmemset(node, 0, sizeof(Node)); */  | 
1127  | 110k  |   return node;  | 
1128  | 110k  | }  | 
1129  |  |  | 
1130  |  | static void  | 
1131  |  | initialize_cclass(CClassNode* cc)  | 
1132  | 20.5k  | { | 
1133  | 20.5k  |   BITSET_CLEAR(cc->bs);  | 
1134  |  |   /* cc->base.flags = 0; */  | 
1135  | 20.5k  |   cc->flags = 0;  | 
1136  | 20.5k  |   cc->mbuf  = NULL;  | 
1137  | 20.5k  | }  | 
1138  |  |  | 
1139  |  | static Node*  | 
1140  |  | node_new_cclass(void)  | 
1141  | 9.86k  | { | 
1142  | 9.86k  |   Node* node = node_new();  | 
1143  | 9.86k  |   CHECK_NULL_RETURN(node);  | 
1144  |  |  | 
1145  | 9.86k  |   SET_NTYPE(node, NT_CCLASS);  | 
1146  | 9.86k  |   initialize_cclass(NCCLASS(node));  | 
1147  | 9.86k  |   return node;  | 
1148  | 9.86k  | }  | 
1149  |  |  | 
1150  |  | static Node*  | 
1151  |  | node_new_ctype(int type, int not, int ascii_range)  | 
1152  | 0  | { | 
1153  | 0  |   Node* node = node_new();  | 
1154  | 0  |   CHECK_NULL_RETURN(node);  | 
1155  |  |  | 
1156  | 0  |   SET_NTYPE(node, NT_CTYPE);  | 
1157  | 0  |   NCTYPE(node)->ctype = type;  | 
1158  | 0  |   NCTYPE(node)->not   = not;  | 
1159  | 0  |   NCTYPE(node)->ascii_range = ascii_range;  | 
1160  | 0  |   return node;  | 
1161  | 0  | }  | 
1162  |  |  | 
1163  |  | static Node*  | 
1164  |  | node_new_anychar(void)  | 
1165  | 4.11k  | { | 
1166  | 4.11k  |   Node* node = node_new();  | 
1167  | 4.11k  |   CHECK_NULL_RETURN(node);  | 
1168  |  |  | 
1169  | 4.11k  |   SET_NTYPE(node, NT_CANY);  | 
1170  | 4.11k  |   return node;  | 
1171  | 4.11k  | }  | 
1172  |  |  | 
1173  |  | static Node*  | 
1174  |  | node_new_list(Node* left, Node* right)  | 
1175  | 40.6k  | { | 
1176  | 40.6k  |   Node* node = node_new();  | 
1177  | 40.6k  |   CHECK_NULL_RETURN(node);  | 
1178  |  |  | 
1179  | 40.6k  |   SET_NTYPE(node, NT_LIST);  | 
1180  | 40.6k  |   NCAR(node)  = left;  | 
1181  | 40.6k  |   NCDR(node) = right;  | 
1182  | 40.6k  |   return node;  | 
1183  | 40.6k  | }  | 
1184  |  |  | 
1185  |  | extern Node*  | 
1186  |  | onig_node_new_list(Node* left, Node* right)  | 
1187  | 0  | { | 
1188  | 0  |   return node_new_list(left, right);  | 
1189  | 0  | }  | 
1190  |  |  | 
1191  |  | extern Node*  | 
1192  |  | onig_node_list_add(Node* list, Node* x)  | 
1193  | 0  | { | 
1194  | 0  |   Node *n;  | 
1195  |  | 
  | 
1196  | 0  |   n = onig_node_new_list(x, NULL);  | 
1197  | 0  |   if (IS_NULL(n)) return NULL_NODE;  | 
1198  |  |  | 
1199  | 0  |   if (IS_NOT_NULL(list)) { | 
1200  | 0  |     while (IS_NOT_NULL(NCDR(list)))  | 
1201  | 0  |       list = NCDR(list);  | 
1202  |  | 
  | 
1203  | 0  |     NCDR(list) = n;  | 
1204  | 0  |   }  | 
1205  |  | 
  | 
1206  | 0  |   return n;  | 
1207  | 0  | }  | 
1208  |  |  | 
1209  |  | extern Node*  | 
1210  |  | onig_node_new_alt(Node* left, Node* right)  | 
1211  | 5.75k  | { | 
1212  | 5.75k  |   Node* node = node_new();  | 
1213  | 5.75k  |   CHECK_NULL_RETURN(node);  | 
1214  |  |  | 
1215  | 5.75k  |   SET_NTYPE(node, NT_ALT);  | 
1216  | 5.75k  |   NCAR(node)  = left;  | 
1217  | 5.75k  |   NCDR(node) = right;  | 
1218  | 5.75k  |   return node;  | 
1219  | 5.75k  | }  | 
1220  |  |  | 
1221  |  | extern Node*  | 
1222  |  | onig_node_new_anchor(int type)  | 
1223  | 11.9k  | { | 
1224  | 11.9k  |   Node* node = node_new();  | 
1225  | 11.9k  |   CHECK_NULL_RETURN(node);  | 
1226  |  |  | 
1227  | 11.9k  |   SET_NTYPE(node, NT_ANCHOR);  | 
1228  | 11.9k  |   NANCHOR(node)->type     = type;  | 
1229  | 11.9k  |   NANCHOR(node)->target   = NULL;  | 
1230  | 11.9k  |   NANCHOR(node)->char_len = -1;  | 
1231  | 11.9k  |   NANCHOR(node)->ascii_range = 0;  | 
1232  | 11.9k  |   return node;  | 
1233  | 11.9k  | }  | 
1234  |  |  | 
1235  |  | static Node*  | 
1236  |  | node_new_backref(int back_num, int* backrefs, int by_name,  | 
1237  |  | #ifdef USE_BACKREF_WITH_LEVEL  | 
1238  |  |      int exist_level, int nest_level,  | 
1239  |  | #endif  | 
1240  |  |      ScanEnv* env)  | 
1241  | 0  | { | 
1242  | 0  |   int i;  | 
1243  | 0  |   Node* node = node_new();  | 
1244  |  | 
  | 
1245  | 0  |   CHECK_NULL_RETURN(node);  | 
1246  |  |  | 
1247  | 0  |   SET_NTYPE(node, NT_BREF);  | 
1248  | 0  |   NBREF(node)->state    = 0;  | 
1249  | 0  |   NBREF(node)->back_num = back_num;  | 
1250  | 0  |   NBREF(node)->back_dynamic = (int* )NULL;  | 
1251  | 0  |   if (by_name != 0)  | 
1252  | 0  |     NBREF(node)->state |= NST_NAME_REF;  | 
1253  |  | 
  | 
1254  | 0  | #ifdef USE_BACKREF_WITH_LEVEL  | 
1255  | 0  |   if (exist_level != 0) { | 
1256  | 0  |     NBREF(node)->state |= NST_NEST_LEVEL;  | 
1257  | 0  |     NBREF(node)->nest_level  = nest_level;  | 
1258  | 0  |   }  | 
1259  | 0  | #endif  | 
1260  |  | 
  | 
1261  | 0  |   for (i = 0; i < back_num; i++) { | 
1262  | 0  |     if (backrefs[i] <= env->num_mem &&  | 
1263  | 0  |   IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { | 
1264  | 0  |       NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */  | 
1265  | 0  |       break;  | 
1266  | 0  |     }  | 
1267  | 0  |   }  | 
1268  |  | 
  | 
1269  | 0  |   if (back_num <= NODE_BACKREFS_SIZE) { | 
1270  | 0  |     for (i = 0; i < back_num; i++)  | 
1271  | 0  |       NBREF(node)->back_static[i] = backrefs[i];  | 
1272  | 0  |   }  | 
1273  | 0  |   else { | 
1274  | 0  |     int* p = (int* )xmalloc(sizeof(int) * back_num);  | 
1275  | 0  |     if (IS_NULL(p)) { | 
1276  | 0  |       onig_node_free(node);  | 
1277  | 0  |       return NULL;  | 
1278  | 0  |     }  | 
1279  | 0  |     NBREF(node)->back_dynamic = p;  | 
1280  | 0  |     for (i = 0; i < back_num; i++)  | 
1281  | 0  |       p[i] = backrefs[i];  | 
1282  | 0  |   }  | 
1283  | 0  |   return node;  | 
1284  | 0  | }  | 
1285  |  |  | 
#ifdef USE_SUBEXP_CALL
/*
 * Create an NT_CALL node recording NAME..NAME_END and GNUM, with state 0
 * and no target.  The name pointers are stored as given (not copied).
 * Returns NULL when node_new() fails.
 */
static Node*
node_new_call(UChar* name, UChar* name_end, int gnum)
{
  Node* node;

  node = node_new();
  if (IS_NOT_NULL(node)) {
    SET_NTYPE(node, NT_CALL);
    NCALL(node)->target    = NULL_NODE;
    NCALL(node)->state     = 0;
    NCALL(node)->group_num = gnum;  /* call by number if gnum != 0 */
    NCALL(node)->name_end  = name_end;
    NCALL(node)->name      = name;
  }
  return node;
}
#endif
1302  |  |  | 
1303  |  | static Node*  | 
1304  |  | node_new_quantifier(int lower, int upper, int by_number)  | 
1305  | 12.3k  | { | 
1306  | 12.3k  |   Node* node = node_new();  | 
1307  | 12.3k  |   CHECK_NULL_RETURN(node);  | 
1308  |  |  | 
1309  | 12.3k  |   SET_NTYPE(node, NT_QTFR);  | 
1310  | 12.3k  |   NQTFR(node)->state  = 0;  | 
1311  | 12.3k  |   NQTFR(node)->target = NULL;  | 
1312  | 12.3k  |   NQTFR(node)->lower  = lower;  | 
1313  | 12.3k  |   NQTFR(node)->upper  = upper;  | 
1314  | 12.3k  |   NQTFR(node)->greedy = 1;  | 
1315  | 12.3k  |   NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;  | 
1316  | 12.3k  |   NQTFR(node)->head_exact        = NULL_NODE;  | 
1317  | 12.3k  |   NQTFR(node)->next_head_exact   = NULL_NODE;  | 
1318  | 12.3k  |   NQTFR(node)->is_referred       = 0;  | 
1319  | 12.3k  |   if (by_number != 0)  | 
1320  | 0  |     NQTFR(node)->state |= NST_BY_NUMBER;  | 
1321  |  |  | 
1322  |  | #ifdef USE_COMBINATION_EXPLOSION_CHECK  | 
1323  |  |   NQTFR(node)->comb_exp_check_num = 0;  | 
1324  |  | #endif  | 
1325  |  |  | 
1326  | 12.3k  |   return node;  | 
1327  | 12.3k  | }  | 
1328  |  |  | 
1329  |  | static Node*  | 
1330  |  | node_new_enclose(int type)  | 
1331  | 7.80k  | { | 
1332  | 7.80k  |   Node* node = node_new();  | 
1333  | 7.80k  |   CHECK_NULL_RETURN(node);  | 
1334  |  |  | 
1335  | 7.80k  |   SET_NTYPE(node, NT_ENCLOSE);  | 
1336  | 7.80k  |   NENCLOSE(node)->type      = type;  | 
1337  | 7.80k  |   NENCLOSE(node)->state     =  0;  | 
1338  | 7.80k  |   NENCLOSE(node)->regnum    =  0;  | 
1339  | 7.80k  |   NENCLOSE(node)->option    =  0;  | 
1340  | 7.80k  |   NENCLOSE(node)->target    = NULL;  | 
1341  | 7.80k  |   NENCLOSE(node)->call_addr = -1;  | 
1342  | 7.80k  |   NENCLOSE(node)->opt_count =  0;  | 
1343  | 7.80k  |   return node;  | 
1344  | 7.80k  | }  | 
1345  |  |  | 
1346  |  | extern Node*  | 
1347  |  | onig_node_new_enclose(int type)  | 
1348  | 5.34k  | { | 
1349  | 5.34k  |   return node_new_enclose(type);  | 
1350  | 5.34k  | }  | 
1351  |  |  | 
1352  |  | static Node*  | 
1353  |  | node_new_enclose_memory(OnigOptionType option, int is_named)  | 
1354  | 2.05k  | { | 
1355  | 2.05k  |   Node* node = node_new_enclose(ENCLOSE_MEMORY);  | 
1356  | 2.05k  |   CHECK_NULL_RETURN(node);  | 
1357  | 2.05k  |   if (is_named != 0)  | 
1358  | 1.64k  |     SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);  | 
1359  |  |  | 
1360  | 2.05k  | #ifdef USE_SUBEXP_CALL  | 
1361  | 2.05k  |   NENCLOSE(node)->option = option;  | 
1362  | 2.05k  | #endif  | 
1363  | 2.05k  |   return node;  | 
1364  | 2.05k  | }  | 
1365  |  |  | 
1366  |  | static Node*  | 
1367  |  | node_new_option(OnigOptionType option)  | 
1368  | 411  | { | 
1369  | 411  |   Node* node = node_new_enclose(ENCLOSE_OPTION);  | 
1370  | 411  |   CHECK_NULL_RETURN(node);  | 
1371  | 411  |   NENCLOSE(node)->option = option;  | 
1372  | 411  |   return node;  | 
1373  | 411  | }  | 
1374  |  |  | 
1375  |  | extern int  | 
1376  |  | onig_node_str_cat(Node* node, const UChar* s, const UChar* end)  | 
1377  | 151k  | { | 
1378  | 151k  |   ptrdiff_t addlen = end - s;  | 
1379  |  |  | 
1380  | 151k  |   if (addlen > 0) { | 
1381  | 151k  |     ptrdiff_t len  = NSTR(node)->end - NSTR(node)->s;  | 
1382  |  |  | 
1383  | 151k  |     if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { | 
1384  | 24.6k  |       UChar* p;  | 
1385  | 24.6k  |       ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;  | 
1386  |  |  | 
1387  | 24.6k  |       if (capa <= NSTR(node)->capa) { | 
1388  | 0  |   onig_strcpy(NSTR(node)->s + len, s, end);  | 
1389  | 0  |       }  | 
1390  | 24.6k  |       else { | 
1391  | 24.6k  |   if (NSTR(node)->s == NSTR(node)->buf)  | 
1392  | 1.64k  |     p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,  | 
1393  | 1.64k  |               s, end, capa);  | 
1394  | 23.0k  |   else  | 
1395  | 23.0k  |     p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);  | 
1396  |  |  | 
1397  | 24.6k  |   CHECK_NULL_RETURN_MEMERR(p);  | 
1398  | 24.6k  |   NSTR(node)->s    = p;  | 
1399  | 24.6k  |   NSTR(node)->capa = (int )capa;  | 
1400  | 24.6k  |       }  | 
1401  | 24.6k  |     }  | 
1402  | 126k  |     else { | 
1403  | 126k  |       onig_strcpy(NSTR(node)->s + len, s, end);  | 
1404  | 126k  |     }  | 
1405  | 151k  |     NSTR(node)->end = NSTR(node)->s + len + addlen;  | 
1406  | 151k  |   }  | 
1407  |  |  | 
1408  | 151k  |   return 0;  | 
1409  | 151k  | }  | 
1410  |  |  | 
1411  |  | extern int  | 
1412  |  | onig_node_str_set(Node* node, const UChar* s, const UChar* end)  | 
1413  | 0  | { | 
1414  | 0  |   onig_node_str_clear(node);  | 
1415  | 0  |   return onig_node_str_cat(node, s, end);  | 
1416  | 0  | }  | 
1417  |  |  | 
1418  |  | static int  | 
1419  |  | node_str_cat_char(Node* node, UChar c)  | 
1420  | 0  | { | 
1421  | 0  |   UChar s[1];  | 
1422  |  | 
  | 
1423  | 0  |   s[0] = c;  | 
1424  | 0  |   return onig_node_str_cat(node, s, s + 1);  | 
1425  | 0  | }  | 
1426  |  |  | 
1427  |  | static int  | 
1428  |  | node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)  | 
1429  | 0  | { | 
1430  | 0  |   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];  | 
1431  | 0  |   int num = ONIGENC_CODE_TO_MBC(enc, c, buf);  | 
1432  | 0  |   if (num < 0) return num;  | 
1433  | 0  |   return onig_node_str_cat(node, buf, buf + num);  | 
1434  | 0  | }  | 
1435  |  |  | 
#if 0
/*
 * (Compiled out.)  Turn NODE into an empty NT_STR node: type NT_STR, the
 * given FLAG, capa 0, and s / end both pointing at the embedded buf.
 */
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
  SET_NTYPE(node, NT_STR);
  NSTR(node)->s    = NSTR(node)->buf;
  NSTR(node)->end  = NSTR(node)->buf;
  NSTR(node)->capa = 0;
  NSTR(node)->flag = flag;
}
#endif
1447  |  |  | 
1448  |  | extern void  | 
1449  |  | onig_node_str_clear(Node* node)  | 
1450  | 0  | { | 
1451  | 0  |   if (NSTR(node)->capa != 0 &&  | 
1452  | 0  |       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { | 
1453  | 0  |     xfree(NSTR(node)->s);  | 
1454  | 0  |   }  | 
1455  |  | 
  | 
1456  | 0  |   NSTR(node)->capa = 0;  | 
1457  | 0  |   NSTR(node)->flag = 0;  | 
1458  | 0  |   NSTR(node)->s    = NSTR(node)->buf;  | 
1459  | 0  |   NSTR(node)->end  = NSTR(node)->buf;  | 
1460  | 0  | }  | 
1461  |  |  | 
1462  |  | static Node*  | 
1463  |  | node_new_str(const UChar* s, const UChar* end)  | 
1464  | 17.6k  | { | 
1465  | 17.6k  |   Node* node = node_new();  | 
1466  | 17.6k  |   CHECK_NULL_RETURN(node);  | 
1467  |  |  | 
1468  | 17.6k  |   SET_NTYPE(node, NT_STR);  | 
1469  | 17.6k  |   NSTR(node)->capa = 0;  | 
1470  | 17.6k  |   NSTR(node)->flag = 0;  | 
1471  | 17.6k  |   NSTR(node)->s    = NSTR(node)->buf;  | 
1472  | 17.6k  |   NSTR(node)->end  = NSTR(node)->buf;  | 
1473  | 17.6k  |   if (onig_node_str_cat(node, s, end)) { | 
1474  | 0  |     onig_node_free(node);  | 
1475  | 0  |     return NULL;  | 
1476  | 0  |   }  | 
1477  | 17.6k  |   return node;  | 
1478  | 17.6k  | }  | 
1479  |  |  | 
1480  |  | extern Node*  | 
1481  |  | onig_node_new_str(const UChar* s, const UChar* end)  | 
1482  | 0  | { | 
1483  | 0  |   return node_new_str(s, end);  | 
1484  | 0  | }  | 
1485  |  |  | 
1486  |  | static Node*  | 
1487  |  | node_new_str_raw(UChar* s, UChar* end)  | 
1488  | 0  | { | 
1489  | 0  |   Node* node = node_new_str(s, end);  | 
1490  | 0  |   if (IS_NOT_NULL(node))  | 
1491  | 0  |     NSTRING_SET_RAW(node);  | 
1492  | 0  |   return node;  | 
1493  | 0  | }  | 
1494  |  |  | 
1495  |  | static Node*  | 
1496  |  | node_new_empty(void)  | 
1497  | 411  | { | 
1498  | 411  |   return node_new_str(NULL, NULL);  | 
1499  | 411  | }  | 
1500  |  |  | 
1501  |  | static Node*  | 
1502  |  | node_new_str_raw_char(UChar c)  | 
1503  | 0  | { | 
1504  | 0  |   UChar p[1];  | 
1505  |  | 
  | 
1506  | 0  |   p[0] = c;  | 
1507  | 0  |   return node_new_str_raw(p, p + 1);  | 
1508  | 0  | }  | 
1509  |  |  | 
1510  |  | static Node*  | 
1511  |  | str_node_split_last_char(StrNode* sn, OnigEncoding enc)  | 
1512  | 0  | { | 
1513  | 0  |   const UChar *p;  | 
1514  | 0  |   Node* n = NULL_NODE;  | 
1515  |  | 
  | 
1516  | 0  |   if (sn->end > sn->s) { | 
1517  | 0  |     p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);  | 
1518  | 0  |     if (p && p > sn->s) { /* can be split. */ | 
1519  | 0  |       n = node_new_str(p, sn->end);  | 
1520  | 0  |       if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)  | 
1521  | 0  |   NSTRING_SET_RAW(n);  | 
1522  | 0  |       sn->end = (UChar* )p;  | 
1523  | 0  |     }  | 
1524  | 0  |   }  | 
1525  | 0  |   return n;  | 
1526  | 0  | }  | 
1527  |  |  | 
1528  |  | static int  | 
1529  |  | str_node_can_be_split(StrNode* sn, OnigEncoding enc)  | 
1530  | 0  | { | 
1531  | 0  |   if (sn->end > sn->s) { | 
1532  | 0  |     return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s)  ?  1 : 0);  | 
1533  | 0  |   }  | 
1534  | 0  |   return 0;  | 
1535  | 0  | }  | 
1536  |  |  | 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Shift the contents of SN right by NUM bytes and fill the NUM freed
 * leading bytes with VAL; sn->end is advanced by NUM.
 *
 * Always returns 0.  (The function is declared to return int but previously
 * fell off the end without a return statement, which is undefined behaviour
 * if a caller uses the value.)
 *
 * NOTE(review): the current contents are staged in a local buffer of
 * NODE_STR_BUF_SIZE bytes and no bounds are checked here -- presumably
 * callers guarantee that the string fits in that buffer and that sn->s has
 * room for NUM more bytes; confirm against the call sites.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  /* stage the old contents so the shifted copy cannot overlap its source */
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }

  return 0;
}
#endif
1554  |  |  | 
1555  |  | extern int  | 
1556  |  | onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)  | 
1557  | 0  | { | 
1558  | 0  |   unsigned int num, val;  | 
1559  | 0  |   OnigCodePoint c;  | 
1560  | 0  |   UChar* p = *src;  | 
1561  | 0  |   PFETCH_READY;  | 
1562  |  | 
  | 
1563  | 0  |   num = 0;  | 
1564  | 0  |   while (!PEND) { | 
1565  | 0  |     PFETCH(c);  | 
1566  | 0  |     if (ONIGENC_IS_CODE_DIGIT(enc, c)) { | 
1567  | 0  |       val = (unsigned int )DIGITVAL(c);  | 
1568  | 0  |       if ((INT_MAX_LIMIT - val) / 10UL < num)  | 
1569  | 0  |   return -1;  /* overflow */  | 
1570  |  |  | 
1571  | 0  |       num = num * 10 + val;  | 
1572  | 0  |     }  | 
1573  | 0  |     else { | 
1574  | 0  |       PUNFETCH;  | 
1575  | 0  |       break;  | 
1576  | 0  |     }  | 
1577  | 0  |   }  | 
1578  | 0  |   *src = p;  | 
1579  | 0  |   return num;  | 
1580  | 0  | }  | 
1581  |  |  | 
1582  |  | static int  | 
1583  |  | scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,  | 
1584  |  |          int maxlen, OnigEncoding enc)  | 
1585  | 0  | { | 
1586  | 0  |   OnigCodePoint c;  | 
1587  | 0  |   unsigned int num, val;  | 
1588  | 0  |   int restlen;  | 
1589  | 0  |   UChar* p = *src;  | 
1590  | 0  |   PFETCH_READY;  | 
1591  |  | 
  | 
1592  | 0  |   restlen = maxlen - minlen;  | 
1593  | 0  |   num = 0;  | 
1594  | 0  |   while (!PEND && maxlen-- != 0) { | 
1595  | 0  |     PFETCH(c);  | 
1596  | 0  |     if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { | 
1597  | 0  |       val = (unsigned int )XDIGITVAL(enc,c);  | 
1598  | 0  |       if ((INT_MAX_LIMIT - val) / 16UL < num)  | 
1599  | 0  |   return -1;  /* overflow */  | 
1600  |  |  | 
1601  | 0  |       num = (num << 4) + XDIGITVAL(enc,c);  | 
1602  | 0  |     }  | 
1603  | 0  |     else { | 
1604  | 0  |       PUNFETCH;  | 
1605  | 0  |       maxlen++;  | 
1606  | 0  |       break;  | 
1607  | 0  |     }  | 
1608  | 0  |   }  | 
1609  | 0  |   if (maxlen > restlen)  | 
1610  | 0  |     return -2;  /* not enough digits */  | 
1611  | 0  |   *src = p;  | 
1612  | 0  |   return num;  | 
1613  | 0  | }  | 
1614  |  |  | 
1615  |  | static int  | 
1616  |  | scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,  | 
1617  |  |          OnigEncoding enc)  | 
1618  | 0  | { | 
1619  | 0  |   OnigCodePoint c;  | 
1620  | 0  |   unsigned int num, val;  | 
1621  | 0  |   UChar* p = *src;  | 
1622  | 0  |   PFETCH_READY;  | 
1623  |  | 
  | 
1624  | 0  |   num = 0;  | 
1625  | 0  |   while (!PEND && maxlen-- != 0) { | 
1626  | 0  |     PFETCH(c);  | 
1627  | 0  |     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { | 
1628  | 0  |       val = ODIGITVAL(c);  | 
1629  | 0  |       if ((INT_MAX_LIMIT - val) / 8UL < num)  | 
1630  | 0  |   return -1;  /* overflow */  | 
1631  |  |  | 
1632  | 0  |       num = (num << 3) + val;  | 
1633  | 0  |     }  | 
1634  | 0  |     else { | 
1635  | 0  |       PUNFETCH;  | 
1636  | 0  |       break;  | 
1637  | 0  |     }  | 
1638  | 0  |   }  | 
1639  | 0  |   *src = p;  | 
1640  | 0  |   return num;  | 
1641  | 0  | }  | 
1642  |  |  | 
1643  |  |  | 
1644  |  | #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \  | 
1645  | 404k  |     BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)  | 
1646  |  |  | 
1647  |  | /* data format:  | 
1648  |  |      [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]  | 
1649  |  |      (all data size is OnigCodePoint)  | 
1650  |  |  */  | 
1651  |  | static int  | 
1652  |  | new_code_range(BBuf** pbuf)  | 
1653  | 5.34k  | { | 
1654  | 5.34k  | #define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)  | 
1655  | 5.34k  |   int r;  | 
1656  | 5.34k  |   OnigCodePoint n;  | 
1657  | 5.34k  |   BBuf* bbuf;  | 
1658  |  |  | 
1659  | 5.34k  |   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));  | 
1660  | 5.34k  |   CHECK_NULL_RETURN_MEMERR(*pbuf);  | 
1661  | 5.34k  |   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);  | 
1662  | 5.34k  |   if (r) return r;  | 
1663  |  |  | 
1664  | 5.34k  |   n = 0;  | 
1665  | 5.34k  |   BBUF_WRITE_CODE_POINT(bbuf, 0, n);  | 
1666  | 5.34k  |   return 0;  | 
1667  | 5.34k  | }  | 
1668  |  |  | 
/*
 * Add the inclusive range [from, to] to the code range buffer *pbuf,
 * creating the buffer first when *pbuf is NULL.  Existing ranges that overlap
 * the new one, or touch it (the searches compare against from - 1 and
 * to + 1), are replaced by a single merged range.  The two binary searches
 * below assume the stored ranges are kept in ascending order.
 *
 * from/to are swapped if given in the wrong order.  A non-zero `checkdup`
 * enables CC_DUP_WARN when the new range overlaps an existing one.
 *
 * Returns 0 on success, the error from new_code_range(), or
 * ONIGERR_TOO_MANY_MULTI_BYTE_RANGES.
 * NOTE(review): the BBUF_* macros are defined elsewhere; presumably they can
 * also return an allocation error from this function - confirm.
 */
1669  |  | static int  | 
1670  |  | add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,  | 
1671  |  |   int checkdup)  | 
1672  | 133k  | { | 
1673  | 133k  |   int r, inc_n, pos;  | 
1674  | 133k  |   OnigCodePoint low, high, bound, x;  | 
1675  | 133k  |   OnigCodePoint n, *data;  | 
1676  | 133k  |   BBuf* bbuf;  | 
1677  |  |  | 
  /* Normalise the arguments so that from <= to (n is only a swap temp here). */
1678  | 133k  |   if (from > to) { | 
1679  | 0  |     n = from; from = to; to = n;  | 
1680  | 0  |   }  | 
1681  |  |  | 
  /* n = number of ranges currently stored. */
1682  | 133k  |   if (IS_NULL(*pbuf)) { | 
1683  | 5.34k  |     r = new_code_range(pbuf);  | 
1684  | 5.34k  |     if (r) return r;  | 
1685  | 5.34k  |     bbuf = *pbuf;  | 
1686  | 5.34k  |     n = 0;  | 
1687  | 5.34k  |   }  | 
1688  | 127k  |   else { | 
1689  | 127k  |     bbuf = *pbuf;  | 
1690  | 127k  |     GET_CODE_POINT(n, bbuf->p);  | 
1691  | 127k  |   }  | 
  /* data[] addresses the (from, to) pairs that follow the count word. */
1692  | 133k  |   data = (OnigCodePoint* )(bbuf->p);  | 
1693  | 133k  |   data++;  | 
1694  |  |  | 
  /* low = index of the first range whose end is >= from - 1.  The search is
   * skipped for from == 0 (bound = 0), where from - 1 would wrap around. */
1695  | 133k  |   bound = (from == 0) ? 0 : n;  | 
1696  | 605k  |   for (low = 0; low < bound; ) { | 
1697  | 472k  |     x = (low + bound) >> 1;  | 
1698  | 472k  |     if (from - 1 > data[x*2 + 1])  | 
1699  | 472k  |       low = x + 1;  | 
1700  | 0  |     else  | 
1701  | 0  |       bound = x;  | 
1702  | 472k  |   }  | 
1703  |  |  | 
  /* high = index one past the last range whose start is <= to + 1.  For
   * to == ONIG_LAST_CODE_POINT the search is skipped (high = n), which also
   * avoids evaluating to + 1 at the maximum value. */
1704  | 133k  |   high = (to == ONIG_LAST_CODE_POINT) ? n : low;  | 
1705  | 133k  |   for (bound = n; high < bound; ) { | 
1706  | 0  |     x = (high + bound) >> 1;  | 
1707  | 0  |     if (to + 1 >= data[x*2])  | 
1708  | 0  |       high = x + 1;  | 
1709  | 0  |     else  | 
1710  | 0  |       bound = x;  | 
1711  | 0  |   }  | 
1712  |  |   /* data[(low-1)*2+1] << from <= data[low*2]  | 
1713  |  |    * data[(high-1)*2+1] <= to << data[high*2]  | 
1714  |  |    */  | 
1715  |  |  | 
  /* Ranges low .. high-1 are absorbed by the new one, so the range count
   * changes by inc_n: 1 = plain insertion, 0 = one range replaced,
   * negative = several ranges collapse into one. */
1716  | 133k  |   inc_n = low + 1 - high;  | 
1717  | 133k  |   if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)  | 
1718  | 0  |     return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;  | 
1719  |  |  | 
  /* At least one existing range is absorbed: optionally warn about the
   * overlap, then widen [from, to] to cover everything being merged. */
1720  | 133k  |   if (inc_n != 1) { | 
1721  | 0  |     if (checkdup && from <= data[low*2+1]  | 
1722  | 0  |   && (data[low*2] <= from || data[low*2+1] <= to))  | 
1723  | 0  |       CC_DUP_WARN(env, from, to);  | 
1724  | 0  |     if (from > data[low*2])  | 
1725  | 0  |       from = data[low*2];  | 
1726  | 0  |     if (to < data[(high - 1)*2 + 1])  | 
1727  | 0  |       to = data[(high - 1)*2 + 1];  | 
1728  | 0  |   }  | 
1729  |  |  | 
  /* Move the ranges from index `high` onward so that they start right after
   * slot `low`: to the right when growing, to the left when shrinking.
   * `data` is not used again after this point. */
1730  | 133k  |   if (inc_n != 0) { | 
1731  | 133k  |     int from_pos = SIZE_CODE_POINT * (1 + high * 2);  | 
1732  | 133k  |     int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);  | 
1733  |  |  | 
1734  | 133k  |     if (inc_n > 0) { | 
1735  | 133k  |       if (high < n) { | 
1736  | 0  |   int size = (n - high) * 2 * SIZE_CODE_POINT;  | 
1737  | 0  |   BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);  | 
1738  | 0  |       }  | 
1739  | 133k  |     }  | 
1740  | 0  |     else { | 
1741  | 0  |       BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);  | 
1742  | 0  |     }  | 
1743  | 133k  |   }  | 
1744  |  |  | 
  /* Write the (possibly widened) range into slot `low`, then store the
   * updated range count at the head of the buffer. */
1745  | 133k  |   pos = SIZE_CODE_POINT * (1 + low * 2);  | 
1746  | 133k  |   BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);  | 
1747  | 133k  |   BBUF_WRITE_CODE_POINT(bbuf, pos, from);  | 
1748  | 133k  |   BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);  | 
1749  | 133k  |   n += inc_n;  | 
1750  | 133k  |   BBUF_WRITE_CODE_POINT(bbuf, 0, n);  | 
1751  |  |  | 
1752  | 133k  |   return 0;  | 
1753  | 133k  | }  | 
1754  |  |  | 
1755  |  | static int  | 
1756  |  | add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)  | 
1757  | 133k  | { | 
1758  | 133k  |   return add_code_range_to_buf0(pbuf, env, from, to, 1);  | 
1759  | 133k  | }  | 
1760  |  |  | 
1761  |  | static int  | 
1762  |  | add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)  | 
1763  | 0  | { | 
1764  | 0  |   if (from > to) { | 
1765  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))  | 
1766  | 0  |       return 0;  | 
1767  | 0  |     else  | 
1768  | 0  |       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;  | 
1769  | 0  |   }  | 
1770  |  |  | 
1771  | 0  |   return add_code_range_to_buf0(pbuf, env, from, to, checkdup);  | 
1772  | 0  | }  | 
1773  |  |  | 
1774  |  | static int  | 
1775  |  | add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)  | 
1776  | 0  | { | 
1777  | 0  |   return add_code_range0(pbuf, env, from, to, 1);  | 
1778  | 0  | }  | 
1779  |  |  | 
1780  |  | static int  | 
1781  |  | not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)  | 
1782  | 0  | { | 
1783  | 0  |   int r, i, n;  | 
1784  | 0  |   OnigCodePoint pre, from, *data, to = 0;  | 
1785  |  | 
  | 
1786  | 0  |   *pbuf = (BBuf* )NULL;  | 
1787  | 0  |   if (IS_NULL(bbuf)) { | 
1788  | 0  |   set_all:  | 
1789  | 0  |     return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);  | 
1790  | 0  |   }  | 
1791  |  |  | 
1792  | 0  |   data = (OnigCodePoint* )(bbuf->p);  | 
1793  | 0  |   GET_CODE_POINT(n, data);  | 
1794  | 0  |   data++;  | 
1795  | 0  |   if (n <= 0) goto set_all;  | 
1796  |  |  | 
1797  | 0  |   r = 0;  | 
1798  | 0  |   pre = MBCODE_START_POS(enc);  | 
1799  | 0  |   for (i = 0; i < n; i++) { | 
1800  | 0  |     from = data[i*2];  | 
1801  | 0  |     to   = data[i*2+1];  | 
1802  | 0  |     if (pre <= from - 1) { | 
1803  | 0  |       r = add_code_range_to_buf(pbuf, env, pre, from - 1);  | 
1804  | 0  |       if (r != 0) return r;  | 
1805  | 0  |     }  | 
1806  | 0  |     if (to == ONIG_LAST_CODE_POINT) break;  | 
1807  | 0  |     pre = to + 1;  | 
1808  | 0  |   }  | 
1809  | 0  |   if (to < ONIG_LAST_CODE_POINT) { | 
1810  | 0  |     r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);  | 
1811  | 0  |   }  | 
1812  | 0  |   return r;  | 
1813  | 0  | }  | 
1814  |  |  | 
/* Exchange two (buffer, negation flag) operand pairs in place: not1 <-> not2
 * and bbuf1 <-> bbuf2.  All four arguments are assigned to, so each must be a
 * modifiable lvalue.  Wrapped in do { } while (0) so that it acts as a single
 * statement. */
1815  | 0  | #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ | 
1816  | 0  |   BBuf *tbuf; \  | 
1817  | 0  |   int  tnot; \  | 
1818  | 0  |   tnot = not1;  not1  = not2;  not2  = tnot; \  | 
1819  | 0  |   tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \  | 
1820  | 0  | } while (0)  | 
1821  |  |  | 
1822  |  | static int  | 
1823  |  | or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,  | 
1824  |  |                   BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)  | 
1825  | 5.34k  | { | 
1826  | 5.34k  |   int r;  | 
1827  | 5.34k  |   OnigCodePoint i, n1, *data1;  | 
1828  | 5.34k  |   OnigCodePoint from, to;  | 
1829  |  |  | 
1830  | 5.34k  |   *pbuf = (BBuf* )NULL;  | 
1831  | 5.34k  |   if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { | 
1832  | 5.34k  |     if (not1 != 0 || not2 != 0)  | 
1833  | 0  |       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);  | 
1834  | 5.34k  |     return 0;  | 
1835  | 5.34k  |   }  | 
1836  |  |  | 
1837  | 0  |   r = 0;  | 
1838  | 0  |   if (IS_NULL(bbuf2))  | 
1839  | 0  |     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);  | 
1840  |  | 
  | 
1841  | 0  |   if (IS_NULL(bbuf1)) { | 
1842  | 0  |     if (not1 != 0) { | 
1843  | 0  |       return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);  | 
1844  | 0  |     }  | 
1845  | 0  |     else { | 
1846  | 0  |       if (not2 == 0) { | 
1847  | 0  |   return bbuf_clone(pbuf, bbuf2);  | 
1848  | 0  |       }  | 
1849  | 0  |       else { | 
1850  | 0  |   return not_code_range_buf(enc, bbuf2, pbuf, env);  | 
1851  | 0  |       }  | 
1852  | 0  |     }  | 
1853  | 0  |   }  | 
1854  |  |  | 
1855  | 0  |   if (not1 != 0)  | 
1856  | 0  |     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);  | 
1857  |  | 
  | 
1858  | 0  |   data1 = (OnigCodePoint* )(bbuf1->p);  | 
1859  | 0  |   GET_CODE_POINT(n1, data1);  | 
1860  | 0  |   data1++;  | 
1861  |  | 
  | 
1862  | 0  |   if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ | 
1863  | 0  |     r = bbuf_clone(pbuf, bbuf2);  | 
1864  | 0  |   }  | 
1865  | 0  |   else if (not1 == 0) { /* 1 OR (not 2) */ | 
1866  | 0  |     r = not_code_range_buf(enc, bbuf2, pbuf, env);  | 
1867  | 0  |   }  | 
1868  | 0  |   if (r != 0) return r;  | 
1869  |  |  | 
1870  | 0  |   for (i = 0; i < n1; i++) { | 
1871  | 0  |     from = data1[i*2];  | 
1872  | 0  |     to   = data1[i*2+1];  | 
1873  | 0  |     r = add_code_range_to_buf(pbuf, env, from, to);  | 
1874  | 0  |     if (r != 0) return r;  | 
1875  | 0  |   }  | 
1876  | 0  |   return 0;  | 
1877  | 0  | }  | 
1878  |  |  | 
1879  |  | static int  | 
1880  |  | and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,  | 
1881  |  |     OnigCodePoint* data, int n)  | 
1882  | 0  | { | 
1883  | 0  |   int i, r;  | 
1884  | 0  |   OnigCodePoint from2, to2;  | 
1885  |  | 
  | 
1886  | 0  |   for (i = 0; i < n; i++) { | 
1887  | 0  |     from2 = data[i*2];  | 
1888  | 0  |     to2   = data[i*2+1];  | 
1889  | 0  |     if (from2 < from1) { | 
1890  | 0  |       if (to2 < from1) continue;  | 
1891  | 0  |       else { | 
1892  | 0  |   from1 = to2 + 1;  | 
1893  | 0  |       }  | 
1894  | 0  |     }  | 
1895  | 0  |     else if (from2 <= to1) { | 
1896  | 0  |       if (to2 < to1) { | 
1897  | 0  |   if (from1 <= from2 - 1) { | 
1898  | 0  |     r = add_code_range_to_buf(pbuf, env, from1, from2-1);  | 
1899  | 0  |     if (r != 0) return r;  | 
1900  | 0  |   }  | 
1901  | 0  |   from1 = to2 + 1;  | 
1902  | 0  |       }  | 
1903  | 0  |       else { | 
1904  | 0  |   to1 = from2 - 1;  | 
1905  | 0  |       }  | 
1906  | 0  |     }  | 
1907  | 0  |     else { | 
1908  | 0  |       from1 = from2;  | 
1909  | 0  |     }  | 
1910  | 0  |     if (from1 > to1) break;  | 
1911  | 0  |   }  | 
1912  | 0  |   if (from1 <= to1) { | 
1913  | 0  |     r = add_code_range_to_buf(pbuf, env, from1, to1);  | 
1914  | 0  |     if (r != 0) return r;  | 
1915  | 0  |   }  | 
1916  | 0  |   return 0;  | 
1917  | 0  | }  | 
1918  |  |  | 
1919  |  | static int  | 
1920  |  | and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)  | 
1921  | 5.34k  | { | 
1922  | 5.34k  |   int r;  | 
1923  | 5.34k  |   OnigCodePoint i, j, n1, n2, *data1, *data2;  | 
1924  | 5.34k  |   OnigCodePoint from, to, from1, to1, from2, to2;  | 
1925  |  |  | 
1926  | 5.34k  |   *pbuf = (BBuf* )NULL;  | 
1927  | 5.34k  |   if (IS_NULL(bbuf1)) { | 
1928  | 0  |     if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */  | 
1929  | 0  |       return bbuf_clone(pbuf, bbuf2);  | 
1930  | 0  |     return 0;  | 
1931  | 0  |   }  | 
1932  | 5.34k  |   else if (IS_NULL(bbuf2)) { | 
1933  | 5.34k  |     if (not2 != 0)  | 
1934  | 0  |       return bbuf_clone(pbuf, bbuf1);  | 
1935  | 5.34k  |     return 0;  | 
1936  | 5.34k  |   }  | 
1937  |  |  | 
1938  | 0  |   if (not1 != 0)  | 
1939  | 0  |     SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);  | 
1940  |  | 
  | 
1941  | 0  |   data1 = (OnigCodePoint* )(bbuf1->p);  | 
1942  | 0  |   data2 = (OnigCodePoint* )(bbuf2->p);  | 
1943  | 0  |   GET_CODE_POINT(n1, data1);  | 
1944  | 0  |   GET_CODE_POINT(n2, data2);  | 
1945  | 0  |   data1++;  | 
1946  | 0  |   data2++;  | 
1947  |  | 
  | 
1948  | 0  |   if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ | 
1949  | 0  |     for (i = 0; i < n1; i++) { | 
1950  | 0  |       from1 = data1[i*2];  | 
1951  | 0  |       to1   = data1[i*2+1];  | 
1952  | 0  |       for (j = 0; j < n2; j++) { | 
1953  | 0  |   from2 = data2[j*2];  | 
1954  | 0  |   to2   = data2[j*2+1];  | 
1955  | 0  |   if (from2 > to1) break;  | 
1956  | 0  |   if (to2 < from1) continue;  | 
1957  | 0  |   from = MAX(from1, from2);  | 
1958  | 0  |   to   = MIN(to1, to2);  | 
1959  | 0  |   r = add_code_range_to_buf(pbuf, env, from, to);  | 
1960  | 0  |   if (r != 0) return r;  | 
1961  | 0  |       }  | 
1962  | 0  |     }  | 
1963  | 0  |   }  | 
1964  | 0  |   else if (not1 == 0) { /* 1 AND (not 2) */ | 
1965  | 0  |     for (i = 0; i < n1; i++) { | 
1966  | 0  |       from1 = data1[i*2];  | 
1967  | 0  |       to1   = data1[i*2+1];  | 
1968  | 0  |       r = and_code_range1(pbuf, env, from1, to1, data2, n2);  | 
1969  | 0  |       if (r != 0) return r;  | 
1970  | 0  |     }  | 
1971  | 0  |   }  | 
1972  |  |  | 
1973  | 0  |   return 0;  | 
1974  | 0  | }  | 
1975  |  |  | 
1976  |  | static int  | 
1977  |  | and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)  | 
1978  | 5.34k  | { | 
1979  | 5.34k  |   OnigEncoding enc = env->enc;  | 
1980  | 5.34k  |   int r, not1, not2;  | 
1981  | 5.34k  |   BBuf *buf1, *buf2, *pbuf = 0;  | 
1982  | 5.34k  |   BitSetRef bsr1, bsr2;  | 
1983  | 5.34k  |   BitSet bs1, bs2;  | 
1984  |  |  | 
1985  | 5.34k  |   not1 = IS_NCCLASS_NOT(dest);  | 
1986  | 5.34k  |   bsr1 = dest->bs;  | 
1987  | 5.34k  |   buf1 = dest->mbuf;  | 
1988  | 5.34k  |   not2 = IS_NCCLASS_NOT(cc);  | 
1989  | 5.34k  |   bsr2 = cc->bs;  | 
1990  | 5.34k  |   buf2 = cc->mbuf;  | 
1991  |  |  | 
1992  | 5.34k  |   if (not1 != 0) { | 
1993  | 0  |     bitset_invert_to(bsr1, bs1);  | 
1994  | 0  |     bsr1 = bs1;  | 
1995  | 0  |   }  | 
1996  | 5.34k  |   if (not2 != 0) { | 
1997  | 0  |     bitset_invert_to(bsr2, bs2);  | 
1998  | 0  |     bsr2 = bs2;  | 
1999  | 0  |   }  | 
2000  | 5.34k  |   bitset_and(bsr1, bsr2);  | 
2001  | 5.34k  |   if (bsr1 != dest->bs) { | 
2002  | 0  |     bitset_copy(dest->bs, bsr1);  | 
2003  | 0  |     bsr1 = dest->bs;  | 
2004  | 0  |   }  | 
2005  | 5.34k  |   if (not1 != 0) { | 
2006  | 0  |     bitset_invert(dest->bs);  | 
2007  | 0  |   }  | 
2008  |  |  | 
2009  | 5.34k  |   if (! ONIGENC_IS_SINGLEBYTE(enc)) { | 
2010  | 5.34k  |     if (not1 != 0 && not2 != 0) { | 
2011  | 0  |       r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);  | 
2012  | 0  |     }  | 
2013  | 5.34k  |     else { | 
2014  | 5.34k  |       r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);  | 
2015  | 5.34k  |       if (r == 0 && not1 != 0) { | 
2016  | 0  |   BBuf *tbuf = 0;  | 
2017  | 0  |   r = not_code_range_buf(enc, pbuf, &tbuf, env);  | 
2018  | 0  |   bbuf_free(pbuf);  | 
2019  | 0  |   pbuf = tbuf;  | 
2020  | 0  |       }  | 
2021  | 5.34k  |     }  | 
2022  | 5.34k  |     if (r != 0) { | 
2023  | 0  |       bbuf_free(pbuf);  | 
2024  | 0  |       return r;  | 
2025  | 0  |     }  | 
2026  |  |  | 
2027  | 5.34k  |     dest->mbuf = pbuf;  | 
2028  | 5.34k  |     bbuf_free(buf1);  | 
2029  | 5.34k  |     return r;  | 
2030  | 5.34k  |   }  | 
2031  | 0  |   return 0;  | 
2032  | 5.34k  | }  | 
2033  |  |  | 
2034  |  | static int  | 
2035  |  | or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)  | 
2036  | 5.34k  | { | 
2037  | 5.34k  |   OnigEncoding enc = env->enc;  | 
2038  | 5.34k  |   int r, not1, not2;  | 
2039  | 5.34k  |   BBuf *buf1, *buf2, *pbuf = 0;  | 
2040  | 5.34k  |   BitSetRef bsr1, bsr2;  | 
2041  | 5.34k  |   BitSet bs1, bs2;  | 
2042  |  |  | 
2043  | 5.34k  |   not1 = IS_NCCLASS_NOT(dest);  | 
2044  | 5.34k  |   bsr1 = dest->bs;  | 
2045  | 5.34k  |   buf1 = dest->mbuf;  | 
2046  | 5.34k  |   not2 = IS_NCCLASS_NOT(cc);  | 
2047  | 5.34k  |   bsr2 = cc->bs;  | 
2048  | 5.34k  |   buf2 = cc->mbuf;  | 
2049  |  |  | 
2050  | 5.34k  |   if (not1 != 0) { | 
2051  | 0  |     bitset_invert_to(bsr1, bs1);  | 
2052  | 0  |     bsr1 = bs1;  | 
2053  | 0  |   }  | 
2054  | 5.34k  |   if (not2 != 0) { | 
2055  | 0  |     bitset_invert_to(bsr2, bs2);  | 
2056  | 0  |     bsr2 = bs2;  | 
2057  | 0  |   }  | 
2058  | 5.34k  |   bitset_or(bsr1, bsr2);  | 
2059  | 5.34k  |   if (bsr1 != dest->bs) { | 
2060  | 0  |     bitset_copy(dest->bs, bsr1);  | 
2061  | 0  |     bsr1 = dest->bs;  | 
2062  | 0  |   }  | 
2063  | 5.34k  |   if (not1 != 0) { | 
2064  | 0  |     bitset_invert(dest->bs);  | 
2065  | 0  |   }  | 
2066  |  |  | 
2067  | 5.34k  |   if (! ONIGENC_IS_SINGLEBYTE(enc)) { | 
2068  | 5.34k  |     if (not1 != 0 && not2 != 0) { | 
2069  | 0  |       r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);  | 
2070  | 0  |     }  | 
2071  | 5.34k  |     else { | 
2072  | 5.34k  |       r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);  | 
2073  | 5.34k  |       if (r == 0 && not1 != 0) { | 
2074  | 0  |   BBuf *tbuf = 0;  | 
2075  | 0  |   r = not_code_range_buf(enc, pbuf, &tbuf, env);  | 
2076  | 0  |   bbuf_free(pbuf);  | 
2077  | 0  |   pbuf = tbuf;  | 
2078  | 0  |       }  | 
2079  | 5.34k  |     }  | 
2080  | 5.34k  |     if (r != 0) { | 
2081  | 0  |       bbuf_free(pbuf);  | 
2082  | 0  |       return r;  | 
2083  | 0  |     }  | 
2084  |  |  | 
2085  | 5.34k  |     dest->mbuf = pbuf;  | 
2086  | 5.34k  |     bbuf_free(buf1);  | 
2087  | 5.34k  |     return r;  | 
2088  | 5.34k  |   }  | 
2089  | 0  |   else  | 
2090  | 0  |     return 0;  | 
2091  | 5.34k  | }  | 
2092  |  |  | 
2093  |  | static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);  | 
2094  |  |  | 
2095  |  | static OnigCodePoint  | 
2096  |  | conv_backslash_value(OnigCodePoint c, ScanEnv* env)  | 
2097  | 7.39k  | { | 
2098  | 7.39k  |   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { | 
2099  | 7.39k  |     switch (c) { | 
2100  | 822  |     case 'n': return '\n';  | 
2101  | 3.28k  |     case 't': return '\t';  | 
2102  | 822  |     case 'r': return '\r';  | 
2103  | 0  |     case 'f': return '\f';  | 
2104  | 0  |     case 'a': return '\007';  | 
2105  | 0  |     case 'b': return '\010';  | 
2106  | 0  |     case 'e': return '\033';  | 
2107  | 0  |     case 'v':  | 
2108  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))  | 
2109  | 0  |   return '\v';  | 
2110  | 0  |       break;  | 
2111  |  |  | 
2112  | 2.46k  |     default:  | 
2113  | 2.46k  |       if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) | 
2114  | 0  |     UNKNOWN_ESC_WARN(env, c);  | 
2115  | 2.46k  |       break;  | 
2116  | 7.39k  |     }  | 
2117  | 7.39k  |   }  | 
2118  | 2.46k  |   return c;  | 
2119  | 7.39k  | }  | 
2120  |  |  | 
2121  |  | #ifdef USE_NO_INVALID_QUANTIFIER  | 
2122  | 12.3k  | # define is_invalid_quantifier_target(node) 0  | 
2123  |  | #else  | 
2124  |  | static int  | 
2125  |  | is_invalid_quantifier_target(Node* node)  | 
2126  |  | { | 
2127  |  |   switch (NTYPE(node)) { | 
2128  |  |   case NT_ANCHOR:  | 
2129  |  |     return 1;  | 
2130  |  |     break;  | 
2131  |  |  | 
2132  |  |   case NT_ENCLOSE:  | 
2133  |  |     /* allow enclosed elements */  | 
2134  |  |     /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */  | 
2135  |  |     break;  | 
2136  |  |  | 
2137  |  |   case NT_LIST:  | 
2138  |  |     do { | 
2139  |  |       if (! is_invalid_quantifier_target(NCAR(node))) return 0;  | 
2140  |  |     } while (IS_NOT_NULL(node = NCDR(node)));  | 
2141  |  |     return 0;  | 
2142  |  |     break;  | 
2143  |  |  | 
2144  |  |   case NT_ALT:  | 
2145  |  |     do { | 
2146  |  |       if (is_invalid_quantifier_target(NCAR(node))) return 1;  | 
2147  |  |     } while (IS_NOT_NULL(node = NCDR(node)));  | 
2148  |  |     break;  | 
2149  |  |  | 
2150  |  |   default:  | 
2151  |  |     break;  | 
2152  |  |   }  | 
2153  |  |   return 0;  | 
2154  |  | }  | 
2155  |  | #endif  | 
2156  |  |  | 
2157  |  | /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */  | 
2158  |  | static int  | 
2159  |  | popular_quantifier_num(QtfrNode* q)  | 
2160  | 0  | { | 
2161  | 0  |   if (q->greedy) { | 
2162  | 0  |     if (q->lower == 0) { | 
2163  | 0  |       if (q->upper == 1) return 0;  | 
2164  | 0  |       else if (IS_REPEAT_INFINITE(q->upper)) return 1;  | 
2165  | 0  |     }  | 
2166  | 0  |     else if (q->lower == 1) { | 
2167  | 0  |       if (IS_REPEAT_INFINITE(q->upper)) return 2;  | 
2168  | 0  |     }  | 
2169  | 0  |   }  | 
2170  | 0  |   else { | 
2171  | 0  |     if (q->lower == 0) { | 
2172  | 0  |       if (q->upper == 1) return 3;  | 
2173  | 0  |       else if (IS_REPEAT_INFINITE(q->upper)) return 4;  | 
2174  | 0  |     }  | 
2175  | 0  |     else if (q->lower == 1) { | 
2176  | 0  |       if (IS_REPEAT_INFINITE(q->upper)) return 5;  | 
2177  | 0  |     }  | 
2178  | 0  |   }  | 
2179  | 0  |   return -1;  | 
2180  | 0  | }  | 
2181  |  |  | 
2182  |  |  | 
/* Actions for collapsing a quantifier applied directly to another quantifier.
 * These are the entries of ReduceTypeTable; the switch in
 * onig_reduce_nested_quantifier() carries out each action. */
2183  |  | enum ReduceType { | 
2184  |  |   RQ_ASIS = 0, /* as is */  | 
2185  |  |   RQ_DEL  = 1, /* delete parent */  | 
2186  |  |   RQ_A,        /* to '*'    */  | 
2187  |  |   RQ_AQ,       /* to '*?'   */  | 
2188  |  |   RQ_QQ,       /* to '??'   */  | 
2189  |  |   RQ_P_QQ,     /* to '+)??' */  | 
2190  |  |   RQ_PQ_Q      /* to '+?)?' */  | 
2191  |  | };  | 
2192  |  |  | 
/* Reduction action for each pair of "popular" quantifiers.  Both indices are
 * values returned by popular_quantifier_num() (0..5); the table is read as
 * ReduceTypeTable[cnum][pnum], i.e. first index = child (inner) quantifier,
 * second index = parent (outer) quantifier. */
2193  |  | static enum ReduceType const ReduceTypeTable[6][6] = { | 
2194  |  | /* '?',     '*',     '+',    '??',    '*?',    '+?'      p / c   */  | 
2195  |  |   {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */ | 
2196  |  |   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */ | 
2197  |  |   {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */ | 
2198  |  |   {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */ | 
2199  |  |   {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */ | 
2200  |  |   {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */ | 
2201  |  | };  | 
2202  |  |  | 
2203  |  | extern void  | 
2204  |  | onig_reduce_nested_quantifier(Node* pnode, Node* cnode)  | 
2205  | 0  | { | 
2206  | 0  |   int pnum, cnum;  | 
2207  | 0  |   QtfrNode *p, *c;  | 
2208  |  | 
  | 
2209  | 0  |   p = NQTFR(pnode);  | 
2210  | 0  |   c = NQTFR(cnode);  | 
2211  | 0  |   pnum = popular_quantifier_num(p);  | 
2212  | 0  |   cnum = popular_quantifier_num(c);  | 
2213  | 0  |   if (pnum < 0 || cnum < 0) return ;  | 
2214  |  |  | 
2215  | 0  |   switch (ReduceTypeTable[cnum][pnum]) { | 
2216  | 0  |   case RQ_DEL:  | 
2217  | 0  |     *pnode = *cnode;  | 
2218  | 0  |     break;  | 
2219  | 0  |   case RQ_A:  | 
2220  | 0  |     p->target = c->target;  | 
2221  | 0  |     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 1;  | 
2222  | 0  |     break;  | 
2223  | 0  |   case RQ_AQ:  | 
2224  | 0  |     p->target = c->target;  | 
2225  | 0  |     p->lower  = 0;  p->upper = REPEAT_INFINITE;  p->greedy = 0;  | 
2226  | 0  |     break;  | 
2227  | 0  |   case RQ_QQ:  | 
2228  | 0  |     p->target = c->target;  | 
2229  | 0  |     p->lower  = 0;  p->upper = 1;  p->greedy = 0;  | 
2230  | 0  |     break;  | 
2231  | 0  |   case RQ_P_QQ:  | 
2232  | 0  |     p->target = cnode;  | 
2233  | 0  |     p->lower  = 0;  p->upper = 1;  p->greedy = 0;  | 
2234  | 0  |     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 1;  | 
2235  | 0  |     return ;  | 
2236  | 0  |     break;  | 
2237  | 0  |   case RQ_PQ_Q:  | 
2238  | 0  |     p->target = cnode;  | 
2239  | 0  |     p->lower  = 0;  p->upper = 1;  p->greedy = 1;  | 
2240  | 0  |     c->lower  = 1;  c->upper = REPEAT_INFINITE;  c->greedy = 0;  | 
2241  | 0  |     return ;  | 
2242  | 0  |     break;  | 
2243  | 0  |   case RQ_ASIS:  | 
2244  | 0  |     p->target = cnode;  | 
2245  | 0  |     return ;  | 
2246  | 0  |     break;  | 
2247  | 0  |   }  | 
2248  |  |  | 
2249  | 0  |   c->target = NULL_NODE;  | 
2250  | 0  |   onig_node_free(cnode);  | 
2251  | 0  | }  | 
2252  |  |  | 
2253  |  |  | 
/* Kinds of token; this is the type of OnigToken.type.  The enumerators listed
 * after the "in cc" marker are the ones used inside a character class. */
2254  |  | enum TokenSyms { | 
2255  |  |   TK_EOT      = 0,   /* end of token */  | 
2256  |  |   TK_RAW_BYTE = 1,  | 
2257  |  |   TK_CHAR,  | 
2258  |  |   TK_STRING,  | 
2259  |  |   TK_CODE_POINT,  | 
2260  |  |   TK_ANYCHAR,  | 
2261  |  |   TK_CHAR_TYPE,  | 
2262  |  |   TK_BACKREF,  | 
2263  |  |   TK_CALL,  | 
2264  |  |   TK_ANCHOR,  | 
2265  |  |   TK_OP_REPEAT,  | 
2266  |  |   TK_INTERVAL,  | 
2267  |  |   TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */  | 
2268  |  |   TK_ALT,  | 
2269  |  |   TK_SUBEXP_OPEN,  | 
2270  |  |   TK_SUBEXP_CLOSE,  | 
2271  |  |   TK_CC_OPEN,  | 
2272  |  |   TK_QUOTE_OPEN,  | 
2273  |  |   TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */ | 
2274  |  |   TK_LINEBREAK,  | 
2275  |  |   TK_EXTENDED_GRAPHEME_CLUSTER,  | 
2276  |  |   TK_KEEP,  | 
2277  |  |   /* in cc */  | 
2278  |  |   TK_CC_CLOSE,  | 
2279  |  |   TK_CC_RANGE,  | 
2280  |  |   TK_POSIX_BRACKET_OPEN,  | 
2281  |  |   TK_CC_AND,             /* && */  | 
2282  |  |   TK_CC_CC_OPEN          /* [ */  | 
2283  |  | };  | 
2284  |  |  | 
/* One token of a pattern.  `type` says which kind of token this is; the
 * union `u` holds the payload.  fetch_range_quantifier() fills u.repeat
 * (lower/upper) for TK_INTERVAL.
 * NOTE(review): the pairing of the other union members with token kinds
 * (u.backref for TK_BACKREF, u.call for TK_CALL, u.prop for
 * TK_CHAR_TYPE/TK_CHAR_PROPERTY, ...) is inferred from the names only -
 * confirm against fetch_token(). */
2285  |  | typedef struct { | 
2286  |  |   enum TokenSyms type;  | 
2287  |  |   int escaped;  | 
2288  |  |   int base;   /* is number: 8, 16 (used in [....]) */  | 
2289  |  |   UChar* backp;  | 
2290  |  |   union { | 
2291  |  |     UChar* s;  | 
2292  |  |     int   c;  | 
2293  |  |     OnigCodePoint code;  | 
2294  |  |     struct { | 
2295  |  |       int subtype;  | 
2296  |  |       int ascii_range;  | 
2297  |  |     } anchor;  | 
2298  |  |     struct { | 
2299  |  |       int lower;  | 
2300  |  |       int upper;  | 
2301  |  |       int greedy;  | 
2302  |  |       int possessive;  | 
2303  |  |     } repeat;  | 
2304  |  |     struct { | 
2305  |  |       int  num;  | 
2306  |  |       int  ref1;  | 
2307  |  |       int* refs;  | 
2308  |  |       int  by_name;  | 
2309  |  | #ifdef USE_BACKREF_WITH_LEVEL  | 
2310  |  |       int  exist_level;  | 
2311  |  |       int  level;   /* \k<name+n> */  | 
2312  |  | #endif  | 
2313  |  |     } backref;  | 
2314  |  |     struct { | 
2315  |  |       UChar* name;  | 
2316  |  |       UChar* name_end;  | 
2317  |  |       int    gnum;  | 
2318  |  |       int    rel;  | 
2319  |  |     } call;  | 
2320  |  |     struct { | 
2321  |  |       int ctype;  | 
2322  |  |       int not;  | 
2323  |  |     } prop;  | 
2324  |  |   } u;  | 
2325  |  | } OnigToken;  | 
2326  |  |  | 
2327  |  |  | 
/*
 * Parse the body of an interval quantifier - "n}", "n,}", "n,m}" or ",m}" -
 * starting at *src, which is positioned just after the opening brace.
 *
 * On success tok->type is set to TK_INTERVAL, tok->u.repeat.lower/upper are
 * filled in and *src is advanced past the closing brace.
 *
 * Returns:
 *   0  normal interval {n,m} / {n,} / {,m}
 *   2  fixed interval {n}
 *   1  the text is not a valid interval, but the syntax has
 *      ONIG_SYN_ALLOW_INVALID_INTERVAL; *src and tok are left untouched
 *   an ONIGERR_* code otherwise (*src and tok are left untouched)
 */
2328  |  | static int  | 
2329  |  | fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)  | 
2330  | 0  | { | 
2331  | 0  |   int low, up, syn_allow, non_low = 0;  | 
2332  | 0  |   int r = 0;  | 
2333  | 0  |   OnigCodePoint c;  | 
2334  | 0  |   OnigEncoding enc = env->enc;  | 
2335  | 0  |   UChar* p = *src;  | 
  /* NOTE(review): PFETCH_READY / PEND / PPEEK / PFETCH / PUNFETCH are defined
   * elsewhere; they appear to operate on the locals p, end and enc - confirm. */
2336  | 0  |   PFETCH_READY;  | 
2337  |  | 
  | 
2338  | 0  |   syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);  | 
2339  |  | 
  | 
2340  | 0  |   if (PEND) { | 
2341  | 0  |     if (syn_allow)  | 
2342  | 0  |       return 1;  /* "....{" : OK! */ | 
2343  | 0  |     else  | 
2344  | 0  |       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */ | 
2345  | 0  |   }  | 
2346  |  |  | 
  /* In strict mode a brace directly followed by ')', '(' or '|' is reported
   * with the same error as a pattern that ends at the brace. */
2347  | 0  |   if (! syn_allow) { | 
2348  | 0  |     c = PPEEK;  | 
2349  | 0  |     if (c == ')' || c == '(' || c == '|') { | 
2350  | 0  |       return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  | 
2351  | 0  |     }  | 
2352  | 0  |   }  | 
2353  |  |  | 
  /* Lower bound.  A negative scan result or a value above ONIG_MAX_REPEAT_NUM
   * is reported as "too big". */
2354  | 0  |   low = onig_scan_unsigned_number(&p, end, env->enc);  | 
2355  | 0  |   if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  | 
2356  | 0  |   if (low > ONIG_MAX_REPEAT_NUM)  | 
2357  | 0  |     return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  | 
2358  |  |  | 
2359  | 0  |   if (p == *src) { /* can't read low */ | 
2360  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { | 
2361  |  |       /* allow {,n} as {0,n} */ | 
2362  | 0  |       low = 0;  | 
2363  | 0  |       non_low = 1;  | 
2364  | 0  |     }  | 
2365  | 0  |     else  | 
2366  | 0  |       goto invalid;  | 
2367  | 0  |   }  | 
2368  |  |  | 
2369  | 0  |   if (PEND) goto invalid;  | 
2370  | 0  |   PFETCH(c);  | 
2371  | 0  |   if (c == ',') { | 
    /* Upper bound; if no digits follow the comma the interval is open-ended,
     * which is rejected when the lower bound was omitted too ("{,}"). */
2372  | 0  |     UChar* prev = p;  | 
2373  | 0  |     up = onig_scan_unsigned_number(&p, end, env->enc);  | 
2374  | 0  |     if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  | 
2375  | 0  |     if (up > ONIG_MAX_REPEAT_NUM)  | 
2376  | 0  |       return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;  | 
2377  |  |  | 
2378  | 0  |     if (p == prev) { | 
2379  | 0  |       if (non_low != 0)  | 
2380  | 0  |   goto invalid;  | 
2381  | 0  |       up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */ | 
2382  | 0  |     }  | 
2383  | 0  |   }  | 
2384  | 0  |   else { | 
    /* No comma: only valid if a lower bound was actually read. */
2385  | 0  |     if (non_low != 0)  | 
2386  | 0  |       goto invalid;  | 
2387  |  |  | 
2388  | 0  |     PUNFETCH;  | 
2389  | 0  |     up = low;  /* {n} : exact n times */ | 
2390  | 0  |     r = 2;     /* fixed */  | 
2391  | 0  |   }  | 
2392  |  |  | 
  /* Closing brace; with ONIG_SYN_OP_ESC_BRACE_INTERVAL it must be preceded by
   * the syntax's escape character. */
2393  | 0  |   if (PEND) goto invalid;  | 
2394  | 0  |   PFETCH(c);  | 
2395  | 0  |   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { | 
2396  | 0  |     if (c != MC_ESC(env->syntax)) goto invalid;  | 
2397  | 0  |     if (PEND) goto invalid;  | 
2398  | 0  |     PFETCH(c);  | 
2399  | 0  |   }  | 
2400  | 0  |   if (c != '}') goto invalid;  | 
2401  |  |  | 
2402  | 0  |   if (!IS_REPEAT_INFINITE(up) && low > up) { | 
2403  | 0  |     return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;  | 
2404  | 0  |   }  | 
2405  |  |  | 
  /* Success: only now are the token and the caller's position updated. */
2406  | 0  |   tok->type = TK_INTERVAL;  | 
2407  | 0  |   tok->u.repeat.lower = low;  | 
2408  | 0  |   tok->u.repeat.upper = up;  | 
2409  | 0  |   *src = p;  | 
2410  | 0  |   return r; /* 0: normal {n,m}, 2: fixed {n} */ | 
2411  |  |  | 
2412  | 0  |  invalid:  | 
2413  | 0  |   if (syn_allow)  | 
2414  | 0  |     return 1;  /* OK */  | 
2415  | 0  |   else  | 
2416  | 0  |     return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;  | 
2417  | 0  | }  | 
2418  |  |  | 
/*
 * Decode the escape sequence whose body starts at *src (the position after
 * the escape character) into a single code point.
 *
 * Handles \M-x (meta), \C-x and \cx (control) when the corresponding syntax
 * options are enabled; every other character goes through
 * conv_backslash_value().  If the character after "\M-" or the control
 * prefix is itself the escape character, the function recurses to decode it.
 *
 * On success stores the value in *val, advances *src and returns 0.  On
 * failure returns an ONIGERR_* code and leaves *src and *val untouched.
 * NOTE(review): PEND / PFETCH_S are defined elsewhere; they appear to operate
 * on the locals p, end and enc - confirm.
 */
2419  |  | /* \M-, \C-, \c, or \... */  | 
2420  |  | static int  | 
2421  |  | fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)  | 
2422  | 7.39k  | { | 
2423  | 7.39k  |   int v;  | 
2424  | 7.39k  |   OnigCodePoint c;  | 
2425  | 7.39k  |   OnigEncoding enc = env->enc;  | 
2426  | 7.39k  |   UChar* p = *src;  | 
2427  |  |  | 
2428  | 7.39k  |   if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;  | 
2429  |  |  | 
2430  | 7.39k  |   PFETCH_S(c);  | 
2431  | 7.39k  |   switch (c) { | 
  /* \M-x : take the low byte of x and set bit 0x80. */
2432  | 0  |   case 'M':  | 
2433  | 0  |     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { | 
2434  | 0  |       if (PEND) return ONIGERR_END_PATTERN_AT_META;  | 
2435  | 0  |       PFETCH_S(c);  | 
2436  | 0  |       if (c != '-') return ONIGERR_META_CODE_SYNTAX;  | 
2437  | 0  |       if (PEND) return ONIGERR_END_PATTERN_AT_META;  | 
2438  | 0  |       PFETCH_S(c);  | 
2439  | 0  |       if (c == MC_ESC(env->syntax)) { | 
2440  | 0  |   v = fetch_escaped_value(&p, end, env, &c);  | 
2441  | 0  |   if (v < 0) return v;  | 
2442  | 0  |       }  | 
2443  | 0  |       c = ((c & 0xff) | 0x80);  | 
2444  | 0  |     }  | 
2445  | 0  |     else  | 
2446  | 0  |       goto backslash;  | 
2447  | 0  |     break;  | 
2448  |  |  | 
  /* \C-x : after the '-', shares the control handling of \cx below. */
2449  | 0  |   case 'C':  | 
2450  | 0  |     if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { | 
2451  | 0  |       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;  | 
2452  | 0  |       PFETCH_S(c);  | 
2453  | 0  |       if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;  | 
2454  | 0  |       goto control;  | 
2455  | 0  |     }  | 
2456  | 0  |     else  | 
2457  | 0  |       goto backslash;  | 
2458  |  |  | 
  /* \cx : '?' gives 0177; any other x is masked with 0x9f.  When the option
   * is disabled, control falls through to the default case. */
2459  | 0  |   case 'c':  | 
2460  | 0  |     if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { | 
2461  | 0  |     control:  | 
2462  | 0  |       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;  | 
2463  | 0  |       PFETCH_S(c);  | 
2464  | 0  |       if (c == '?') { | 
2465  | 0  |   c = 0177;  | 
2466  | 0  |       }  | 
2467  | 0  |       else { | 
2468  | 0  |   if (c == MC_ESC(env->syntax)) { | 
2469  | 0  |     v = fetch_escaped_value(&p, end, env, &c);  | 
2470  | 0  |     if (v < 0) return v;  | 
2471  | 0  |   }  | 
2472  | 0  |   c &= 0x9f;  | 
2473  | 0  |       }  | 
2474  | 0  |       break;  | 
2475  | 0  |     }  | 
2476  |  |     /* fall through */  | 
2477  |  |  | 
2478  | 7.39k  |   default:  | 
2479  | 7.39k  |     { | 
2480  | 7.39k  |     backslash:  | 
2481  | 7.39k  |       c = conv_backslash_value(c, env);  | 
2482  | 7.39k  |     }  | 
2483  | 7.39k  |     break;  | 
2484  | 7.39k  |   }  | 
2485  |  |  | 
  /* Commit the new position and the decoded value only on success. */
2486  | 7.39k  |   *src = p;  | 
2487  | 7.39k  |   *val = c;  | 
2488  | 7.39k  |   return 0;  | 
2489  | 7.39k  | }  | 
2490  |  |  | 
2491  |  | static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);  | 
2492  |  |  | 
2493  |  | static OnigCodePoint  | 
2494  |  | get_name_end_code_point(OnigCodePoint start)  | 
2495  | 1.64k  | { | 
2496  | 1.64k  |   switch (start) { | 
2497  | 1.64k  |   case '<':  return (OnigCodePoint )'>'; break;  | 
2498  | 0  |   case '\'': return (OnigCodePoint )'\''; break;  | 
2499  | 0  |   case '(':  return (OnigCodePoint )')'; break; | 
2500  | 0  |   case '{':  return (OnigCodePoint )'}'; break; | 
2501  | 0  |   default:  | 
2502  | 0  |     break;  | 
2503  | 1.64k  |   }  | 
2504  |  |  | 
2505  | 0  |   return (OnigCodePoint )0;  | 
2506  | 1.64k  | }  | 
2507  |  |  | 
2508  |  | #ifdef USE_NAMED_GROUP  | 
/* ONIGENC_IS_CODE_NAME(enc, c): may code point `c` appear in a group name?
 * When RUBY is defined every code point is accepted; otherwise the code
 * point must satisfy ONIGENC_IS_CODE_WORD(enc, c). */
2509  |  | # ifdef RUBY  | 
2510  |  | #  define ONIGENC_IS_CODE_NAME(enc, c)  TRUE  | 
2511  |  | # else  | 
2512  | 6.16k  | #  define ONIGENC_IS_CODE_NAME(enc, c)  ONIGENC_IS_CODE_WORD(enc, c)  | 
2513  |  | # endif  | 
2514  |  |  | 
2515  |  | # ifdef USE_BACKREF_WITH_LEVEL  | 
2516  |  | /*  | 
2517  |  |    \k<name+n>, \k<name-n>  | 
2518  |  |    \k<num+n>,  \k<num-n>  | 
2519  |  |    \k<-num+n>, \k<-num-n>  | 
2520  |  | */  | 
/*
 * Parse a back-reference name or number with an optional "+n"/"-n" level
 * suffix, starting at *src (just after the opening delimiter start_code).
 *
 *   start_code : opening delimiter; the matching closer is looked up with
 *                get_name_end_code_point().
 *   src        : in/out cursor; advanced past the closing delimiter only on
 *                success.
 *   end        : end of the pattern buffer.
 *   rname_end  : out; on success, start of the character that terminated the
 *                name part (the closer, or the '+'/'-' that begins the level).
 *   rback_num  : out; 0 for a textual name, otherwise the signed group number.
 *   rlevel     : out; written only when a level suffix was parsed.
 *
 * Returns 1 when a level suffix was present, 0 when it was not, or an
 * ONIGERR_* code on failure.  For failures reported through the common exit
 * path the offending range [*src, name_end) is handed to
 * onig_scan_env_set_error_string(); the early returns (empty name, number too
 * big) do not call it.
 *
 * NOTE(review): PFETCH/PUNFETCH/PEND are macros defined outside this chunk;
 * the comments assume fetch-and-advance / step-back / at-end semantics.
 */
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ScanEnv* env,
                      int* rback_num, int* rlevel)
{
  int r, sign, is_num, exist_level;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;
  /* is_num: 0 = textual name, 1 = number (last char was a digit),
             2 = only a leading '-' has been seen so far */
  is_num = exist_level = 0;
  sign = 1;
  pnum_head = *src;

  end_code = get_name_end_code_point(start_code);

  name_end = end;
  r = 0;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    /* Classify the reference by its first character. */
    PFETCH(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
      is_num = 1;
    }
    else if (c == '-') {
      is_num = 2;
      sign = -1;
      pnum_head = p;  /* digits start after the '-' */
    }
    else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
      /* Remember the error but keep scanning so name_end gets located. */
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  /* Consume the rest of the name up to a terminator or a level sign. */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      /* A lone '-' with no digits after it is not a valid number. */
      if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (is_num != 0) {
      if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
        is_num = 1;
      }
      else {
        /* A number followed by a non-digit: reject, stop treating as number. */
        r = ONIGERR_INVALID_GROUP_NAME;
        is_num = 0;
      }
    }
    else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  /* No error so far, but the name did not end on the closing delimiter:
     either a level suffix follows or the reference is malformed. */
  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto end;
      }
      /* Peek one character: the level must start with a digit. */
      PFETCH(c);
      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
      PUNFETCH;
      level = onig_scan_unsigned_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      /* The level must be followed immediately by the closing delimiter;
         anything else falls through to err below. */
      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto end;
      }
    }

    /* Also reached by a backward goto from the number check under "end:". */
  err:
    r = ONIGERR_INVALID_GROUP_NAME;
    name_end = end;
  }

 end:
  if (r == 0) {
    if (is_num != 0) {
      /* Convert the digits in [pnum_head, name_end) to the group number. */
      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) goto err;  /* 0 is not accepted as a group number */

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return (exist_level ? 1 : 0);
  }
  else {
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
2635  |  | # endif /* USE_BACKREF_WITH_LEVEL */  | 
2636  |  |  | 
2637  |  | /*  | 
2638  |  |   ref: 0 -> define name    (don't allow number name)  | 
2639  |  |        1 -> reference name (allow number name)  | 
2640  |  | */  | 
/*
 * Parse a group name (or, when ref == 1, a possibly negative group number)
 * starting at *src, just after the opening delimiter start_code.
 *
 *   ref == 0 : defining a name; a leading digit or '-' is rejected.
 *   ref == 1 : referencing; "123" and "-123" are accepted as numbers.
 *
 * On success returns 0, stores in *rname_end the start of the closing
 * delimiter, advances *src past that delimiter, and sets *rback_num to the
 * signed number (0 for a textual name).  On failure returns an ONIGERR_*
 * code and leaves *src untouched; except for the early returns (empty name,
 * number too big) the range [*src, name_end) is passed to
 * onig_scan_env_set_error_string().
 *
 * NOTE(review): PFETCH_S and PEND are macros defined outside this chunk; the
 * comments assume fetch-and-advance and at-end semantics.
 */
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
           UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
{
  int r, is_num, sign;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  OnigEncoding enc = env->enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p = *src;

  *rback_num = 0;

  end_code = get_name_end_code_point(start_code);

  name_end = end;
  pnum_head = *src;
  r = 0;
  /* is_num: 0 = textual name, 1 = number (last char was a digit),
             2 = only a leading '-' has been seen so far */
  is_num = 0;
  sign = 1;
  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    /* Classify by the first character. */
    PFETCH_S(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
      if (ref == 1)
        is_num = 1;
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        is_num = 0;
      }
    }
    else if (c == '-') {
      if (ref == 1) {
        is_num = 2;
        sign = -1;
        pnum_head = p;  /* digits start after the '-' */
      }
      else {
        r = ONIGERR_INVALID_GROUP_NAME;
        is_num = 0;
      }
    }
    else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  if (r == 0) {
    /* Consume the remaining characters up to the closer or ')'. */
    while (!PEND) {
      name_end = p;
      PFETCH_S(c);
      if (c == end_code || c == ')') {
        if (is_num == 2) {
          /* '-' with no digits after it */
          r = ONIGERR_INVALID_GROUP_NAME;
          goto teardown;
        }
        break;
      }

      if (is_num != 0) {
        if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
          is_num = 1;
        }
        else {
          /* Non-digit inside a number: pick the error by whether the
             character is at least a word character. */
          if (!ONIGENC_IS_CODE_WORD(enc, c))
            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
          else
            r = ONIGERR_INVALID_GROUP_NAME;
          goto teardown;
        }
      }
      else {
        if (!ONIGENC_IS_CODE_NAME(enc, c)) {
          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
          goto teardown;
        }
      }
    }

    /* The loop ended on ')' or ran out of input without seeing the closer. */
    if (c != end_code) {
      r = ONIGERR_INVALID_GROUP_NAME;
      name_end = end;
      goto err;
    }

    if (is_num != 0) {
      /* Convert the digits in [pnum_head, name_end) to the group number. */
      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
      else if (*rback_num == 0) {
        r = ONIGERR_INVALID_GROUP_NAME;
        goto err;
      }

      *rback_num *= sign;
    }

    *rname_end = name_end;
    *src = p;
    return 0;
  }
  else {
    /* Error path: keep scanning to the next closer / ')' (or the end of the
       input) so that name_end, used for the error string, is positioned
       there. */
teardown:
    while (!PEND) {
      name_end = p;
      PFETCH_S(c);
      if (c == end_code || c == ')')
        break;
    }
    if (PEND)
      name_end = end;

  err:
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
2763  |  | #else  | 
/*
 * Variant of fetch_name compiled when USE_NAMED_GROUP is not defined: only a
 * (possibly negative) decimal group number is accepted between the
 * delimiters.
 *
 * On success returns 0, sets *rback_num to the signed number, stores the
 * start of the closing delimiter in *rname_end and advances *src past it.
 * On failure returns an ONIGERR_* code; except for the early returns (empty
 * name, number too big) the range [*src, name_end) is passed to
 * onig_scan_env_set_error_string().
 *
 * The `ref` parameter is not read in this variant, and `is_num` is assigned
 * but never tested.
 *
 * NOTE(review): PFETCH and PEND are macros defined outside this chunk; the
 * comments assume fetch-and-advance and at-end semantics.
 */
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
           UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
{
  int r, is_num, sign;
  OnigCodePoint end_code;
  OnigCodePoint c = 0;
  UChar *name_end;
  OnigEncoding enc = env->enc;
  UChar *pnum_head;
  UChar *p = *src;
  PFETCH_READY;

  *rback_num = 0;

  end_code = get_name_end_code_point(start_code);

  /* *rname_end is given a defined value even if an error is returned. */
  *rname_end = name_end = end;
  r = 0;
  pnum_head = *src;
  is_num = 0;
  sign = 1;

  if (PEND) {
    return ONIGERR_EMPTY_GROUP_NAME;
  }
  else {
    PFETCH(c);
    if (c == end_code)
      return ONIGERR_EMPTY_GROUP_NAME;

    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
      is_num = 1;
    }
    else if (c == '-') {
      is_num = 2;
      sign = -1;
      pnum_head = p;  /* digits start after the '-' */
    }
    else {
      /* Anything other than a digit or '-' is invalid here. */
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
  }

  /* Scan to the closer or ')' even after an error, so name_end is located. */
  while (!PEND) {
    name_end = p;

    PFETCH(c);
    if (c == end_code || c == ')') break;
    if (! ONIGENC_IS_CODE_DIGIT(enc, c))
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }
  /* Ended on ')' or ran out of input without seeing the closer. */
  if (r == 0 && c != end_code) {
    r = ONIGERR_INVALID_GROUP_NAME;
    name_end = end;
  }

  if (r == 0) {
    /* Convert the digits in [pnum_head, name_end) to the group number. */
    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
    else if (*rback_num == 0) {
      r = ONIGERR_INVALID_GROUP_NAME;
      goto err;
    }
    *rback_num *= sign;

    *rname_end = name_end;
    *src = p;
    return 0;
  }
  else {
  err:
    onig_scan_env_set_error_string(env, r, *src, name_end);
    return r;
  }
}
2840  |  | #endif /* USE_NAMED_GROUP */  | 
2841  |  |  | 
2842  |  |  | 
2843  |  | static void  | 
2844  |  | onig_syntax_warn(ScanEnv *env, const char *fmt, ...)  | 
2845  | 0  | { | 
2846  | 0  |     va_list args;  | 
2847  | 0  |     UChar buf[WARN_BUFSIZE];  | 
2848  | 0  |     va_start(args, fmt);  | 
2849  | 0  |     onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,  | 
2850  | 0  |     env->pattern, env->pattern_end,  | 
2851  | 0  |     (const UChar *)fmt, args);  | 
2852  | 0  |     va_end(args);  | 
2853  |  | #ifdef RUBY  | 
2854  |  |     if (env->sourcefile == NULL)  | 
2855  |  |       rb_warn("%s", (char *)buf); | 
2856  |  |     else  | 
2857  |  |       rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);  | 
2858  |  | #else  | 
2859  | 0  |     (*onig_warn)((char* )buf);  | 
2860  | 0  | #endif  | 
2861  | 0  | }  | 
2862  |  |  | 
2863  |  | static void  | 
2864  |  | CC_ESC_WARN(ScanEnv *env, UChar *c)  | 
2865  | 0  | { | 
2866  | 0  |   if (onig_warn == onig_null_warn) return ;  | 
2867  |  |  | 
2868  | 0  |   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&  | 
2869  | 0  |       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { | 
2870  | 0  |     onig_syntax_warn(env, "character class has '%s' without escape", c);  | 
2871  | 0  |   }  | 
2872  | 0  | }  | 
2873  |  |  | 
2874  |  | static void  | 
2875  |  | CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)  | 
2876  | 0  | { | 
2877  | 0  |   if (onig_warn == onig_null_warn) return ;  | 
2878  |  |  | 
2879  | 0  |   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { | 
2880  | 0  |     onig_syntax_warn(env, "regular expression has '%s' without escape", c);  | 
2881  | 0  |   }  | 
2882  | 0  | }  | 
2883  |  |  | 
/* Fallback for builds where RTEST is not already defined: every argument is
   treated as true, so the `!RTEST(ruby_verbose)` guards in CC_DUP_WARN and
   UNKNOWN_ESC_WARN never suppress a warning. */
#ifndef RTEST
# define RTEST(v)  1
#endif
2887  |  |  | 
2888  |  | static void  | 
2889  |  | CC_DUP_WARN(ScanEnv *env, OnigCodePoint from ARG_UNUSED, OnigCodePoint to ARG_UNUSED)  | 
2890  | 0  | { | 
2891  | 0  |   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;  | 
2892  |  |  | 
2893  | 0  |   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) &&  | 
2894  | 0  |       !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { | 
2895  |  | #ifdef WARN_ALL_CC_DUP  | 
2896  |  |     onig_syntax_warn(env, "character class has duplicated range: %04x-%04x", from, to);  | 
2897  |  | #else  | 
2898  | 0  |     env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;  | 
2899  | 0  |     onig_syntax_warn(env, "character class has duplicated range");  | 
2900  | 0  | #endif  | 
2901  | 0  |   }  | 
2902  | 0  | }  | 
2903  |  |  | 
2904  |  | static void  | 
2905  |  | UNKNOWN_ESC_WARN(ScanEnv *env, int c)  | 
2906  | 0  | { | 
2907  | 0  |   if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;  | 
2908  | 0  |   onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);  | 
2909  | 0  | }  | 
2910  |  |  | 
2911  |  | static UChar*  | 
2912  |  | find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,  | 
2913  |  |       UChar **next, OnigEncoding enc)  | 
2914  | 0  | { | 
2915  | 0  |   int i;  | 
2916  | 0  |   OnigCodePoint x;  | 
2917  | 0  |   UChar *q;  | 
2918  | 0  |   UChar *p = from;  | 
2919  |  | 
  | 
2920  | 0  |   while (p < to) { | 
2921  | 0  |     x = ONIGENC_MBC_TO_CODE(enc, p, to);  | 
2922  | 0  |     q = p + enclen(enc, p, to);  | 
2923  | 0  |     if (x == s[0]) { | 
2924  | 0  |       for (i = 1; i < n && q < to; i++) { | 
2925  | 0  |   x = ONIGENC_MBC_TO_CODE(enc, q, to);  | 
2926  | 0  |   if (x != s[i]) break;  | 
2927  | 0  |   q += enclen(enc, q, to);  | 
2928  | 0  |       }  | 
2929  | 0  |       if (i >= n) { | 
2930  | 0  |   if (IS_NOT_NULL(next))  | 
2931  | 0  |     *next = q;  | 
2932  | 0  |   return p;  | 
2933  | 0  |       }  | 
2934  | 0  |     }  | 
2935  | 0  |     p = q;  | 
2936  | 0  |   }  | 
2937  | 0  |   return NULL_UCHARP;  | 
2938  | 0  | }  | 
2939  |  |  | 
2940  |  | static int  | 
2941  |  | str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,  | 
2942  |  |      OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)  | 
2943  | 0  | { | 
2944  | 0  |   int i, in_esc;  | 
2945  | 0  |   OnigCodePoint x;  | 
2946  | 0  |   UChar *q;  | 
2947  | 0  |   UChar *p = from;  | 
2948  |  | 
  | 
2949  | 0  |   in_esc = 0;  | 
2950  | 0  |   while (p < to) { | 
2951  | 0  |     if (in_esc) { | 
2952  | 0  |       in_esc = 0;  | 
2953  | 0  |       p += enclen(enc, p, to);  | 
2954  | 0  |     }  | 
2955  | 0  |     else { | 
2956  | 0  |       x = ONIGENC_MBC_TO_CODE(enc, p, to);  | 
2957  | 0  |       q = p + enclen(enc, p, to);  | 
2958  | 0  |       if (x == s[0]) { | 
2959  | 0  |   for (i = 1; i < n && q < to; i++) { | 
2960  | 0  |     x = ONIGENC_MBC_TO_CODE(enc, q, to);  | 
2961  | 0  |     if (x != s[i]) break;  | 
2962  | 0  |     q += enclen(enc, q, to);  | 
2963  | 0  |   }  | 
2964  | 0  |   if (i >= n) return 1;  | 
2965  | 0  |   p += enclen(enc, p, to);  | 
2966  | 0  |       }  | 
2967  | 0  |       else { | 
2968  | 0  |   x = ONIGENC_MBC_TO_CODE(enc, p, to);  | 
2969  | 0  |   if (x == bad) return 0;  | 
2970  | 0  |   else if (x == MC_ESC(syn)) in_esc = 1;  | 
2971  | 0  |   p = q;  | 
2972  | 0  |       }  | 
2973  | 0  |     }  | 
2974  | 0  |   }  | 
2975  | 0  |   return 0;  | 
2976  | 0  | }  | 
2977  |  |  | 
/*
 * Read one token from inside a character class ("[...]").
 *
 *   tok : out; type, base, escaped and the matching member of tok->u are
 *         filled in.
 *   src : in/out cursor; advanced past the token on normal completion.
 *   end : end of the pattern buffer.
 *   env : supplies the syntax and the encoding.
 *
 * Returns tok->type (TK_EOT at end of input) or an ONIGERR_* code; on an
 * error return *src is not updated.
 *
 * NOTE(review): PFETCH, PUNFETCH, PINC, PPEEK, PPEEK_IS and PEND are macros
 * defined outside this chunk; the comments assume fetch-and-advance,
 * step-back, advance, peek and at-end semantics respectively.
 */
static int
fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
  int num;
  OnigCodePoint c, c2;
  const OnigSyntaxType* syn = env->syntax;
  OnigEncoding enc = env->enc;
  UChar* prev;
  UChar* p = *src;
  PFETCH_READY;

  if (PEND) {
    tok->type = TK_EOT;
    return tok->type;
  }

  /* Default: a plain, unescaped character. */
  PFETCH(c);
  tok->type = TK_CHAR;
  tok->base = 0;
  tok->u.c  = c;
  tok->escaped = 0;

  if (c == ']') {
    tok->type = TK_CC_CLOSE;
  }
  else if (c == '-') {
    tok->type = TK_CC_RANGE;
  }
  else if (c == MC_ESC(syn)) {
    /* Without backslash escapes in classes the escape char is a literal. */
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
      goto end;

    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;

    PFETCH(c);
    tok->escaped = 1;
    tok->u.c = c;
    switch (c) {
    case 'w':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not   = 0;
      break;
    case 'W':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
      tok->u.prop.not   = 1;
      break;
    case 'd':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not   = 0;
      break;
    case 'D':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
      tok->u.prop.not   = 1;
      break;
    case 's':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not   = 0;
      break;
    case 'S':
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
      tok->u.prop.not   = 1;
      break;
    case 'h':
      /* \h is a hex-digit class only if the syntax enables it; otherwise
         the token stays an escaped TK_CHAR 'h'. */
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not   = 0;
      break;
    case 'H':
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
      tok->type = TK_CHAR_TYPE;
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
      tok->u.prop.not   = 1;
      break;

    case 'p':
    case 'P':
      if (PEND) break;

      /* \p{...} / \P{...}: only the opening "{" (and an optional '^') is
         consumed here; the property name is left in the input. */
      c2 = PPEEK;
      if (c2 == '{' &&
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
        PINC;
        tok->type = TK_CHAR_PROPERTY;
        tok->u.prop.not = (c == 'P' ? 1 : 0);

        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
          PFETCH(c2);
          if (c2 == '^') {
            /* A leading '^' inverts the negation flag. */
            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
          }
          else
            PUNFETCH;
        }
      }
      else {
        onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
      }
      break;

    case 'x':
      if (PEND) break;

      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
        /* \x{H...}: brace-delimited hexadecimal code point. */
        PINC;
        num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
        if (!PEND) {
          /* A further hex digit after the scanned ones means it is too long. */
          c2 = PPEEK;
          if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        /* Require at least one character after '{' and then a closing '}'. */
        if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
          PINC;
          tok->type   = TK_CODE_POINT;
          tok->base   = 16;
          tok->u.code = (OnigCodePoint )num;
        }
        else {
          /* can't read nothing or invalid format */
          p = prev;
        }
      }
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
        /* \xHH: hexadecimal raw byte. */
        num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
        if (p == prev) {  /* can't read nothing. */
          num = 0; /* but, it's not error */
        }
        tok->type = TK_RAW_BYTE;
        tok->base = 16;
        tok->u.c  = num;
      }
      break;

    case 'u':
      if (PEND) break;

      prev = p;
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
        /* \uHHHH: hexadecimal code point.  The scanner's return values
           below -1 are reported as too-short input, -1 as too big. */
        num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
        if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
        else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
        if (p == prev) {  /* can't read nothing. */
          num = 0; /* but, it's not error */
        }
        tok->type   = TK_CODE_POINT;
        tok->base   = 16;
        tok->u.code = (OnigCodePoint )num;
      }
      break;

    case 'o':
      if (PEND) break;

      prev = p;
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
        /* \o{O...}: brace-delimited octal code point. */
        PINC;
        num = scan_unsigned_octal_number(&p, end, 11, enc);
        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
        if (!PEND) {
          /* A further octal digit after the scanned ones means it is too long. */
          c2 = PPEEK;
          if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
        }

        /* Require at least one character after '{' and then a closing '}'. */
        if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
          PINC;
          tok->type   = TK_CODE_POINT;
          tok->base   = 8;
          tok->u.code = (OnigCodePoint )num;
        }
        else {
          /* can't read nothing or invalid format */
          p = prev;
        }
      }
      break;

    case '0':
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
        /* Octal raw byte: step back so the first digit is scanned as well. */
        PUNFETCH;
        prev = p;
        num = scan_unsigned_octal_number(&p, end, 3, enc);
        if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;
        if (p == prev) {  /* can't read nothing. */
          num = 0; /* but, it's not error */
        }
        tok->type = TK_RAW_BYTE;
        tok->base = 8;
        tok->u.c  = num;
      }
      break;

    default:
      /* Any other escape: step back and let fetch_escaped_value() decode
         it.  If the decoded value differs from the literal character, the
         token becomes a code point; otherwise it stays an escaped TK_CHAR. */
      PUNFETCH;
      num = fetch_escaped_value(&p, end, env, &c2);
      if (num < 0) return num;
      if ((OnigCodePoint )tok->u.c != c2) {
        tok->u.code = (OnigCodePoint )c2;
        tok->type   = TK_CODE_POINT;
      }
      break;
    }
  }
  else if (c == '[') {
    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
      /* Candidate POSIX bracket "[:name:]": accept it only if ":]" occurs
         before an unescaped ']'. */
      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
      tok->backp = p; /* point at '[' is read */
      PINC;
      if (str_exist_check_with_esc(send, 2, p, end,
                                   (OnigCodePoint )']', enc, syn)) {
        tok->type = TK_POSIX_BRACKET_OPEN;
      }
      else {
        /* Not a POSIX bracket: undo the advance and treat '[' as a nested
           class opener (or a literal). */
        PUNFETCH;
        goto cc_in_cc;
      }
    }
    else {
    cc_in_cc:
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
        tok->type = TK_CC_CC_OPEN;
      }
      else {
        CC_ESC_WARN(env, (UChar* )"[");
      }
    }
  }
  else if (c == '&') {
    /* "&&" is the class intersection operator when set operations are on. */
    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
        !PEND && (PPEEK_IS('&'))) {
      PINC;
      tok->type = TK_CC_AND;
    }
  }

 end:
  *src = p;
  return tok->type;
}
3228  |  |  | 
3229  |  | #ifdef USE_NAMED_GROUP  | 
3230  |  | static int  | 
3231  |  | fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,  | 
3232  |  |         UChar* end, ScanEnv* env)  | 
3233  | 0  | { | 
3234  | 0  |   int r, num;  | 
3235  | 0  |   const OnigSyntaxType* syn = env->syntax;  | 
3236  | 0  |   UChar* prev;  | 
3237  | 0  |   UChar* p = *src;  | 
3238  | 0  |   UChar* name_end;  | 
3239  | 0  |   int* backs;  | 
3240  | 0  |   int back_num;  | 
3241  |  | 
  | 
3242  | 0  |   prev = p;  | 
3243  |  | 
  | 
3244  | 0  | # ifdef USE_BACKREF_WITH_LEVEL  | 
3245  | 0  |   name_end = NULL_UCHARP; /* no need. escape gcc warning. */  | 
3246  | 0  |   r = fetch_name_with_level(c, &p, end, &name_end,  | 
3247  | 0  |           env, &back_num, &tok->u.backref.level);  | 
3248  | 0  |   if (r == 1) tok->u.backref.exist_level = 1;  | 
3249  | 0  |   else        tok->u.backref.exist_level = 0;  | 
3250  |  | # else  | 
3251  |  |   r = fetch_name(&p, end, &name_end, env, &back_num, 1);  | 
3252  |  | # endif  | 
3253  | 0  |   if (r < 0) return r;  | 
3254  |  |  | 
3255  | 0  |   if (back_num != 0) { | 
3256  | 0  |     if (back_num < 0) { | 
3257  | 0  |       back_num = BACKREF_REL_TO_ABS(back_num, env);  | 
3258  | 0  |       if (back_num <= 0)  | 
3259  | 0  |   return ONIGERR_INVALID_BACKREF;  | 
3260  | 0  |     }  | 
3261  |  |  | 
3262  | 0  |     if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { | 
3263  | 0  |       if (back_num > env->num_mem ||  | 
3264  | 0  |     IS_NULL(SCANENV_MEM_NODES(env)[back_num]))  | 
3265  | 0  |   return ONIGERR_INVALID_BACKREF;  | 
3266  | 0  |     }  | 
3267  | 0  |     tok->type = TK_BACKREF;  | 
3268  | 0  |     tok->u.backref.by_name = 0;  | 
3269  | 0  |     tok->u.backref.num  = 1;  | 
3270  | 0  |     tok->u.backref.ref1 = back_num;  | 
3271  | 0  |   }  | 
3272  | 0  |   else { | 
3273  | 0  |     num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);  | 
3274  | 0  |     if (num <= 0) { | 
3275  | 0  |       onig_scan_env_set_error_string(env,  | 
3276  | 0  |          ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);  | 
3277  | 0  |       return ONIGERR_UNDEFINED_NAME_REFERENCE;  | 
3278  | 0  |     }  | 
3279  | 0  |     if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { | 
3280  | 0  |       int i;  | 
3281  | 0  |       for (i = 0; i < num; i++) { | 
3282  | 0  |   if (backs[i] > env->num_mem ||  | 
3283  | 0  |       IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))  | 
3284  | 0  |     return ONIGERR_INVALID_BACKREF;  | 
3285  | 0  |       }  | 
3286  | 0  |     }  | 
3287  |  |  | 
3288  | 0  |     tok->type = TK_BACKREF;  | 
3289  | 0  |     tok->u.backref.by_name = 1;  | 
3290  | 0  |     if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { | 
3291  | 0  |       tok->u.backref.num  = 1;  | 
3292  | 0  |       tok->u.backref.ref1 = backs[0];  | 
3293  | 0  |     }  | 
3294  | 0  |     else { | 
3295  | 0  |       tok->u.backref.num  = num;  | 
3296  | 0  |       tok->u.backref.refs = backs;  | 
3297  | 0  |     }  | 
3298  | 0  |   }  | 
3299  | 0  |   *src = p;  | 
3300  | 0  |   return 0;  | 
3301  | 0  | }  | 
3302  |  | #endif  | 
3303  |  |  | 
/*
 * fetch_token: read the next token of the pattern text at *src (bounded by
 * end) and describe it in *tok.
 *
 *   tok - receives the token: type, base, backp, escaped and the member of
 *         tok->u that belongs to the resulting type.
 *   src - in/out scan position; it is written back only at the common exit
 *         at the bottom of the function.
 *   end - end of the pattern text.
 *   env - supplies the encoding, syntax, option bits, group count and
 *         pattern start that are consulted while scanning.
 *
 * Returns tok->type on success.  When the input is already exhausted at the
 * `start` label the result is TK_EOT and *src is left untouched.  Every error
 * path returns an ONIGERR_* code, or the negative result of a helper, before
 * *src is updated.
 *
 * The labels greedy_check and possessive_check sit inside the switch for
 * escaped characters but are also the target of gotos from the switch for
 * unescaped characters, so the two switches depend on each other.
 */
3304  |  | static int  | 
3305  |  | fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)  | 
3306  | 212k  | { | 
3307  | 212k  |   int r, num;  | 
3308  | 212k  |   OnigCodePoint c;  | 
3309  | 212k  |   OnigEncoding enc = env->enc;  | 
3310  | 212k  |   const OnigSyntaxType* syn = env->syntax;  | 
3311  | 212k  |   UChar* prev;  | 
3312  | 212k  |   UChar* p = *src;  | 
3313  | 212k  |   PFETCH_READY;  | 
3314  |  |  | 
3315  | 212k  |  start:  | 
3316  | 212k  |   if (PEND) { | 
3317  | 9.45k  |     tok->type = TK_EOT;  | 
3318  | 9.45k  |     return tok->type;  | 
3319  | 9.45k  |   }  | 
3320  |  |  | 
                  /* Default result: an ordinary string character; backp marks where the token begins. */
3321  | 202k  |   tok->type  = TK_STRING;  | 
3322  | 202k  |   tok->base  = 0;  | 
3323  | 202k  |   tok->backp = p;  | 
3324  |  |  | 
3325  | 202k  |   PFETCH(c);  | 
                  /* Guard against the fetch above having moved p beyond end
                   * (presumably a truncated multibyte character -- TODO confirm against PFETCH). */
3326  | 202k  |   if (p > end) return ONIGERR_PREMATURE_END_OF_CHAR_CLASS;  | 
3327  | 202k  |   if (IS_MC_ESC_CODE(c, syn)) { | 
3328  | 7.39k  |     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;  | 
3329  |  |  | 
                    /* For an escape sequence backp points at the character that follows the escape character. */
3330  | 7.39k  |     tok->backp = p;  | 
3331  | 7.39k  |     PFETCH(c);  | 
3332  |  |  | 
3333  | 7.39k  |     tok->u.c = c;  | 
3334  | 7.39k  |     tok->escaped = 1;  | 
                    /* Most cases first test whether the syntax enables the operator; when it does not,
                     * `break` leaves the token as TK_STRING with u.c == c. */
3335  | 7.39k  |     switch (c) { | 
3336  | 0  |     case '*':  | 
3337  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;  | 
3338  | 0  |       tok->type = TK_OP_REPEAT;  | 
3339  | 0  |       tok->u.repeat.lower = 0;  | 
3340  | 0  |       tok->u.repeat.upper = REPEAT_INFINITE;  | 
3341  | 0  |       goto greedy_check;  | 
3342  | 0  |       break;  | 
3343  |  |  | 
3344  | 0  |     case '+':  | 
3345  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;  | 
3346  | 0  |       tok->type = TK_OP_REPEAT;  | 
3347  | 0  |       tok->u.repeat.lower = 1;  | 
3348  | 0  |       tok->u.repeat.upper = REPEAT_INFINITE;  | 
3349  | 0  |       goto greedy_check;  | 
3350  | 0  |       break;  | 
3351  |  |  | 
3352  | 0  |     case '?':  | 
3353  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;  | 
3354  | 0  |       tok->type = TK_OP_REPEAT;  | 
3355  | 0  |       tok->u.repeat.lower = 0;  | 
3356  | 0  |       tok->u.repeat.upper = 1;  | 
                    /* Shared tail for every quantifier, escaped or not: a following '?' selects a
                     * non-greedy repeat when ONIG_SYN_OP_QMARK_NON_GREEDY is enabled; otherwise a
                     * following '+' may select a possessive repeat. */
3357  | 12.3k  |     greedy_check:  | 
3358  | 12.3k  |       if (!PEND && PPEEK_IS('?') && | 
3359  | 12.3k  |     IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { | 
3360  | 411  |   PFETCH(c);  | 
3361  | 411  |   tok->u.repeat.greedy     = 0;  | 
3362  | 411  |   tok->u.repeat.possessive = 0;  | 
3363  | 411  |       }  | 
3364  | 11.9k  |       else { | 
                      /* Also entered directly for a {n} interval when ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
                       * is set, which bypasses the non-greedy test above.  Which syntax bit applies
                       * depends on whether the token is a TK_INTERVAL. */
3365  | 11.9k  |       possessive_check:  | 
3366  | 11.9k  |   if (!PEND && PPEEK_IS('+') && | 
3367  | 11.9k  |       ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&  | 
3368  | 0  |         tok->type != TK_INTERVAL)  ||  | 
3369  | 0  |        (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&  | 
3370  | 0  |         tok->type == TK_INTERVAL))) { | 
3371  | 0  |     PFETCH(c);  | 
3372  | 0  |     tok->u.repeat.greedy     = 1;  | 
3373  | 0  |     tok->u.repeat.possessive = 1;  | 
3374  | 0  |   }  | 
3375  | 11.9k  |   else { | 
3376  | 11.9k  |     tok->u.repeat.greedy     = 1;  | 
3377  | 11.9k  |     tok->u.repeat.possessive = 0;  | 
3378  | 11.9k  |   }  | 
3379  | 11.9k  |       }  | 
3380  | 12.3k  |       break;  | 
3381  |  |  | 
3382  | 12.3k  |     case '{': | 
3383  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;  | 
3384  | 0  |       r = fetch_range_quantifier(&p, end, tok, env);  | 
3385  | 0  |       if (r < 0) return r;  /* error */  | 
3386  | 0  |       if (r == 0) goto greedy_check;  | 
3387  | 0  |       else if (r == 2) { /* {n} */ | 
3388  | 0  |   if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))  | 
3389  | 0  |     goto possessive_check;  | 
3390  |  |  | 
3391  | 0  |   goto greedy_check;  | 
3392  | 0  |       }  | 
3393  |  |       /* r == 1 : normal char */  | 
3394  | 0  |       break;  | 
3395  |  |  | 
3396  | 0  |     case '|':  | 
3397  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;  | 
3398  | 0  |       tok->type = TK_ALT;  | 
3399  | 0  |       break;  | 
3400  |  |  | 
3401  | 822  |     case '(': | 
3402  | 822  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;  | 
3403  | 0  |       tok->type = TK_SUBEXP_OPEN;  | 
3404  | 0  |       break;  | 
3405  |  |  | 
3406  | 411  |     case ')':  | 
3407  | 411  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;  | 
3408  | 0  |       tok->type = TK_SUBEXP_CLOSE;  | 
3409  | 0  |       break;  | 
3410  |  |  | 
3411  | 0  |     case 'w':  | 
3412  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;  | 
3413  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3414  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;  | 
3415  | 0  |       tok->u.prop.not   = 0;  | 
3416  | 0  |       break;  | 
3417  |  |  | 
3418  | 0  |     case 'W':  | 
3419  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;  | 
3420  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3421  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_WORD;  | 
3422  | 0  |       tok->u.prop.not   = 1;  | 
3423  | 0  |       break;  | 
3424  |  |  | 
3425  | 411  |     case 'b':  | 
3426  | 411  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;  | 
3427  | 411  |       tok->type = TK_ANCHOR;  | 
3428  | 411  |       tok->u.anchor.subtype = ANCHOR_WORD_BOUND;  | 
3429  | 411  |       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)  | 
3430  | 411  |     && ! IS_WORD_BOUND_ALL_RANGE(env->option);  | 
3431  | 411  |       break;  | 
3432  |  |  | 
3433  | 0  |     case 'B':  | 
3434  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;  | 
3435  | 0  |       tok->type = TK_ANCHOR;  | 
3436  | 0  |       tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;  | 
3437  | 0  |       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)  | 
3438  | 0  |     && ! IS_WORD_BOUND_ALL_RANGE(env->option);  | 
3439  | 0  |       break;  | 
3440  |  |  | 
3441  | 0  | #ifdef USE_WORD_BEGIN_END  | 
3442  | 0  |     case '<':  | 
3443  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;  | 
3444  | 0  |       tok->type = TK_ANCHOR;  | 
3445  | 0  |       tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;  | 
3446  | 0  |       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);  | 
3447  | 0  |       break;  | 
3448  |  |  | 
3449  | 0  |     case '>':  | 
3450  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;  | 
3451  | 0  |       tok->type = TK_ANCHOR;  | 
3452  | 0  |       tok->u.anchor.subtype = ANCHOR_WORD_END;  | 
3453  | 0  |       tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);  | 
3454  | 0  |       break;  | 
3455  | 0  | #endif  | 
3456  |  |  | 
3457  | 2.05k  |     case 's':  | 
3458  | 2.05k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;  | 
3459  | 2.05k  |       tok->type = TK_CHAR_TYPE;  | 
3460  | 2.05k  |       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;  | 
3461  | 2.05k  |       tok->u.prop.not   = 0;  | 
3462  | 2.05k  |       break;  | 
3463  |  |  | 
3464  | 0  |     case 'S':  | 
3465  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;  | 
3466  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3467  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;  | 
3468  | 0  |       tok->u.prop.not   = 1;  | 
3469  | 0  |       break;  | 
3470  |  |  | 
3471  | 1.64k  |     case 'd':  | 
3472  | 1.64k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;  | 
3473  | 1.64k  |       tok->type = TK_CHAR_TYPE;  | 
3474  | 1.64k  |       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;  | 
3475  | 1.64k  |       tok->u.prop.not   = 0;  | 
3476  | 1.64k  |       break;  | 
3477  |  |  | 
3478  | 0  |     case 'D':  | 
3479  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;  | 
3480  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3481  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;  | 
3482  | 0  |       tok->u.prop.not   = 1;  | 
3483  | 0  |       break;  | 
3484  |  |  | 
3485  | 0  |     case 'h':  | 
3486  | 0  |       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;  | 
3487  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3488  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;  | 
3489  | 0  |       tok->u.prop.not   = 0;  | 
3490  | 0  |       break;  | 
3491  |  |  | 
3492  | 0  |     case 'H':  | 
3493  | 0  |       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;  | 
3494  | 0  |       tok->type = TK_CHAR_TYPE;  | 
3495  | 0  |       tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;  | 
3496  | 0  |       tok->u.prop.not   = 1;  | 
3497  | 0  |       break;  | 
3498  |  |  | 
3499  | 0  |     case 'A':  | 
3500  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;  | 
3501  | 0  |     begin_buf:  | 
3502  | 0  |       tok->type = TK_ANCHOR;  | 
3503  | 0  |       tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;  | 
3504  | 0  |       break;  | 
3505  |  |  | 
3506  | 0  |     case 'Z':  | 
3507  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;  | 
3508  | 0  |       tok->type = TK_ANCHOR;  | 
3509  | 0  |       tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;  | 
3510  | 0  |       break;  | 
3511  |  |  | 
3512  | 0  |     case 'z':  | 
3513  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;  | 
3514  | 0  |     end_buf:  | 
3515  | 0  |       tok->type = TK_ANCHOR;  | 
3516  | 0  |       tok->u.anchor.subtype = ANCHOR_END_BUF;  | 
3517  | 0  |       break;  | 
3518  |  |  | 
3519  | 0  |     case 'G':  | 
3520  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;  | 
3521  | 0  |       tok->type = TK_ANCHOR;  | 
3522  | 0  |       tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;  | 
3523  | 0  |       break;  | 
3524  |  |  | 
3525  | 0  |     case '`':  | 
3526  | 0  |       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;  | 
3527  | 0  |       goto begin_buf;  | 
3528  | 0  |       break;  | 
3529  |  |  | 
3530  | 0  |     case '\'':  | 
3531  | 0  |       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;  | 
3532  | 0  |       goto end_buf;  | 
3533  | 0  |       break;  | 
3534  |  |  | 
3535  | 0  |     case 'x':  | 
3536  | 0  |       if (PEND) break;  | 
3537  |  |  | 
                    /* prev remembers the position right after 'x' so that a malformed \x{...} can be rewound. */
3538  | 0  |       prev = p;  | 
3539  | 0  |       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { | 
3540  | 0  |   PINC;  | 
3541  | 0  |   num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);  | 
3542  | 0  |   if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;  | 
3543  | 0  |   if (!PEND) { | 
3544  | 0  |     if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))  | 
3545  | 0  |       return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;  | 
3546  | 0  |   }  | 
3547  |  |  | 
3548  | 0  |   if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { | 
3549  | 0  |     PINC;  | 
3550  | 0  |     tok->type   = TK_CODE_POINT;  | 
3551  | 0  |     tok->u.code = (OnigCodePoint )num;  | 
3552  | 0  |   }  | 
3553  | 0  |   else { | 
3554  |  |     /* can't read nothing or invalid format */  | 
3555  | 0  |     p = prev;  | 
3556  | 0  |   }  | 
3557  | 0  |       }  | 
3558  | 0  |       else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { | 
3559  | 0  |   num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);  | 
3560  | 0  |   if (num < 0) return ONIGERR_TOO_BIG_NUMBER;  | 
3561  | 0  |   if (p == prev) {  /* can't read nothing. */ | 
3562  | 0  |     num = 0; /* but, it's not error */  | 
3563  | 0  |   }  | 
3564  | 0  |   tok->type = TK_RAW_BYTE;  | 
3565  | 0  |   tok->base = 16;  | 
3566  | 0  |   tok->u.c  = num;  | 
3567  | 0  |       }  | 
3568  | 0  |       break;  | 
3569  |  |  | 
3570  | 0  |     case 'u':  | 
3571  | 0  |       if (PEND) break;  | 
3572  |  |  | 
3573  | 0  |       prev = p;  | 
3574  | 0  |       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { | 
3575  | 0  |   num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);  | 
3576  | 0  |   if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;  | 
3577  | 0  |   else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;  | 
3578  | 0  |   if (p == prev) {  /* can't read nothing. */ | 
3579  | 0  |     num = 0; /* but, it's not error */  | 
3580  | 0  |   }  | 
3581  | 0  |   tok->type   = TK_CODE_POINT;  | 
3582  | 0  |   tok->base   = 16;  | 
3583  | 0  |   tok->u.code = (OnigCodePoint )num;  | 
3584  | 0  |       }  | 
3585  | 0  |       break;  | 
3586  |  |  | 
3587  | 0  |     case 'o':  | 
3588  | 0  |       if (PEND) break;  | 
3589  |  |  | 
3590  | 0  |       prev = p;  | 
3591  | 0  |       if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { | 
3592  | 0  |   PINC;  | 
3593  | 0  |   num = scan_unsigned_octal_number(&p, end, 11, enc);  | 
3594  | 0  |   if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;  | 
3595  | 0  |   if (!PEND) { | 
3596  | 0  |     OnigCodePoint c = PPEEK;  | 
3597  | 0  |     if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')  | 
3598  | 0  |       return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;  | 
3599  | 0  |   }  | 
3600  |  |  | 
3601  | 0  |   if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { | 
3602  | 0  |     PINC;  | 
3603  | 0  |     tok->type   = TK_CODE_POINT;  | 
3604  | 0  |     tok->u.code = (OnigCodePoint )num;  | 
3605  | 0  |   }  | 
3606  | 0  |   else { | 
3607  |  |     /* can't read nothing or invalid format */  | 
3608  | 0  |     p = prev;  | 
3609  | 0  |   }  | 
3610  | 0  |       }  | 
3611  | 0  |       break;  | 
3612  |  |  | 
3613  | 0  |     case '1': case '2': case '3': case '4':  | 
3614  | 0  |     case '5': case '6': case '7': case '8': case '9':  | 
                    /* PUNFETCH is expected to put p back on the digit just fetched so that the whole
                     * decimal number is scanned below -- confirm against the macro definition. */
3615  | 0  |       PUNFETCH;  | 
3616  | 0  |       prev = p;  | 
3617  | 0  |       num = onig_scan_unsigned_number(&p, end, enc);  | 
3618  | 0  |       if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { | 
3619  | 0  |   goto skip_backref;  | 
3620  | 0  |       }  | 
3621  |  |  | 
3622  | 0  |       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&  | 
3623  | 0  |     (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ | 
3624  | 0  |   if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { | 
3625  | 0  |     if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))  | 
3626  | 0  |       return ONIGERR_INVALID_BACKREF;  | 
3627  | 0  |   }  | 
3628  |  |  | 
3629  | 0  |   tok->type = TK_BACKREF;  | 
3630  | 0  |   tok->u.backref.num     = 1;  | 
3631  | 0  |   tok->u.backref.ref1    = num;  | 
3632  | 0  |   tok->u.backref.by_name = 0;  | 
3633  | 0  | #ifdef USE_BACKREF_WITH_LEVEL  | 
3634  | 0  |   tok->u.backref.exist_level = 0;  | 
3635  | 0  | #endif  | 
3636  | 0  |   break;  | 
3637  | 0  |       }  | 
3638  |  |  | 
                    /* Not a back reference: '8' and '9' become plain characters; any other digit
                     * falls through to the octal case with p rewound to prev. */
3639  | 0  |     skip_backref:  | 
3640  | 0  |       if (c == '8' || c == '9') { | 
3641  |  |   /* normal char */  | 
3642  | 0  |   p = prev; PINC;  | 
3643  | 0  |   break;  | 
3644  | 0  |       }  | 
3645  |  |  | 
3646  | 0  |       p = prev;  | 
3647  |  |       /* fall through */  | 
3648  | 0  |     case '0':  | 
3649  | 0  |       if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { | 
3650  | 0  |   prev = p;  | 
3651  | 0  |   num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);  | 
3652  | 0  |   if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER;  | 
3653  | 0  |   if (p == prev) {  /* can't read nothing. */ | 
3654  | 0  |     num = 0; /* but, it's not error */  | 
3655  | 0  |   }  | 
3656  | 0  |   tok->type = TK_RAW_BYTE;  | 
3657  | 0  |   tok->base = 8;  | 
3658  | 0  |   tok->u.c  = num;  | 
3659  | 0  |       }  | 
3660  | 0  |       else if (c != '0') { | 
3661  | 0  |   PINC;  | 
3662  | 0  |       }  | 
3663  | 0  |       break;  | 
3664  |  |  | 
3665  | 0  | #ifdef USE_NAMED_GROUP  | 
3666  | 0  |     case 'k':  | 
3667  | 0  |       if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { | 
3668  | 0  |   PFETCH(c);  | 
3669  | 0  |   if (c == '<' || c == '\'') { | 
3670  | 0  |     r = fetch_named_backref_token(c, tok, &p, end, env);  | 
3671  | 0  |     if (r < 0) return r;  | 
3672  | 0  |   }  | 
3673  | 0  |   else { | 
3674  | 0  |     PUNFETCH;  | 
3675  | 0  |     onig_syntax_warn(env, "invalid back reference");  | 
3676  | 0  |   }  | 
3677  | 0  |       }  | 
3678  | 0  |       break;  | 
3679  | 0  | #endif  | 
3680  |  |  | 
3681  | 0  | #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)  | 
3682  | 0  |     case 'g':  | 
3683  | 0  | # ifdef USE_NAMED_GROUP  | 
3684  | 0  |       if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) { | 
3685  | 0  |   PFETCH(c);  | 
3686  | 0  |   if (c == '{') { | 
3687  | 0  |     r = fetch_named_backref_token(c, tok, &p, end, env);  | 
3688  | 0  |     if (r < 0) return r;  | 
3689  | 0  |   }  | 
3690  | 0  |   else  | 
3691  | 0  |     PUNFETCH;  | 
3692  | 0  |       }  | 
3693  | 0  | # endif  | 
3694  | 0  | # ifdef USE_SUBEXP_CALL  | 
                    /* NOTE(review): this test also runs after a \g{...} back reference was accepted
                     * above when both syntax bits are enabled -- confirm that is intended. */
3695  | 0  |       if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { | 
3696  | 0  |   PFETCH(c);  | 
3697  | 0  |   if (c == '<' || c == '\'') { | 
3698  | 0  |     int gnum = -1, rel = 0;  | 
3699  | 0  |     UChar* name_end;  | 
3700  | 0  |     OnigCodePoint cnext;  | 
3701  |  | 
  | 
3702  | 0  |     cnext = PPEEK;  | 
3703  | 0  |     if (cnext == '0') { | 
3704  | 0  |       PINC;  | 
3705  | 0  |       if (PPEEK_IS(get_name_end_code_point(c))) {  /* \g<0>, \g'0' */ | 
3706  | 0  |         PINC;  | 
3707  | 0  |         name_end = p;  | 
3708  | 0  |         gnum = 0;  | 
3709  | 0  |       }  | 
3710  | 0  |     }  | 
3711  | 0  |     else if (cnext == '+') { | 
3712  | 0  |       PINC;  | 
3713  | 0  |       rel = 1;  | 
3714  | 0  |     }  | 
3715  | 0  |     prev = p;  | 
3716  | 0  |     if (gnum < 0) { | 
3717  | 0  |       r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);  | 
3718  | 0  |       if (r < 0) return r;  | 
3719  | 0  |     }  | 
3720  |  |  | 
3721  | 0  |     tok->type = TK_CALL;  | 
3722  | 0  |     tok->u.call.name     = prev;  | 
3723  | 0  |     tok->u.call.name_end = name_end;  | 
3724  | 0  |     tok->u.call.gnum     = gnum;  | 
3725  | 0  |     tok->u.call.rel      = rel;  | 
3726  | 0  |   }  | 
3727  | 0  |   else { | 
3728  | 0  |     onig_syntax_warn(env, "invalid subexp call");  | 
3729  | 0  |     PUNFETCH;  | 
3730  | 0  |   }  | 
3731  | 0  |       }  | 
3732  | 0  | # endif  | 
3733  | 0  |       break;  | 
3734  | 0  | #endif  | 
3735  |  |  | 
3736  | 0  |     case 'Q':  | 
3737  | 0  |       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { | 
3738  | 0  |   tok->type = TK_QUOTE_OPEN;  | 
3739  | 0  |       }  | 
3740  | 0  |       break;  | 
3741  |  |  | 
3742  | 0  |     case 'p':  | 
3743  | 0  |     case 'P':  | 
3744  | 0  |       if (PPEEK_IS('{') && | 
3745  | 0  |     IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { | 
3746  | 0  |   PINC;  | 
3747  | 0  |   tok->type = TK_CHAR_PROPERTY;  | 
3748  | 0  |   tok->u.prop.not = (c == 'P' ? 1 : 0);  | 
3749  |  | 
  | 
3750  | 0  |   if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { | 
3751  | 0  |     PFETCH(c);  | 
3752  | 0  |     if (c == '^') { | 
3753  | 0  |       tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);  | 
3754  | 0  |     }  | 
3755  | 0  |     else  | 
3756  | 0  |       PUNFETCH;  | 
3757  | 0  |   }  | 
3758  | 0  |       }  | 
3759  | 0  |       else { | 
3760  | 0  |   onig_syntax_warn(env, "invalid Unicode Property \\%c", c);  | 
3761  | 0  |       }  | 
3762  | 0  |       break;  | 
3763  |  |  | 
3764  | 0  |     case 'R':  | 
3765  | 0  |       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK)) { | 
3766  | 0  |   tok->type = TK_LINEBREAK;  | 
3767  | 0  |       }  | 
3768  | 0  |       break;  | 
3769  |  |  | 
3770  | 0  |     case 'X':  | 
3771  | 0  |       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER)) { | 
3772  | 0  |   tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;  | 
3773  | 0  |       }  | 
3774  | 0  |       break;  | 
3775  |  |  | 
3776  | 0  |     case 'K':  | 
3777  | 0  |       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) { | 
3778  | 0  |   tok->type = TK_KEEP;  | 
3779  | 0  |       }  | 
3780  | 0  |       break;  | 
3781  |  |  | 
3782  | 2.05k  |     default:  | 
3783  | 2.05k  |       { | 
3784  | 2.05k  |   OnigCodePoint c2;  | 
3785  |  |  | 
3786  | 2.05k  |   PUNFETCH;  | 
3787  | 2.05k  |   num = fetch_escaped_value(&p, end, env, &c2);  | 
3788  | 2.05k  |   if (num < 0) return num;  | 
3789  |  |   /* set_raw: */  | 
                  /* A value that differs from the raw character yields a TK_CODE_POINT; otherwise the
                   * token stays a string and p is placed just after the character at backp. */
3790  | 2.05k  |   if ((OnigCodePoint )tok->u.c != c2) { | 
3791  | 0  |     tok->type = TK_CODE_POINT;  | 
3792  | 0  |     tok->u.code = (OnigCodePoint )c2;  | 
3793  | 0  |   }  | 
3794  | 2.05k  |   else { /* string */ | 
3795  | 2.05k  |     p = tok->backp + enclen(enc, tok->backp, end);  | 
3796  | 2.05k  |   }  | 
3797  | 2.05k  |       }  | 
3798  | 0  |       break;  | 
3799  | 7.39k  |     }  | 
3800  | 7.39k  |   }  | 
3801  | 195k  |   else { | 
3802  | 195k  |     tok->u.c = c;  | 
3803  | 195k  |     tok->escaped = 0;  | 
3804  |  |  | 
3805  | 195k  | #ifdef USE_VARIABLE_META_CHARS  | 
                    /* Syntax-defined meta characters jump straight to the handling of the operator
                     * they stand for. */
3806  | 195k  |     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&  | 
3807  | 195k  |   IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { | 
3808  | 0  |       if (c == MC_ANYCHAR(syn))  | 
3809  | 0  |   goto any_char;  | 
3810  | 0  |       else if (c == MC_ANYTIME(syn))  | 
3811  | 0  |   goto anytime;  | 
3812  | 0  |       else if (c == MC_ZERO_OR_ONE_TIME(syn))  | 
3813  | 0  |   goto zero_or_one_time;  | 
3814  | 0  |       else if (c == MC_ONE_OR_MORE_TIME(syn))  | 
3815  | 0  |   goto one_or_more_time;  | 
3816  | 0  |       else if (c == MC_ANYCHAR_ANYTIME(syn)) { | 
3817  | 0  |   tok->type = TK_ANYCHAR_ANYTIME;  | 
3818  | 0  |   goto out;  | 
3819  | 0  |       }  | 
3820  | 0  |     }  | 
3821  | 195k  | #endif  | 
3822  |  |  | 
3823  | 195k  |     switch (c) { | 
3824  | 4.11k  |     case '.':  | 
3825  | 4.11k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;  | 
3826  | 4.11k  | #ifdef USE_VARIABLE_META_CHARS  | 
3827  | 4.11k  |     any_char:  | 
3828  | 4.11k  | #endif  | 
3829  | 4.11k  |       tok->type = TK_ANYCHAR;  | 
3830  | 4.11k  |       break;  | 
3831  |  |  | 
3832  | 4.52k  |     case '*':  | 
3833  | 4.52k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;  | 
3834  | 4.52k  | #ifdef USE_VARIABLE_META_CHARS  | 
3835  | 4.52k  |     anytime:  | 
3836  | 4.52k  | #endif  | 
3837  | 4.52k  |       tok->type = TK_OP_REPEAT;  | 
3838  | 4.52k  |       tok->u.repeat.lower = 0;  | 
3839  | 4.52k  |       tok->u.repeat.upper = REPEAT_INFINITE;  | 
3840  | 4.52k  |       goto greedy_check;  | 
3841  | 0  |       break;  | 
3842  |  |  | 
3843  | 7.39k  |     case '+':  | 
3844  | 7.39k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;  | 
3845  | 7.39k  | #ifdef USE_VARIABLE_META_CHARS  | 
3846  | 7.39k  |     one_or_more_time:  | 
3847  | 7.39k  | #endif  | 
3848  | 7.39k  |       tok->type = TK_OP_REPEAT;  | 
3849  | 7.39k  |       tok->u.repeat.lower = 1;  | 
3850  | 7.39k  |       tok->u.repeat.upper = REPEAT_INFINITE;  | 
3851  | 7.39k  |       goto greedy_check;  | 
3852  | 0  |       break;  | 
3853  |  |  | 
3854  | 411  |     case '?':  | 
3855  | 411  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;  | 
3856  | 411  | #ifdef USE_VARIABLE_META_CHARS  | 
3857  | 411  |     zero_or_one_time:  | 
3858  | 411  | #endif  | 
3859  | 411  |       tok->type = TK_OP_REPEAT;  | 
3860  | 411  |       tok->u.repeat.lower = 0;  | 
3861  | 411  |       tok->u.repeat.upper = 1;  | 
3862  | 411  |       goto greedy_check;  | 
3863  | 0  |       break;  | 
3864  |  |  | 
3865  | 0  |     case '{': | 
3866  | 0  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;  | 
3867  | 0  |       r = fetch_range_quantifier(&p, end, tok, env);  | 
3868  | 0  |       if (r < 0) return r;  /* error */  | 
3869  | 0  |       if (r == 0) goto greedy_check;  | 
3870  | 0  |       else if (r == 2) { /* {n} */ | 
3871  | 0  |   if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))  | 
3872  | 0  |     goto possessive_check;  | 
3873  |  |  | 
3874  | 0  |   goto greedy_check;  | 
3875  | 0  |       }  | 
3876  |  |       /* r == 1 : normal char */  | 
3877  | 0  |       break;  | 
3878  |  |  | 
3879  | 3.28k  |     case '|':  | 
3880  | 3.28k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;  | 
3881  | 3.28k  |       tok->type = TK_ALT;  | 
3882  | 3.28k  |       break;  | 
3883  |  |  | 
3884  | 4.93k  |     case '(': | 
3885  | 4.93k  |       if (PPEEK_IS('?') && | 
3886  | 4.93k  |     IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { | 
3887  | 4.52k  |   PINC;  | 
                  /* (?#...) comment: consume up to the closing ')' -- a character following the
                   * syntax escape character is skipped unexamined -- then restart tokenizing. */
3888  | 4.52k  |   if (PPEEK_IS('#')) { | 
3889  | 0  |     PFETCH(c);  | 
3890  | 0  |     while (1) { | 
3891  | 0  |       if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;  | 
3892  | 0  |       PFETCH(c);  | 
3893  | 0  |       if (c == MC_ESC(syn)) { | 
3894  | 0  |         if (!PEND) PFETCH(c);  | 
3895  | 0  |       }  | 
3896  | 0  |       else { | 
3897  | 0  |         if (c == ')') break;  | 
3898  | 0  |       }  | 
3899  | 0  |     }  | 
3900  | 0  |     goto start;  | 
3901  | 0  |   }  | 
3902  | 4.52k  | #ifdef USE_PERL_SUBEXP_CALL  | 
3903  |  |   /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */  | 
3904  | 4.52k  |   c = PPEEK;  | 
3905  | 4.52k  |   if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&  | 
3906  | 4.52k  |       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { | 
3907  |  |     /* (?&name), (?n), (?R), (?0) */  | 
3908  | 0  |     int gnum;  | 
3909  | 0  |     UChar *name;  | 
3910  | 0  |     UChar *name_end;  | 
3911  |  | 
  | 
3912  | 0  |     if (c == 'R' || c == '0') { | 
3913  | 0  |       PINC;   /* skip 'R' / '0' */  | 
3914  | 0  |       if (!PPEEK_IS(')')) { | 
3915  | 0  |         r = ONIGERR_INVALID_GROUP_NAME;  | 
3916  | 0  |         onig_scan_env_set_error_string(env, r, p - 1, p + 1);  | 
3917  | 0  |         return r;  | 
3918  | 0  |       }  | 
3919  | 0  |       PINC;   /* skip ')' */  | 
3920  | 0  |       name_end = name = p;  | 
3921  | 0  |       gnum = 0;  | 
3922  | 0  |     }  | 
3923  | 0  |     else { | 
3924  | 0  |       int numref = 1;  | 
3925  | 0  |       if (c == '&') {     /* (?&name) */ | 
3926  | 0  |         PINC;  | 
3927  | 0  |         numref = 0;       /* don't allow number name */  | 
3928  | 0  |       }  | 
3929  | 0  |       name = p;  | 
3930  | 0  |       r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref); | 
3931  | 0  |       if (r < 0) return r;  | 
3932  | 0  |     }  | 
3933  |  |  | 
3934  | 0  |     tok->type = TK_CALL;  | 
3935  | 0  |     tok->u.call.name     = name;  | 
3936  | 0  |     tok->u.call.name_end = name_end;  | 
3937  | 0  |     tok->u.call.gnum     = gnum;  | 
3938  | 0  |     tok->u.call.rel      = 0;  | 
3939  | 0  |     break;  | 
3940  | 0  |   }  | 
3941  | 4.52k  |   else if ((c == '-' || c == '+') &&  | 
3942  | 4.52k  |       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_SUBEXP_CALL)) { | 
3943  |  |     /* (?+n), (?-n) */  | 
3944  | 0  |     int gnum;  | 
3945  | 0  |     UChar *name;  | 
3946  | 0  |     UChar *name_end;  | 
3947  | 0  |     OnigCodePoint cnext;  | 
3948  | 0  |     PFETCH_READY;  | 
3949  |  | 
  | 
3950  | 0  |     PINC;     /* skip '-' / '+' */  | 
3951  | 0  |     cnext = PPEEK;  | 
3952  | 0  |     if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) { | 
3953  | 0  |       if (c == '-') PUNFETCH;  | 
3954  | 0  |       name = p;  | 
3955  | 0  |       r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1); | 
3956  | 0  |       if (r < 0) return r;  | 
3957  |  |  | 
3958  | 0  |       tok->type = TK_CALL;  | 
3959  | 0  |       tok->u.call.name     = name;  | 
3960  | 0  |       tok->u.call.name_end = name_end;  | 
3961  | 0  |       tok->u.call.gnum     = gnum;  | 
3962  | 0  |       tok->u.call.rel      = 1;  | 
3963  | 0  |       break;  | 
3964  | 0  |     }  | 
3965  | 0  |   }  | 
3966  | 4.52k  | #endif /* USE_PERL_SUBEXP_CALL */  | 
3967  | 4.52k  | #ifdef USE_CAPITAL_P_NAMED_GROUP  | 
3968  | 4.52k  |   if (PPEEK_IS('P') && | 
3969  | 4.52k  |       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { | 
3970  | 0  |     int gnum;  | 
3971  | 0  |     UChar *name;  | 
3972  | 0  |     UChar *name_end;  | 
3973  | 0  |     PFETCH_READY;  | 
3974  |  | 
  | 
3975  | 0  |     PINC;     /* skip 'P' */  | 
3976  | 0  |     if (PEND) return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
3977  | 0  |     PFETCH(c);  | 
3978  | 0  |     if (c == '=') {       /* (?P=name): backref */ | 
3979  | 0  |       r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env); | 
3980  | 0  |       if (r < 0) return r;  | 
3981  | 0  |       break;  | 
3982  | 0  |     }  | 
3983  | 0  |     else if (c == '>') {  /* (?P>name): subexp call */ | 
3984  | 0  |       name = p;  | 
3985  | 0  |       r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0); | 
3986  | 0  |       if (r < 0) return r;  | 
3987  |  |  | 
3988  | 0  |       tok->type = TK_CALL;  | 
3989  | 0  |       tok->u.call.name     = name;  | 
3990  | 0  |       tok->u.call.name_end = name_end;  | 
3991  | 0  |       tok->u.call.gnum     = gnum;  | 
3992  | 0  |       tok->u.call.rel      = 0;  | 
3993  | 0  |       break;  | 
3994  | 0  |     }  | 
3995  | 0  |   }  | 
3996  | 4.52k  | #endif /* USE_CAPITAL_P_NAMED_GROUP */  | 
                  /* None of the special "(?" forms produced a token: rewind (presumably back onto the
                   * '?', using the function-level fetch state -- confirm against PINC/PUNFETCH) and
                   * treat '(' as an ordinary group opener below. */
3997  | 4.52k  |   PUNFETCH;  | 
3998  | 4.52k  |       }  | 
3999  |  |  | 
4000  | 4.93k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;  | 
4001  | 4.93k  |       tok->type = TK_SUBEXP_OPEN;  | 
4002  | 4.93k  |       break;  | 
4003  |  |  | 
4004  | 4.93k  |     case ')':  | 
4005  | 4.93k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;  | 
4006  | 4.93k  |       tok->type = TK_SUBEXP_CLOSE;  | 
4007  | 4.93k  |       break;  | 
4008  |  |  | 
4009  | 8.22k  |     case '^':  | 
4010  | 8.22k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;  | 
4011  | 8.22k  |       tok->type = TK_ANCHOR;  | 
4012  | 8.22k  |       tok->u.anchor.subtype = (IS_SINGLELINE(env->option)  | 
4013  | 8.22k  |              ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);  | 
4014  | 8.22k  |       break;  | 
4015  |  |  | 
4016  | 3.28k  |     case '$':  | 
4017  | 3.28k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;  | 
4018  | 3.28k  |       tok->type = TK_ANCHOR;  | 
4019  | 3.28k  |       tok->u.anchor.subtype = (IS_SINGLELINE(env->option)  | 
4020  | 3.28k  |              ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);  | 
4021  | 3.28k  |       break;  | 
4022  |  |  | 
4023  | 6.16k  |     case '[':  | 
4024  | 6.16k  |       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;  | 
4025  | 6.16k  |       tok->type = TK_CC_OPEN;  | 
4026  | 6.16k  |       break;  | 
4027  |  |  | 
4028  | 0  |     case ']':  | 
4029  | 0  |       if (*src > env->pattern)   /* /].../ is allowed. */  | 
4030  | 0  |   CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");  | 
4031  | 0  |       break;  | 
4032  |  |  | 
4033  | 0  |     case '#':  | 
                    /* With the extend option set, '#' starts a comment that runs through the next
                     * newline (or to the end of the pattern); tokenizing then restarts. */
4034  | 0  |       if (IS_EXTEND(env->option)) { | 
4035  | 0  |   while (!PEND) { | 
4036  | 0  |     PFETCH(c);  | 
4037  | 0  |     if (ONIGENC_IS_CODE_NEWLINE(enc, c))  | 
4038  | 0  |       break;  | 
4039  | 0  |   }  | 
4040  | 0  |   goto start;  | 
4041  | 0  |   break;  | 
4042  | 0  |       }  | 
4043  | 0  |       break;  | 
4044  |  |  | 
4045  | 19.3k  |     case ' ': case '\t': case '\n': case '\r': case '\f':  | 
                    /* With the extend option set, these whitespace characters are skipped. */
4046  | 19.3k  |       if (IS_EXTEND(env->option))  | 
4047  | 0  |   goto start;  | 
4048  | 19.3k  |       break;  | 
4049  |  |  | 
4050  | 128k  |     default:  | 
4051  |  |       /* string */  | 
4052  | 128k  |       break;  | 
4053  | 195k  |     }  | 
4054  | 195k  |   }  | 
4055  |  |  | 
4056  | 202k  | #ifdef USE_VARIABLE_META_CHARS  | 
4057  | 202k  |  out:  | 
4058  | 202k  | #endif  | 
                  /* Common exit: publish the new scan position and report the token type. */
4059  | 202k  |   *src = p;  | 
4060  | 202k  |   return tok->type;  | 
4061  | 202k  | }  | 
4062  |  |  | 
4063  |  | static int  | 
4064  |  | add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,  | 
4065  |  |                          ScanEnv* env,  | 
4066  |  |                          OnigCodePoint sb_out, const OnigCodePoint mbr[])  | 
4067  | 5.34k  | { | 
4068  | 5.34k  |   int i, r;  | 
4069  | 5.34k  |   OnigCodePoint j;  | 
4070  |  |  | 
4071  | 5.34k  |   int n = ONIGENC_CODE_RANGE_NUM(mbr);  | 
4072  |  |  | 
4073  | 5.34k  |   if (not == 0) { | 
4074  | 14.3k  |     for (i = 0; i < n; i++) { | 
4075  | 14.3k  |       for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);  | 
4076  | 53.0k  |     j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { | 
4077  | 43.9k  |   if (j >= sb_out) { | 
4078  | 5.34k  |     if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { | 
4079  | 0  |       r = add_code_range_to_buf(&(cc->mbuf), env, j,  | 
4080  | 0  |               ONIGENC_CODE_RANGE_TO(mbr, i));  | 
4081  | 0  |       if (r != 0) return r;  | 
4082  | 0  |       i++;  | 
4083  | 0  |     }  | 
4084  |  |  | 
4085  | 5.34k  |     goto sb_end;  | 
4086  | 5.34k  |   }  | 
4087  | 38.6k  |   BITSET_SET_BIT_CHKDUP(cc->bs, j);  | 
4088  | 38.6k  |       }  | 
4089  | 14.3k  |     }  | 
4090  |  |  | 
4091  | 5.34k  |   sb_end:  | 
4092  | 138k  |     for ( ; i < n; i++) { | 
4093  | 133k  |       r = add_code_range_to_buf(&(cc->mbuf), env,  | 
4094  | 133k  |                                 ONIGENC_CODE_RANGE_FROM(mbr, i),  | 
4095  | 133k  |                                 ONIGENC_CODE_RANGE_TO(mbr, i));  | 
4096  | 133k  |       if (r != 0) return r;  | 
4097  | 133k  |     }  | 
4098  | 5.34k  |   }  | 
4099  | 0  |   else { | 
4100  | 0  |     OnigCodePoint prev = 0;  | 
4101  |  | 
  | 
4102  | 0  |     for (i = 0; i < n; i++) { | 
4103  | 0  |       for (j = prev;  | 
4104  | 0  |      j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { | 
4105  | 0  |   if (j >= sb_out) { | 
4106  | 0  |     goto sb_end2;  | 
4107  | 0  |   }  | 
4108  | 0  |   BITSET_SET_BIT_CHKDUP(cc->bs, j);  | 
4109  | 0  |       }  | 
4110  | 0  |       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;  | 
4111  | 0  |     }  | 
4112  | 0  |     for (j = prev; j < sb_out; j++) { | 
4113  | 0  |       BITSET_SET_BIT_CHKDUP(cc->bs, j);  | 
4114  | 0  |     }  | 
4115  |  | 
  | 
4116  | 0  |   sb_end2:  | 
4117  | 0  |     prev = sb_out;  | 
4118  |  | 
  | 
4119  | 0  |     for (i = 0; i < n; i++) { | 
4120  | 0  |       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { | 
4121  | 0  |   r = add_code_range_to_buf(&(cc->mbuf), env, prev,  | 
4122  | 0  |                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);  | 
4123  | 0  |   if (r != 0) return r;  | 
4124  | 0  |       }  | 
4125  | 0  |       prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;  | 
4126  | 0  |     }  | 
4127  | 0  |     if (prev < 0x7fffffff) { | 
4128  | 0  |       r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);  | 
4129  | 0  |       if (r != 0) return r;  | 
4130  | 0  |     }  | 
4131  | 0  |   }  | 
4132  |  |  | 
4133  | 5.34k  |   return 0;  | 
4134  | 5.34k  | }  | 
4135  |  |  | 
4136  |  | static int  | 
4137  |  | add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)  | 
4138  | 5.34k  | { | 
4139  | 5.34k  |   int maxcode;  | 
4140  | 5.34k  |   int c, r;  | 
4141  | 5.34k  |   const OnigCodePoint *ranges;  | 
4142  | 5.34k  |   OnigCodePoint sb_out;  | 
4143  | 5.34k  |   OnigEncoding enc = env->enc;  | 
4144  |  |  | 
4145  | 5.34k  |   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);  | 
4146  | 5.34k  |   if (r == 0) { | 
4147  | 5.34k  |     if (ascii_range) { | 
4148  | 5.34k  |       CClassNode ccwork;  | 
4149  | 5.34k  |       initialize_cclass(&ccwork);  | 
4150  | 5.34k  |       r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,  | 
4151  | 5.34k  |            ranges);  | 
4152  | 5.34k  |       if (r == 0) { | 
4153  | 5.34k  |   if (not) { | 
4154  | 0  |     r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);  | 
4155  | 0  |   }  | 
4156  | 5.34k  |   else { | 
4157  | 5.34k  |     CClassNode ccascii;  | 
4158  | 5.34k  |     initialize_cclass(&ccascii);  | 
4159  | 5.34k  |     if (ONIGENC_MBC_MINLEN(env->enc) > 1) { | 
4160  | 0  |       r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);  | 
4161  | 0  |     }  | 
4162  | 5.34k  |     else { | 
4163  | 5.34k  |       bitset_set_range(env, ccascii.bs, 0x00, 0x7F);  | 
4164  | 5.34k  |       r = 0;  | 
4165  | 5.34k  |     }  | 
4166  | 5.34k  |     if (r == 0) { | 
4167  | 5.34k  |       r = and_cclass(&ccwork, &ccascii, env);  | 
4168  | 5.34k  |     }  | 
4169  | 5.34k  |     if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);  | 
4170  | 5.34k  |   }  | 
4171  | 5.34k  |   if (r == 0) { | 
4172  | 5.34k  |     r = or_cclass(cc, &ccwork, env);  | 
4173  | 5.34k  |   }  | 
4174  | 5.34k  |   if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);  | 
4175  | 5.34k  |       }  | 
4176  | 5.34k  |     }  | 
4177  | 0  |     else { | 
4178  | 0  |       r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);  | 
4179  | 0  |     }  | 
4180  | 5.34k  |     return r;  | 
4181  | 5.34k  |   }  | 
4182  | 0  |   else if (r != ONIG_NO_SUPPORT_CONFIG) { | 
4183  | 0  |     return r;  | 
4184  | 0  |   }  | 
4185  |  |  | 
4186  | 0  |   maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;  | 
4187  | 0  |   r = 0;  | 
4188  | 0  |   switch (ctype) { | 
4189  | 0  |   case ONIGENC_CTYPE_ALPHA:  | 
4190  | 0  |   case ONIGENC_CTYPE_BLANK:  | 
4191  | 0  |   case ONIGENC_CTYPE_CNTRL:  | 
4192  | 0  |   case ONIGENC_CTYPE_DIGIT:  | 
4193  | 0  |   case ONIGENC_CTYPE_LOWER:  | 
4194  | 0  |   case ONIGENC_CTYPE_PUNCT:  | 
4195  | 0  |   case ONIGENC_CTYPE_SPACE:  | 
4196  | 0  |   case ONIGENC_CTYPE_UPPER:  | 
4197  | 0  |   case ONIGENC_CTYPE_XDIGIT:  | 
4198  | 0  |   case ONIGENC_CTYPE_ASCII:  | 
4199  | 0  |   case ONIGENC_CTYPE_ALNUM:  | 
4200  | 0  |     if (not != 0) { | 
4201  | 0  |       for (c = 0; c < SINGLE_BYTE_SIZE; c++) { | 
4202  | 0  |   if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))  | 
4203  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4204  | 0  |       }  | 
4205  | 0  |       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);  | 
4206  | 0  |     }  | 
4207  | 0  |     else { | 
4208  | 0  |       for (c = 0; c < SINGLE_BYTE_SIZE; c++) { | 
4209  | 0  |   if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))  | 
4210  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4211  | 0  |       }  | 
4212  | 0  |     }  | 
4213  | 0  |     break;  | 
4214  |  |  | 
4215  | 0  |   case ONIGENC_CTYPE_GRAPH:  | 
4216  | 0  |   case ONIGENC_CTYPE_PRINT:  | 
4217  | 0  |     if (not != 0) { | 
4218  | 0  |       for (c = 0; c < SINGLE_BYTE_SIZE; c++) { | 
4219  | 0  |   if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)  | 
4220  | 0  |       || c >= maxcode)  | 
4221  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4222  | 0  |       }  | 
4223  | 0  |       if (ascii_range)  | 
4224  | 0  |   ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);  | 
4225  | 0  |     }  | 
4226  | 0  |     else { | 
4227  | 0  |       for (c = 0; c < maxcode; c++) { | 
4228  | 0  |   if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))  | 
4229  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4230  | 0  |       }  | 
4231  | 0  |       if (! ascii_range)  | 
4232  | 0  |   ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);  | 
4233  | 0  |     }  | 
4234  | 0  |     break;  | 
4235  |  |  | 
4236  | 0  |   case ONIGENC_CTYPE_WORD:  | 
4237  | 0  |     if (not == 0) { | 
4238  | 0  |       for (c = 0; c < maxcode; c++) { | 
4239  | 0  |   if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4240  | 0  |       }  | 
4241  | 0  |       if (! ascii_range)  | 
4242  | 0  |   ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);  | 
4243  | 0  |     }  | 
4244  | 0  |     else { | 
4245  | 0  |       for (c = 0; c < SINGLE_BYTE_SIZE; c++) { | 
4246  | 0  |   if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */  | 
4247  | 0  |       && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))  | 
4248  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, c);  | 
4249  | 0  |       }  | 
4250  | 0  |       if (ascii_range)  | 
4251  | 0  |   ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);  | 
4252  | 0  |     }  | 
4253  | 0  |     break;  | 
4254  |  |  | 
4255  | 0  |   default:  | 
4256  | 0  |     return ONIGERR_PARSER_BUG;  | 
4257  | 0  |     break;  | 
4258  | 0  |   }  | 
4259  |  |  | 
4260  | 0  |   return r;  | 
4261  | 0  | }  | 
4262  |  |  | 
4263  |  | static int  | 
4264  |  | parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,  | 
4265  |  |         UChar** src, UChar* end, ScanEnv* env)  | 
4266  | 0  | { | 
4267  | 0  | #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20  | 
4268  | 0  | #define POSIX_BRACKET_NAME_MIN_LEN         4  | 
4269  |  | 
  | 
4270  | 0  |   static const PosixBracketEntryType PBS[] = { | 
4271  | 0  |     POSIX_BRACKET_ENTRY_INIT("alnum",  ONIGENC_CTYPE_ALNUM), | 
4272  | 0  |     POSIX_BRACKET_ENTRY_INIT("alpha",  ONIGENC_CTYPE_ALPHA), | 
4273  | 0  |     POSIX_BRACKET_ENTRY_INIT("blank",  ONIGENC_CTYPE_BLANK), | 
4274  | 0  |     POSIX_BRACKET_ENTRY_INIT("cntrl",  ONIGENC_CTYPE_CNTRL), | 
4275  | 0  |     POSIX_BRACKET_ENTRY_INIT("digit",  ONIGENC_CTYPE_DIGIT), | 
4276  | 0  |     POSIX_BRACKET_ENTRY_INIT("graph",  ONIGENC_CTYPE_GRAPH), | 
4277  | 0  |     POSIX_BRACKET_ENTRY_INIT("lower",  ONIGENC_CTYPE_LOWER), | 
4278  | 0  |     POSIX_BRACKET_ENTRY_INIT("print",  ONIGENC_CTYPE_PRINT), | 
4279  | 0  |     POSIX_BRACKET_ENTRY_INIT("punct",  ONIGENC_CTYPE_PUNCT), | 
4280  | 0  |     POSIX_BRACKET_ENTRY_INIT("space",  ONIGENC_CTYPE_SPACE), | 
4281  | 0  |     POSIX_BRACKET_ENTRY_INIT("upper",  ONIGENC_CTYPE_UPPER), | 
4282  | 0  |     POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT), | 
4283  | 0  |     POSIX_BRACKET_ENTRY_INIT("ascii",  ONIGENC_CTYPE_ASCII), | 
4284  | 0  |     POSIX_BRACKET_ENTRY_INIT("word",   ONIGENC_CTYPE_WORD), | 
4285  | 0  |   };  | 
4286  |  | 
  | 
4287  | 0  |   const PosixBracketEntryType *pb;  | 
4288  | 0  |   int not, i, r;  | 
4289  | 0  |   int ascii_range;  | 
4290  | 0  |   OnigCodePoint c;  | 
4291  | 0  |   OnigEncoding enc = env->enc;  | 
4292  | 0  |   UChar *p = *src;  | 
4293  |  | 
  | 
4294  | 0  |   if (PPEEK_IS('^')) { | 
4295  | 0  |     PINC_S;  | 
4296  | 0  |     not = 1;  | 
4297  | 0  |   }  | 
4298  | 0  |   else  | 
4299  | 0  |     not = 0;  | 
4300  |  | 
  | 
4301  | 0  |   if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)  | 
4302  | 0  |     goto not_posix_bracket;  | 
4303  |  |  | 
4304  | 0  |   ascii_range = IS_ASCII_RANGE(env->option) &&  | 
4305  | 0  |       ! IS_POSIX_BRACKET_ALL_RANGE(env->option);  | 
4306  | 0  |   for (pb = PBS; pb < PBS + numberof(PBS); pb++) { | 
4307  | 0  |     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { | 
4308  | 0  |       p = (UChar* )onigenc_step(enc, p, end, pb->len);  | 
4309  | 0  |       if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)  | 
4310  | 0  |   return ONIGERR_INVALID_POSIX_BRACKET_TYPE;  | 
4311  |  |  | 
4312  | 0  |       r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);  | 
4313  | 0  |       if (r != 0) return r;  | 
4314  |  |  | 
4315  | 0  |       if (IS_NOT_NULL(asc_cc)) { | 
4316  | 0  |   if (pb->ctype != ONIGENC_CTYPE_WORD &&  | 
4317  | 0  |       pb->ctype != ONIGENC_CTYPE_ASCII &&  | 
4318  | 0  |       !ascii_range)  | 
4319  | 0  |     r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);  | 
4320  | 0  |   if (r != 0) return r;  | 
4321  | 0  |       }  | 
4322  |  |  | 
4323  | 0  |       PINC_S; PINC_S;  | 
4324  | 0  |       *src = p;  | 
4325  | 0  |       return 0;  | 
4326  | 0  |     }  | 
4327  | 0  |   }  | 
4328  |  |  | 
4329  | 0  |  not_posix_bracket:  | 
4330  | 0  |   c = 0;  | 
4331  | 0  |   i = 0;  | 
4332  | 0  |   while (!PEND && ((c = PPEEK) != ':') && c != ']') { | 
4333  | 0  |     PINC_S;  | 
4334  | 0  |     if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;  | 
4335  | 0  |   }  | 
4336  | 0  |   if (c == ':' && ! PEND) { | 
4337  | 0  |     PINC_S;  | 
4338  | 0  |     if (! PEND) { | 
4339  | 0  |       PFETCH_S(c);  | 
4340  | 0  |       if (c == ']')  | 
4341  | 0  |   return ONIGERR_INVALID_POSIX_BRACKET_TYPE;  | 
4342  | 0  |     }  | 
4343  | 0  |   }  | 
4344  |  |  | 
4345  | 0  |   return 1;  /* 1: is not POSIX bracket, but no error. */  | 
4346  | 0  | }  | 
4347  |  |  | 
4348  |  | static int  | 
4349  |  | fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)  | 
4350  | 0  | { | 
4351  | 0  |   int r;  | 
4352  | 0  |   OnigCodePoint c;  | 
4353  | 0  |   OnigEncoding enc = env->enc;  | 
4354  | 0  |   UChar *prev, *start, *p = *src;  | 
4355  |  | 
  | 
4356  | 0  |   r = 0;  | 
4357  | 0  |   start = prev = p;  | 
4358  |  | 
  | 
4359  | 0  |   while (!PEND) { | 
4360  | 0  |     prev = p;  | 
4361  | 0  |     PFETCH_S(c);  | 
4362  | 0  |     if (c == '}') { | 
4363  | 0  |       r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);  | 
4364  | 0  |       if (r < 0) break;  | 
4365  |  |  | 
4366  | 0  |       *src = p;  | 
4367  | 0  |       return r;  | 
4368  | 0  |     }  | 
4369  | 0  |     else if (c == '(' || c == ')' || c == '{' || c == '|') { | 
4370  | 0  |       r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;  | 
4371  | 0  |       break;  | 
4372  | 0  |     }  | 
4373  | 0  |   }  | 
4374  |  |  | 
4375  | 0  |   onig_scan_env_set_error_string(env, r, *src, prev);  | 
4376  | 0  |   return r;  | 
4377  | 0  | }  | 
4378  |  |  | 
4379  |  | static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);  | 
4380  |  |  | 
4381  |  | static int  | 
4382  |  | parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,  | 
4383  |  |         ScanEnv* env)  | 
4384  | 0  | { | 
4385  | 0  |   int r, ctype;  | 
4386  | 0  |   CClassNode* cc;  | 
4387  |  | 
  | 
4388  | 0  |   ctype = fetch_char_property_to_ctype(src, end, env);  | 
4389  | 0  |   if (ctype < 0) return ctype;  | 
4390  |  |  | 
4391  | 0  |   *np = node_new_cclass();  | 
4392  | 0  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
4393  | 0  |   cc = NCCLASS(*np);  | 
4394  | 0  |   r = add_ctype_to_cc(cc, ctype, 0, 0, env);  | 
4395  | 0  |   if (r != 0) return r;  | 
4396  | 0  |   if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);  | 
4397  |  | 
  | 
4398  | 0  |   if (IS_IGNORECASE(env->option)) { | 
4399  | 0  |     if (ctype != ONIGENC_CTYPE_ASCII)  | 
4400  | 0  |       r = cclass_case_fold(np, cc, cc, env);  | 
4401  | 0  |   }  | 
4402  | 0  |   return r;  | 
4403  | 0  | }  | 
4404  |  |  | 
4405  |  |  | 
/* State of the character-class parser between tokens. */
enum CCSTATE {
  CCS_VALUE,     /* a value has been read and is held pending */
  CCS_RANGE,     /* a range operator followed the pending value */
  CCS_COMPLETE,  /* a range has just been completed */
  CCS_START      /* nothing read yet (initial state, or right after "&&") */
};
4412  |  |  | 
/* Kind of the value most recently read inside a character class. */
enum CCVALTYPE {
  CCV_SB,          /* single-byte value, stored in the bitset */
  CCV_CODE_POINT,  /* code point, stored in the multibyte range buffer */
  CCV_CLASS        /* a whole class (character type, property, ...) */
};
4418  |  |  | 
4419  |  | static int  | 
4420  |  | next_state_class(CClassNode* cc, CClassNode* asc_cc,  | 
4421  |  |      OnigCodePoint* vs, enum CCVALTYPE* type,  | 
4422  |  |      enum CCSTATE* state, ScanEnv* env)  | 
4423  | 1.64k  | { | 
4424  | 1.64k  |   int r;  | 
4425  |  |  | 
4426  | 1.64k  |   if (*state == CCS_RANGE)  | 
4427  | 0  |     return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;  | 
4428  |  |  | 
4429  | 1.64k  |   if (*state == CCS_VALUE && *type != CCV_CLASS) { | 
4430  | 0  |     if (*type == CCV_SB) { | 
4431  | 0  |       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));  | 
4432  | 0  |       if (IS_NOT_NULL(asc_cc))  | 
4433  | 0  |   BITSET_SET_BIT(asc_cc->bs, (int )(*vs));  | 
4434  | 0  |     }  | 
4435  | 0  |     else if (*type == CCV_CODE_POINT) { | 
4436  | 0  |       r = add_code_range(&(cc->mbuf), env, *vs, *vs);  | 
4437  | 0  |       if (r < 0) return r;  | 
4438  | 0  |       if (IS_NOT_NULL(asc_cc)) { | 
4439  | 0  |   r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);  | 
4440  | 0  |   if (r < 0) return r;  | 
4441  | 0  |       }  | 
4442  | 0  |     }  | 
4443  | 0  |   }  | 
4444  |  |  | 
4445  | 1.64k  |   *state = CCS_VALUE;  | 
4446  | 1.64k  |   *type  = CCV_CLASS;  | 
4447  | 1.64k  |   return 0;  | 
4448  | 1.64k  | }  | 
4449  |  |  | 
4450  |  | static int  | 
4451  |  | next_state_val(CClassNode* cc, CClassNode* asc_cc,  | 
4452  |  |          OnigCodePoint *from, OnigCodePoint to,  | 
4453  |  |          int* from_israw, int to_israw,  | 
4454  |  |          enum CCVALTYPE intype, enum CCVALTYPE* type,  | 
4455  |  |          enum CCSTATE* state, ScanEnv* env)  | 
4456  | 20.9k  | { | 
4457  | 20.9k  |   int r;  | 
4458  |  |  | 
4459  | 20.9k  |   switch (*state) { | 
4460  | 16.4k  |   case CCS_VALUE:  | 
4461  | 16.4k  |     if (*type == CCV_SB) { | 
4462  | 14.7k  |       BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from));  | 
4463  | 14.7k  |       if (IS_NOT_NULL(asc_cc))  | 
4464  | 14.7k  |   BITSET_SET_BIT(asc_cc->bs, (int )(*from));  | 
4465  | 14.7k  |     }  | 
4466  | 1.64k  |     else if (*type == CCV_CODE_POINT) { | 
4467  | 0  |       r = add_code_range(&(cc->mbuf), env, *from, *from);  | 
4468  | 0  |       if (r < 0) return r;  | 
4469  | 0  |       if (IS_NOT_NULL(asc_cc)) { | 
4470  | 0  |   r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0);  | 
4471  | 0  |   if (r < 0) return r;  | 
4472  | 0  |       }  | 
4473  | 0  |     }  | 
4474  | 16.4k  |     break;  | 
4475  |  |  | 
4476  | 16.4k  |   case CCS_RANGE:  | 
4477  | 0  |     if (intype == *type) { | 
4478  | 0  |       if (intype == CCV_SB) { | 
4479  | 0  |   if (*from > 0xff || to > 0xff)  | 
4480  | 0  |     return ONIGERR_INVALID_CODE_POINT_VALUE;  | 
4481  |  |  | 
4482  | 0  |   if (*from > to) { | 
4483  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))  | 
4484  | 0  |       goto ccs_range_end;  | 
4485  | 0  |     else  | 
4486  | 0  |       return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;  | 
4487  | 0  |   }  | 
4488  | 0  |   bitset_set_range(env, cc->bs, (int )*from, (int )to);  | 
4489  | 0  |   if (IS_NOT_NULL(asc_cc))  | 
4490  | 0  |     bitset_set_range(env, asc_cc->bs, (int )*from, (int )to);  | 
4491  | 0  |       }  | 
4492  | 0  |       else { | 
4493  | 0  |   r = add_code_range(&(cc->mbuf), env, *from, to);  | 
4494  | 0  |   if (r < 0) return r;  | 
4495  | 0  |   if (IS_NOT_NULL(asc_cc)) { | 
4496  | 0  |     r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0);  | 
4497  | 0  |     if (r < 0) return r;  | 
4498  | 0  |   }  | 
4499  | 0  |       }  | 
4500  | 0  |     }  | 
4501  | 0  |     else { | 
4502  | 0  |       if (*from > to) { | 
4503  | 0  |   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))  | 
4504  | 0  |     goto ccs_range_end;  | 
4505  | 0  |   else  | 
4506  | 0  |     return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;  | 
4507  | 0  |       }  | 
4508  | 0  |       bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));  | 
4509  | 0  |       r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);  | 
4510  | 0  |       if (r < 0) return r;  | 
4511  | 0  |       if (IS_NOT_NULL(asc_cc)) { | 
4512  | 0  |   bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));  | 
4513  | 0  |   r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0);  | 
4514  | 0  |   if (r < 0) return r;  | 
4515  | 0  |       }  | 
4516  | 0  |     }  | 
4517  | 0  |   ccs_range_end:  | 
4518  | 0  |     *state = CCS_COMPLETE;  | 
4519  | 0  |     break;  | 
4520  |  |  | 
4521  | 0  |   case CCS_COMPLETE:  | 
4522  | 4.52k  |   case CCS_START:  | 
4523  | 4.52k  |     *state = CCS_VALUE;  | 
4524  | 4.52k  |     break;  | 
4525  |  |  | 
4526  | 0  |   default:  | 
4527  | 0  |     break;  | 
4528  | 20.9k  |   }  | 
4529  |  |  | 
4530  | 20.9k  |   *from_israw = to_israw;  | 
4531  | 20.9k  |   *from       = to;  | 
4532  | 20.9k  |   *type       = intype;  | 
4533  | 20.9k  |   return 0;  | 
4534  | 20.9k  | }  | 
4535  |  |  | 
4536  |  | static int  | 
4537  |  | code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,  | 
4538  |  |      ScanEnv* env)  | 
4539  | 0  | { | 
4540  | 0  |   int in_esc;  | 
4541  | 0  |   OnigCodePoint code;  | 
4542  | 0  |   OnigEncoding enc = env->enc;  | 
4543  | 0  |   UChar* p = from;  | 
4544  |  | 
  | 
4545  | 0  |   in_esc = 0;  | 
4546  | 0  |   while (! PEND) { | 
4547  | 0  |     if (ignore_escaped && in_esc) { | 
4548  | 0  |       in_esc = 0;  | 
4549  | 0  |     }  | 
4550  | 0  |     else { | 
4551  | 0  |       PFETCH_S(code);  | 
4552  | 0  |       if (code == c) return 1;  | 
4553  | 0  |       if (code == MC_ESC(env->syntax)) in_esc = 1;  | 
4554  | 0  |     }  | 
4555  | 0  |   }  | 
4556  | 0  |   return 0;  | 
4557  | 0  | }  | 
4558  |  |  | 
4559  |  | static int  | 
4560  |  | parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,  | 
4561  |  |      ScanEnv* env)  | 
4562  | 6.16k  | { | 
4563  | 6.16k  |   int r, neg, len, fetched, and_start;  | 
4564  | 6.16k  |   OnigCodePoint v, vs;  | 
4565  | 6.16k  |   UChar *p;  | 
4566  | 6.16k  |   Node* node;  | 
4567  | 6.16k  |   Node* asc_node;  | 
4568  | 6.16k  |   CClassNode *cc, *prev_cc;  | 
4569  | 6.16k  |   CClassNode *asc_cc, *asc_prev_cc;  | 
4570  | 6.16k  |   CClassNode work_cc, asc_work_cc;  | 
4571  |  |  | 
4572  | 6.16k  |   enum CCSTATE state;  | 
4573  | 6.16k  |   enum CCVALTYPE val_type, in_type;  | 
4574  | 6.16k  |   int val_israw, in_israw;  | 
4575  |  |  | 
4576  | 6.16k  |   *np = *asc_np = NULL_NODE;  | 
4577  | 6.16k  |   env->parse_depth++;  | 
4578  | 6.16k  |   if (env->parse_depth > ParseDepthLimit)  | 
4579  | 0  |     return ONIGERR_PARSE_DEPTH_LIMIT_OVER;  | 
4580  | 6.16k  |   prev_cc = asc_prev_cc = (CClassNode* )NULL;  | 
4581  | 6.16k  |   r = fetch_token_in_cc(tok, src, end, env);  | 
4582  | 6.16k  |   if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { | 
4583  | 2.46k  |     neg = 1;  | 
4584  | 2.46k  |     r = fetch_token_in_cc(tok, src, end, env);  | 
4585  | 2.46k  |   }  | 
4586  | 3.69k  |   else { | 
4587  | 3.69k  |     neg = 0;  | 
4588  | 3.69k  |   }  | 
4589  |  |  | 
4590  | 6.16k  |   if (r < 0) return r;  | 
4591  | 6.16k  |   if (r == TK_CC_CLOSE) { | 
4592  | 0  |     if (! code_exist_check((OnigCodePoint )']',  | 
4593  | 0  |                            *src, env->pattern_end, 1, env))  | 
4594  | 0  |       return ONIGERR_EMPTY_CHAR_CLASS;  | 
4595  |  |  | 
4596  | 0  |     CC_ESC_WARN(env, (UChar* )"]");  | 
4597  | 0  |     r = tok->type = TK_CHAR;  /* allow []...] */  | 
4598  | 0  |   }  | 
4599  |  |  | 
4600  | 6.16k  |   *np = node = node_new_cclass();  | 
4601  | 6.16k  |   CHECK_NULL_RETURN_MEMERR(node);  | 
4602  | 6.16k  |   cc = NCCLASS(node);  | 
4603  |  |  | 
4604  | 6.16k  |   if (IS_IGNORECASE(env->option)) { | 
4605  | 0  |     *asc_np = asc_node = node_new_cclass();  | 
4606  | 0  |     CHECK_NULL_RETURN_MEMERR(asc_node);  | 
4607  | 0  |     asc_cc = NCCLASS(asc_node);  | 
4608  | 0  |   }  | 
4609  | 6.16k  |   else { | 
4610  | 6.16k  |     asc_node = NULL_NODE;  | 
4611  | 6.16k  |     asc_cc = NULL;  | 
4612  | 6.16k  |   }  | 
4613  |  |  | 
4614  | 6.16k  |   and_start = 0;  | 
4615  | 6.16k  |   state = CCS_START;  | 
4616  | 6.16k  |   p = *src;  | 
4617  | 22.6k  |   while (r != TK_CC_CLOSE) { | 
4618  | 16.4k  |     fetched = 0;  | 
4619  | 16.4k  |     switch (r) { | 
4620  | 9.86k  |     case TK_CHAR:  | 
4621  | 9.86k  |       if ((tok->u.code >= SINGLE_BYTE_SIZE) ||  | 
4622  | 9.86k  |     (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { | 
4623  | 0  |   in_type = CCV_CODE_POINT;  | 
4624  | 0  |       }  | 
4625  | 9.86k  |       else if (len < 0) { | 
4626  | 0  |   r = len;  | 
4627  | 0  |   goto err;  | 
4628  | 0  |       }  | 
4629  | 9.86k  |       else { | 
4630  | 9.86k  |       sb_char:  | 
4631  | 9.86k  |   in_type = CCV_SB;  | 
4632  | 9.86k  |       }  | 
4633  | 9.86k  |       v = (OnigCodePoint )tok->u.c;  | 
4634  | 9.86k  |       in_israw = 0;  | 
4635  | 9.86k  |       goto val_entry2;  | 
4636  | 0  |       break;  | 
4637  |  |  | 
4638  | 0  |     case TK_RAW_BYTE:  | 
4639  |  |       /* tok->base != 0 : octal or hexadec. */  | 
4640  | 0  |       if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { | 
4641  | 0  |   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];  | 
4642  | 0  |   UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;  | 
4643  | 0  |   UChar* psave = p;  | 
4644  | 0  |   int i, base = tok->base;  | 
4645  |  | 
  | 
4646  | 0  |   buf[0] = (UChar )tok->u.c;  | 
4647  | 0  |   for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { | 
4648  | 0  |     r = fetch_token_in_cc(tok, &p, end, env);  | 
4649  | 0  |     if (r < 0) goto err;  | 
4650  | 0  |     if (r != TK_RAW_BYTE || tok->base != base) { | 
4651  | 0  |       fetched = 1;  | 
4652  | 0  |       break;  | 
4653  | 0  |     }  | 
4654  | 0  |     buf[i] = (UChar )tok->u.c;  | 
4655  | 0  |   }  | 
4656  |  |  | 
4657  | 0  |   if (i < ONIGENC_MBC_MINLEN(env->enc)) { | 
4658  | 0  |     r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;  | 
4659  | 0  |     goto err;  | 
4660  | 0  |   }  | 
4661  |  |  | 
4662  | 0  |   if (env->enc == ONIG_ENCODING_EUC_JP ||  | 
4663  | 0  |     env->enc == ONIG_ENCODING_SJIS) { | 
4664  |  |     /* Strict version of enclen does not handle invalid single code  | 
4665  |  |      * point for SJIS and EUC-JP...*/  | 
4666  | 0  |     len = enclen_approximate(env->enc, buf, buf + i);  | 
4667  | 0  |   }  | 
4668  | 0  |   else { | 
4669  | 0  |     len = enclen(env->enc, buf, buf + i);  | 
4670  | 0  |   }  | 
4671  | 0  |   if (i < len) { | 
4672  | 0  |     r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;  | 
4673  | 0  |     goto err;  | 
4674  | 0  |   }  | 
4675  | 0  |   else if (i > len) { /* fetch back */ | 
4676  | 0  |     p = psave;  | 
4677  | 0  |     for (i = 1; i < len; i++) { | 
4678  | 0  |       (void)fetch_token_in_cc(tok, &p, end, env);  | 
4679  |  |       /* no need to check the retun value (already checked above) */  | 
4680  | 0  |     }  | 
4681  | 0  |     fetched = 0;  | 
4682  | 0  |   }  | 
4683  |  |  | 
4684  | 0  |   if (i == 1) { | 
4685  | 0  |     v = (OnigCodePoint )buf[0];  | 
4686  | 0  |     goto raw_single;  | 
4687  | 0  |   }  | 
4688  | 0  |   else { | 
4689  | 0  |     v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);  | 
4690  | 0  |     in_type = CCV_CODE_POINT;  | 
4691  | 0  |   }  | 
4692  | 0  |       }  | 
4693  | 0  |       else { | 
4694  | 0  |   v = (OnigCodePoint )tok->u.c;  | 
4695  | 0  |       raw_single:  | 
4696  | 0  |   in_type = CCV_SB;  | 
4697  | 0  |       }  | 
4698  | 0  |       in_israw = 1;  | 
4699  | 0  |       goto val_entry2;  | 
4700  | 0  |       break;  | 
4701  |  |  | 
4702  | 4.93k  |     case TK_CODE_POINT:  | 
4703  | 4.93k  |       v = tok->u.code;  | 
4704  | 4.93k  |       in_israw = 1;  | 
4705  | 4.93k  |     val_entry:  | 
4706  | 4.93k  |       len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);  | 
4707  | 4.93k  |       if (len < 0) { | 
4708  | 0  |   r = len;  | 
4709  | 0  |   goto err;  | 
4710  | 0  |       }  | 
4711  | 4.93k  |       in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);  | 
4712  | 14.7k  |     val_entry2:  | 
4713  | 14.7k  |       r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,  | 
4714  | 14.7k  |        &state, env);  | 
4715  | 14.7k  |       if (r != 0) goto err;  | 
4716  | 14.7k  |       break;  | 
4717  |  |  | 
4718  | 14.7k  |     case TK_POSIX_BRACKET_OPEN:  | 
4719  | 0  |       r = parse_posix_bracket(cc, asc_cc, &p, end, env);  | 
4720  | 0  |       if (r < 0) goto err;  | 
4721  | 0  |       if (r == 1) {  /* is not POSIX bracket */ | 
4722  | 0  |   CC_ESC_WARN(env, (UChar* )"[");  | 
4723  | 0  |   p = tok->backp;  | 
4724  | 0  |   v = (OnigCodePoint )tok->u.c;  | 
4725  | 0  |   in_israw = 0;  | 
4726  | 0  |   goto val_entry;  | 
4727  | 0  |       }  | 
4728  | 0  |       goto next_class;  | 
4729  | 0  |       break;  | 
4730  |  |  | 
4731  | 1.64k  |     case TK_CHAR_TYPE:  | 
4732  | 1.64k  |       r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,  | 
4733  | 1.64k  |         IS_ASCII_RANGE(env->option), env);  | 
4734  | 1.64k  |       if (r != 0) return r;  | 
4735  | 1.64k  |       if (IS_NOT_NULL(asc_cc)) { | 
4736  | 0  |   if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)  | 
4737  | 0  |     r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,  | 
4738  | 0  |             IS_ASCII_RANGE(env->option), env);  | 
4739  | 0  |   if (r != 0) return r;  | 
4740  | 0  |       }  | 
4741  |  |  | 
4742  | 1.64k  |     next_class:  | 
4743  | 1.64k  |       r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);  | 
4744  | 1.64k  |       if (r != 0) goto err;  | 
4745  | 1.64k  |       break;  | 
4746  |  |  | 
4747  | 1.64k  |     case TK_CHAR_PROPERTY:  | 
4748  | 0  |       { | 
4749  | 0  |   int ctype;  | 
4750  |  | 
  | 
4751  | 0  |   ctype = fetch_char_property_to_ctype(&p, end, env);  | 
4752  | 0  |   if (ctype < 0) return ctype;  | 
4753  | 0  |   r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);  | 
4754  | 0  |   if (r != 0) return r;  | 
4755  | 0  |   if (IS_NOT_NULL(asc_cc)) { | 
4756  | 0  |     if (ctype != ONIGENC_CTYPE_ASCII)  | 
4757  | 0  |       r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);  | 
4758  | 0  |     if (r != 0) return r;  | 
4759  | 0  |   }  | 
4760  | 0  |   goto next_class;  | 
4761  | 0  |       }  | 
4762  | 0  |       break;  | 
4763  |  |  | 
4764  | 0  |     case TK_CC_RANGE:  | 
4765  | 0  |       if (state == CCS_VALUE) { | 
4766  | 0  |   r = fetch_token_in_cc(tok, &p, end, env);  | 
4767  | 0  |   if (r < 0) goto err;  | 
4768  | 0  |   fetched = 1;  | 
4769  | 0  |   if (r == TK_CC_CLOSE) { /* allow [x-] */ | 
4770  | 0  |   range_end_val:  | 
4771  | 0  |     v = (OnigCodePoint )'-';  | 
4772  | 0  |     in_israw = 0;  | 
4773  | 0  |     goto val_entry;  | 
4774  | 0  |   }  | 
4775  | 0  |   else if (r == TK_CC_AND) { | 
4776  | 0  |     CC_ESC_WARN(env, (UChar* )"-");  | 
4777  | 0  |     goto range_end_val;  | 
4778  | 0  |   }  | 
4779  |  |  | 
4780  | 0  |   if (val_type == CCV_CLASS) { | 
4781  | 0  |     r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;  | 
4782  | 0  |     goto err;  | 
4783  | 0  |   }  | 
4784  |  |  | 
4785  | 0  |   state = CCS_RANGE;  | 
4786  | 0  |       }  | 
4787  | 0  |       else if (state == CCS_START) { | 
4788  |  |   /* [-xa] is allowed */  | 
4789  | 0  |   v = (OnigCodePoint )tok->u.c;  | 
4790  | 0  |   in_israw = 0;  | 
4791  |  | 
  | 
4792  | 0  |   r = fetch_token_in_cc(tok, &p, end, env);  | 
4793  | 0  |   if (r < 0) goto err;  | 
4794  | 0  |   fetched = 1;  | 
4795  |  |   /* [--x] or [a&&-x] is warned. */  | 
4796  | 0  |   if (r == TK_CC_RANGE || and_start != 0)  | 
4797  | 0  |     CC_ESC_WARN(env, (UChar* )"-");  | 
4798  |  | 
  | 
4799  | 0  |   goto val_entry;  | 
4800  | 0  |       }  | 
4801  | 0  |       else if (state == CCS_RANGE) { | 
4802  | 0  |   CC_ESC_WARN(env, (UChar* )"-");  | 
4803  | 0  |   goto sb_char;  /* [!--x] is allowed */  | 
4804  | 0  |       }  | 
4805  | 0  |       else { /* CCS_COMPLETE */ | 
4806  | 0  |   r = fetch_token_in_cc(tok, &p, end, env);  | 
4807  | 0  |   if (r < 0) goto err;  | 
4808  | 0  |   fetched = 1;  | 
4809  | 0  |   if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */  | 
4810  | 0  |   else if (r == TK_CC_AND) { | 
4811  | 0  |     CC_ESC_WARN(env, (UChar* )"-");  | 
4812  | 0  |     goto range_end_val;  | 
4813  | 0  |   }  | 
4814  |  |  | 
4815  | 0  |   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { | 
4816  | 0  |     CC_ESC_WARN(env, (UChar* )"-");  | 
4817  | 0  |     goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */  | 
4818  | 0  |   }  | 
4819  | 0  |   r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;  | 
4820  | 0  |   goto err;  | 
4821  | 0  |       }  | 
4822  | 0  |       break;  | 
4823  |  |  | 
4824  | 0  |     case TK_CC_CC_OPEN: /* [ */  | 
4825  | 0  |       { | 
4826  | 0  |   Node *anode, *aasc_node;  | 
4827  | 0  |   CClassNode* acc;  | 
4828  |  | 
  | 
4829  | 0  |   r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);  | 
4830  | 0  |   if (r == 0) { | 
4831  | 0  |     acc = NCCLASS(anode);  | 
4832  | 0  |     r = or_cclass(cc, acc, env);  | 
4833  | 0  |   }  | 
4834  | 0  |   if (r == 0 && IS_NOT_NULL(aasc_node)) { | 
4835  | 0  |     acc = NCCLASS(aasc_node);  | 
4836  | 0  |     r = or_cclass(asc_cc, acc, env);  | 
4837  | 0  |   }  | 
4838  | 0  |   onig_node_free(anode);  | 
4839  | 0  |   onig_node_free(aasc_node);  | 
4840  | 0  |   if (r != 0) goto err;  | 
4841  | 0  |       }  | 
4842  | 0  |       break;  | 
4843  |  |  | 
4844  | 0  |     case TK_CC_AND: /* && */  | 
4845  | 0  |       { | 
4846  | 0  |   if (state == CCS_VALUE) { | 
4847  | 0  |     r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,  | 
4848  | 0  |            &val_type, &state, env);  | 
4849  | 0  |     if (r != 0) goto err;  | 
4850  | 0  |   }  | 
4851  |  |   /* initialize local variables */  | 
4852  | 0  |   and_start = 1;  | 
4853  | 0  |   state = CCS_START;  | 
4854  |  | 
  | 
4855  | 0  |   if (IS_NOT_NULL(prev_cc)) { | 
4856  | 0  |     r = and_cclass(prev_cc, cc, env);  | 
4857  | 0  |     if (r != 0) goto err;  | 
4858  | 0  |     bbuf_free(cc->mbuf);  | 
4859  | 0  |     if (IS_NOT_NULL(asc_cc)) { | 
4860  | 0  |       r = and_cclass(asc_prev_cc, asc_cc, env);  | 
4861  | 0  |       if (r != 0) goto err;  | 
4862  | 0  |       bbuf_free(asc_cc->mbuf);  | 
4863  | 0  |     }  | 
4864  | 0  |   }  | 
4865  | 0  |   else { | 
4866  | 0  |     prev_cc = cc;  | 
4867  | 0  |     cc = &work_cc;  | 
4868  | 0  |     if (IS_NOT_NULL(asc_cc)) { | 
4869  | 0  |       asc_prev_cc = asc_cc;  | 
4870  | 0  |       asc_cc = &asc_work_cc;  | 
4871  | 0  |     }  | 
4872  | 0  |   }  | 
4873  | 0  |   initialize_cclass(cc);  | 
4874  | 0  |   if (IS_NOT_NULL(asc_cc))  | 
4875  | 0  |     initialize_cclass(asc_cc);  | 
4876  | 0  |       }  | 
4877  | 0  |       break;  | 
4878  |  |  | 
4879  | 0  |     case TK_EOT:  | 
4880  | 0  |       r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;  | 
4881  | 0  |       goto err;  | 
4882  | 0  |       break;  | 
4883  | 0  |     default:  | 
4884  | 0  |       r = ONIGERR_PARSER_BUG;  | 
4885  | 0  |       goto err;  | 
4886  | 0  |       break;  | 
4887  | 16.4k  |     }  | 
4888  |  |  | 
4889  | 16.4k  |     if (fetched)  | 
4890  | 0  |       r = tok->type;  | 
4891  | 16.4k  |     else { | 
4892  | 16.4k  |       r = fetch_token_in_cc(tok, &p, end, env);  | 
4893  | 16.4k  |       if (r < 0) goto err;  | 
4894  | 16.4k  |     }  | 
4895  | 16.4k  |   }  | 
4896  |  |  | 
4897  | 6.16k  |   if (state == CCS_VALUE) { | 
4898  | 6.16k  |     r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,  | 
4899  | 6.16k  |            &val_type, &state, env);  | 
4900  | 6.16k  |     if (r != 0) goto err;  | 
4901  | 6.16k  |   }  | 
4902  |  |  | 
4903  | 6.16k  |   if (IS_NOT_NULL(prev_cc)) { | 
4904  | 0  |     r = and_cclass(prev_cc, cc, env);  | 
4905  | 0  |     if (r != 0) goto err;  | 
4906  | 0  |     bbuf_free(cc->mbuf);  | 
4907  | 0  |     cc = prev_cc;  | 
4908  | 0  |     if (IS_NOT_NULL(asc_cc)) { | 
4909  | 0  |       r = and_cclass(asc_prev_cc, asc_cc, env);  | 
4910  | 0  |       if (r != 0) goto err;  | 
4911  | 0  |       bbuf_free(asc_cc->mbuf);  | 
4912  | 0  |       asc_cc = asc_prev_cc;  | 
4913  | 0  |     }  | 
4914  | 0  |   }  | 
4915  |  |  | 
4916  | 6.16k  |   if (neg != 0) { | 
4917  | 2.46k  |     NCCLASS_SET_NOT(cc);  | 
4918  | 2.46k  |     if (IS_NOT_NULL(asc_cc))  | 
4919  | 2.46k  |       NCCLASS_SET_NOT(asc_cc);  | 
4920  | 2.46k  |   }  | 
4921  | 3.69k  |   else { | 
4922  | 3.69k  |     NCCLASS_CLEAR_NOT(cc);  | 
4923  | 3.69k  |     if (IS_NOT_NULL(asc_cc))  | 
4924  | 3.69k  |       NCCLASS_CLEAR_NOT(asc_cc);  | 
4925  | 3.69k  |   }  | 
4926  | 6.16k  |   if (IS_NCCLASS_NOT(cc) &&  | 
4927  | 6.16k  |       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { | 
4928  | 0  |     int is_empty;  | 
4929  |  | 
  | 
4930  | 0  |     is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);  | 
4931  | 0  |     if (is_empty != 0)  | 
4932  | 0  |       BITSET_IS_EMPTY(cc->bs, is_empty);  | 
4933  |  | 
  | 
4934  | 0  |     if (is_empty == 0) { | 
4935  | 0  | #define NEWLINE_CODE    0x0a  | 
4936  |  | 
  | 
4937  | 0  |       if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { | 
4938  | 0  |   if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)  | 
4939  | 0  |     BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);  | 
4940  | 0  |   else { | 
4941  | 0  |     r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);  | 
4942  | 0  |     if (r < 0) goto err;  | 
4943  | 0  |   }  | 
4944  | 0  |       }  | 
4945  | 0  |     }  | 
4946  | 0  |   }  | 
4947  | 6.16k  |   *src = p;  | 
4948  | 6.16k  |   env->parse_depth--;  | 
4949  | 6.16k  |   return 0;  | 
4950  |  |  | 
4951  | 0  |  err:  | 
4952  | 0  |   if (cc != NCCLASS(*np))  | 
4953  | 0  |     bbuf_free(cc->mbuf);  | 
4954  | 0  |   if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))  | 
4955  | 0  |     bbuf_free(asc_cc->mbuf);  | 
4956  | 0  |   return r;  | 
4957  | 6.16k  | }  | 
4958  |  |  | 
4959  |  | static int parse_subexp(Node** top, OnigToken* tok, int term,  | 
4960  |  |       UChar** src, UChar* end, ScanEnv* env);  | 
4961  |  |  | 
4962  |  | static int  | 
4963  |  | parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,  | 
4964  |  |         ScanEnv* env)  | 
4965  | 4.93k  | { | 
4966  | 4.93k  |   int r = 0, num;  | 
4967  | 4.93k  |   Node *target, *work1 = NULL, *work2 = NULL;  | 
4968  | 4.93k  |   OnigOptionType option;  | 
4969  | 4.93k  |   OnigCodePoint c;  | 
4970  | 4.93k  |   OnigEncoding enc = env->enc;  | 
4971  |  |  | 
4972  | 4.93k  | #ifdef USE_NAMED_GROUP  | 
4973  | 4.93k  |   int list_capture;  | 
4974  | 4.93k  | #endif  | 
4975  |  |  | 
4976  | 4.93k  |   UChar* p = *src;  | 
4977  | 4.93k  |   PFETCH_READY;  | 
4978  |  |  | 
4979  | 4.93k  |   *np = NULL;  | 
4980  | 4.93k  |   if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;  | 
4981  |  |  | 
4982  | 4.93k  |   option = env->option;  | 
4983  | 4.93k  |   if (PPEEK_IS('?') && | 
4984  | 4.93k  |       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { | 
4985  | 4.52k  |     PINC;  | 
4986  | 4.52k  |     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;  | 
4987  |  |  | 
4988  | 4.52k  |     PFETCH(c);  | 
4989  | 4.52k  |     switch (c) { | 
4990  | 2.46k  |     case ':':   /* (?:...) grouping only */  | 
4991  | 2.46k  |     group:  | 
4992  | 2.46k  |       r = fetch_token(tok, &p, end, env);  | 
4993  | 2.46k  |       if (r < 0) return r;  | 
4994  | 2.46k  |       r = parse_subexp(np, tok, term, &p, end, env);  | 
4995  | 2.46k  |       if (r < 0) return r;  | 
4996  | 2.46k  |       *src = p;  | 
4997  | 2.46k  |       return 1; /* group */  | 
4998  | 0  |       break;  | 
4999  |  |  | 
5000  | 0  |     case '=':  | 
5001  | 0  |       *np = onig_node_new_anchor(ANCHOR_PREC_READ);  | 
5002  | 0  |       break;  | 
5003  | 0  |     case '!':   /* preceding read */  | 
5004  | 0  |       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);  | 
5005  | 0  |       break;  | 
5006  | 0  |     case '>':   /* (?>...) stop backtrack */  | 
5007  | 0  |       *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);  | 
5008  | 0  |       break;  | 
5009  | 0  |     case '~':   /* (?~...) absent operator */  | 
5010  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT)) { | 
5011  | 0  |   *np = node_new_enclose(ENCLOSE_ABSENT);  | 
5012  | 0  |       }  | 
5013  | 0  |       else { | 
5014  | 0  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5015  | 0  |       }  | 
5016  | 0  |       break;  | 
5017  |  |  | 
5018  | 0  | #ifdef USE_NAMED_GROUP  | 
5019  | 0  |     case '\'':  | 
5020  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { | 
5021  | 0  |   goto named_group1;  | 
5022  | 0  |       }  | 
5023  | 0  |       else  | 
5024  | 0  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5025  | 0  |       break;  | 
5026  |  |  | 
5027  | 0  | # ifdef USE_CAPITAL_P_NAMED_GROUP  | 
5028  | 0  |     case 'P':   /* (?P<name>...) */  | 
5029  | 0  |       if (!PEND &&  | 
5030  | 0  |     IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { | 
5031  | 0  |   PFETCH(c);  | 
5032  | 0  |   if (c == '<') goto named_group1;  | 
5033  | 0  |       }  | 
5034  | 0  |       return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5035  | 0  |       break;  | 
5036  | 0  | # endif  | 
5037  | 0  | #endif  | 
5038  |  |  | 
5039  | 1.64k  |     case '<':   /* look behind (?<=...), (?<!...) */  | 
5040  | 1.64k  |       if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;  | 
5041  | 1.64k  |       PFETCH(c);  | 
5042  | 1.64k  |       if (c == '=')  | 
5043  | 0  |   *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);  | 
5044  | 1.64k  |       else if (c == '!')  | 
5045  | 0  |   *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);  | 
5046  | 1.64k  | #ifdef USE_NAMED_GROUP  | 
5047  | 1.64k  |       else {    /* (?<name>...) */ | 
5048  | 1.64k  |   if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { | 
5049  | 1.64k  |     UChar *name;  | 
5050  | 1.64k  |     UChar *name_end;  | 
5051  |  |  | 
5052  | 1.64k  |     PUNFETCH;  | 
5053  | 1.64k  |     c = '<';  | 
5054  |  |  | 
5055  | 1.64k  |   named_group1:  | 
5056  | 1.64k  |     list_capture = 0;  | 
5057  |  |  | 
5058  | 1.64k  | # ifdef USE_CAPTURE_HISTORY  | 
5059  | 1.64k  |   named_group2:  | 
5060  | 1.64k  | # endif  | 
5061  | 1.64k  |     name = p;  | 
5062  | 1.64k  |     r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);  | 
5063  | 1.64k  |     if (r < 0) return r;  | 
5064  |  |  | 
5065  | 1.64k  |     num = scan_env_add_mem_entry(env);  | 
5066  | 1.64k  |     if (num < 0) return num;  | 
5067  | 1.64k  |     if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)  | 
5068  | 0  |       return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;  | 
5069  |  |  | 
5070  | 1.64k  |     r = name_add(env->reg, name, name_end, num, env);  | 
5071  | 1.64k  |     if (r != 0) return r;  | 
5072  | 1.64k  |     *np = node_new_enclose_memory(env->option, 1);  | 
5073  | 1.64k  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
5074  | 1.64k  |     NENCLOSE(*np)->regnum = num;  | 
5075  | 1.64k  |     if (list_capture != 0)  | 
5076  | 0  |       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);  | 
5077  | 1.64k  |     env->num_named++;  | 
5078  | 1.64k  |   }  | 
5079  | 0  |   else { | 
5080  | 0  |     return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5081  | 0  |   }  | 
5082  | 1.64k  |       }  | 
5083  |  | #else  | 
5084  |  |       else { | 
5085  |  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5086  |  |       }  | 
5087  |  | #endif  | 
5088  | 1.64k  |       break;  | 
5089  |  |  | 
5090  | 1.64k  | #ifdef USE_CAPTURE_HISTORY  | 
5091  | 1.64k  |     case '@':  | 
5092  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { | 
5093  | 0  | # ifdef USE_NAMED_GROUP  | 
5094  | 0  |   if (!PEND &&  | 
5095  | 0  |       IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { | 
5096  | 0  |     PFETCH(c);  | 
5097  | 0  |     if (c == '<' || c == '\'') { | 
5098  | 0  |       list_capture = 1;  | 
5099  | 0  |       goto named_group2; /* (?@<name>...) */  | 
5100  | 0  |     }  | 
5101  | 0  |     PUNFETCH;  | 
5102  | 0  |   }  | 
5103  | 0  | # endif  | 
5104  | 0  |   *np = node_new_enclose_memory(env->option, 0);  | 
5105  | 0  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
5106  | 0  |   num = scan_env_add_mem_entry(env);  | 
5107  | 0  |   if (num < 0) return num;  | 
5108  | 0  |   if (num >= (int )BIT_STATUS_BITS_NUM)  | 
5109  | 0  |     return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;  | 
5110  |  |  | 
5111  | 0  |   NENCLOSE(*np)->regnum = num;  | 
5112  | 0  |   BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);  | 
5113  | 0  |       }  | 
5114  | 0  |       else { | 
5115  | 0  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5116  | 0  |       }  | 
5117  | 0  |       break;  | 
5118  | 0  | #endif /* USE_CAPTURE_HISTORY */  | 
5119  |  |  | 
5120  | 0  |     case '(':   /* conditional expression: (?(cond)yes), (?(cond)yes|no) */ | 
5121  | 0  |       if (!PEND &&  | 
5122  | 0  |     IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) { | 
5123  | 0  |   UChar *name = NULL;  | 
5124  | 0  |   UChar *name_end;  | 
5125  | 0  |   PFETCH(c);  | 
5126  | 0  |   if (ONIGENC_IS_CODE_DIGIT(enc, c)) {     /* (n) */ | 
5127  | 0  |     PUNFETCH;  | 
5128  | 0  |     r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1); | 
5129  | 0  |     if (r < 0) return r;  | 
5130  |  | #if 0  | 
5131  |  |     /* Relative number is not currently supported. (same as Perl) */  | 
5132  |  |     if (num < 0) { | 
5133  |  |       num = BACKREF_REL_TO_ABS(num, env);  | 
5134  |  |       if (num <= 0)  | 
5135  |  |         return ONIGERR_INVALID_BACKREF;  | 
5136  |  |     }  | 
5137  |  | #endif  | 
5138  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { | 
5139  | 0  |       if (num > env->num_mem ||  | 
5140  | 0  |     IS_NULL(SCANENV_MEM_NODES(env)[num]))  | 
5141  | 0  |       return ONIGERR_INVALID_BACKREF;  | 
5142  | 0  |     }  | 
5143  | 0  |   }  | 
5144  | 0  | #ifdef USE_NAMED_GROUP  | 
5145  | 0  |   else if (c == '<' || c == '\'') {    /* (<name>), ('name') */ | 
5146  | 0  |     name = p;  | 
5147  | 0  |     r = fetch_named_backref_token(c, tok, &p, end, env);  | 
5148  | 0  |     if (r < 0) return r;  | 
5149  | 0  |     if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION; | 
5150  | 0  |     PINC;  | 
5151  |  | 
  | 
5152  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { | 
5153  | 0  |       num = tok->u.backref.ref1;  | 
5154  | 0  |     }  | 
5155  | 0  |     else { | 
5156  |  |       /* FIXME:  | 
5157  |  |        * Use left most named group for now. This is the same as Perl.  | 
5158  |  |        * However this should use the same strategy as normal back-  | 
5159  |  |        * references on Ruby syntax; search right to left. */  | 
5160  | 0  |       int len = tok->u.backref.num;  | 
5161  | 0  |       num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;  | 
5162  | 0  |     }  | 
5163  | 0  |   }  | 
5164  | 0  | #endif  | 
5165  | 0  |   else  | 
5166  | 0  |     return ONIGERR_INVALID_CONDITION_PATTERN;  | 
5167  | 0  |   *np = node_new_enclose(ENCLOSE_CONDITION);  | 
5168  | 0  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
5169  | 0  |   NENCLOSE(*np)->regnum = num;  | 
5170  | 0  |   if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;  | 
5171  | 0  |       }  | 
5172  | 0  |       else  | 
5173  | 0  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5174  | 0  |       break;  | 
5175  |  |  | 
5176  |  | #if 0  | 
5177  |  |     case '|':   /* branch reset: (?|...) */  | 
5178  |  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET)) { | 
5179  |  |   /* TODO */  | 
5180  |  |       }  | 
5181  |  |       else  | 
5182  |  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5183  |  |       break;  | 
5184  |  | #endif  | 
5185  |  |  | 
5186  | 0  |     case '^':   /* loads default options */  | 
5187  | 0  |       if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { | 
5188  |  |   /* d-imsx */  | 
5189  | 0  |   ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);  | 
5190  | 0  |   ONOFF(option, ONIG_OPTION_IGNORECASE, 1);  | 
5191  | 0  |   ONOFF(option, ONIG_OPTION_SINGLELINE, 0);  | 
5192  | 0  |   ONOFF(option, ONIG_OPTION_MULTILINE,  1);  | 
5193  | 0  |   ONOFF(option, ONIG_OPTION_EXTEND, 1);  | 
5194  | 0  |   PFETCH(c);  | 
5195  | 0  |       }  | 
5196  |  | #if 0  | 
5197  |  |       else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { | 
5198  |  |   /* d-imx */  | 
5199  |  |   ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);  | 
5200  |  |   ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);  | 
5201  |  |   ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0);  | 
5202  |  |   ONOFF(option, ONIG_OPTION_IGNORECASE, 1);  | 
5203  |  |   ONOFF(option, ONIG_OPTION_MULTILINE,  1);  | 
5204  |  |   ONOFF(option, ONIG_OPTION_EXTEND, 1);  | 
5205  |  |   PFETCH(c);  | 
5206  |  |       }  | 
5207  |  | #endif  | 
5208  | 0  |       else { | 
5209  | 0  |   return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5210  | 0  |       }  | 
5211  |  |       /* fall through */  | 
5212  |  | #ifdef USE_POSIXLINE_OPTION  | 
5213  |  |     case 'p':  | 
5214  |  | #endif  | 
5215  | 411  |     case '-': case 'i': case 'm': case 's': case 'x':  | 
5216  | 411  |     case 'a': case 'd': case 'l': case 'u':  | 
5217  | 411  |       { | 
5218  | 411  |   int neg = 0;  | 
5219  |  |  | 
5220  | 822  |   while (1) { | 
5221  | 822  |     switch (c) { | 
5222  | 411  |     case ':':  | 
5223  | 411  |     case ')':  | 
5224  | 411  |     break;  | 
5225  |  |  | 
5226  | 0  |     case '-':  neg = 1; break;  | 
5227  | 411  |     case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;  | 
5228  | 0  |     case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;  | 
5229  | 0  |     case 's':  | 
5230  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { | 
5231  | 0  |         ONOFF(option, ONIG_OPTION_MULTILINE,  neg);  | 
5232  | 0  |       }  | 
5233  | 0  |       else  | 
5234  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5235  | 0  |       break;  | 
5236  |  |  | 
5237  | 0  |     case 'm':  | 
5238  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { | 
5239  | 0  |         ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));  | 
5240  | 0  |       }  | 
5241  | 0  |       else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { | 
5242  | 0  |         ONOFF(option, ONIG_OPTION_MULTILINE,  neg);  | 
5243  | 0  |       }  | 
5244  | 0  |       else  | 
5245  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5246  | 0  |       break;  | 
5247  |  | #ifdef USE_POSIXLINE_OPTION  | 
5248  |  |     case 'p':  | 
5249  |  |       ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);  | 
5250  |  |       break;  | 
5251  |  | #endif  | 
5252  |  |  | 
5253  | 0  |     case 'a':     /* limits \d, \s, \w and POSIX brackets to ASCII range */  | 
5254  | 0  |       if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||  | 
5255  | 0  |      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&  | 
5256  | 0  |     (neg == 0)) { | 
5257  | 0  |         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);  | 
5258  | 0  |         ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);  | 
5259  | 0  |         ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);  | 
5260  | 0  |       }  | 
5261  | 0  |       else  | 
5262  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5263  | 0  |       break;  | 
5264  |  |  | 
5265  | 0  |     case 'u':  | 
5266  | 0  |       if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||  | 
5267  | 0  |      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&  | 
5268  | 0  |     (neg == 0)) { | 
5269  | 0  |         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);  | 
5270  | 0  |         ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);  | 
5271  | 0  |         ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);  | 
5272  | 0  |       }  | 
5273  | 0  |       else  | 
5274  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5275  | 0  |       break;  | 
5276  |  |  | 
5277  | 0  |     case 'd':  | 
5278  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) &&  | 
5279  | 0  |     (neg == 0)) { | 
5280  | 0  |         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);  | 
5281  | 0  |       }  | 
5282  | 0  |       else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&  | 
5283  | 0  |     (neg == 0)) { | 
5284  | 0  |         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);  | 
5285  | 0  |         ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);  | 
5286  | 0  |         ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0);  | 
5287  | 0  |       }  | 
5288  | 0  |       else  | 
5289  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5290  | 0  |       break;  | 
5291  |  |  | 
5292  | 0  |     case 'l':  | 
5293  | 0  |       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) { | 
5294  | 0  |         ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);  | 
5295  | 0  |       }  | 
5296  | 0  |       else  | 
5297  | 0  |         return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5298  | 0  |       break;  | 
5299  |  |  | 
5300  | 0  |     default:  | 
5301  | 0  |       return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5302  | 822  |     }  | 
5303  |  |  | 
5304  | 822  |     if (c == ')') { | 
5305  | 0  |       *np = node_new_option(option);  | 
5306  | 0  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
5307  | 0  |       *src = p;  | 
5308  | 0  |       return 2; /* option only */  | 
5309  | 0  |     }  | 
5310  | 822  |     else if (c == ':') { | 
5311  | 411  |       OnigOptionType prev = env->option;  | 
5312  |  |  | 
5313  | 411  |       env->option = option;  | 
5314  | 411  |       r = fetch_token(tok, &p, end, env);  | 
5315  | 411  |       if (r < 0) { | 
5316  | 0  |         env->option = prev;  | 
5317  | 0  |         return r;  | 
5318  | 0  |       }  | 
5319  | 411  |       r = parse_subexp(&target, tok, term, &p, end, env);  | 
5320  | 411  |       env->option = prev;  | 
5321  | 411  |       if (r < 0) return r;  | 
5322  | 411  |       *np = node_new_option(option);  | 
5323  | 411  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
5324  | 411  |       NENCLOSE(*np)->target = target;  | 
5325  | 411  |       *src = p;  | 
5326  | 411  |       return 0;  | 
5327  | 411  |     }  | 
5328  |  |  | 
5329  | 411  |     if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;  | 
5330  | 411  |     PFETCH(c);  | 
5331  | 411  |   }  | 
5332  | 411  |       }  | 
5333  | 0  |       break;  | 
5334  |  |  | 
5335  | 0  |     default:  | 
5336  | 0  |       return ONIGERR_UNDEFINED_GROUP_OPTION;  | 
5337  | 4.52k  |     }  | 
5338  | 4.52k  |   }  | 
5339  | 411  |   else { | 
5340  | 411  |     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))  | 
5341  | 0  |       goto group;  | 
5342  |  |  | 
5343  | 411  |     *np = node_new_enclose_memory(env->option, 0);  | 
5344  | 411  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
5345  | 411  |     num = scan_env_add_mem_entry(env);  | 
5346  | 411  |     if (num < 0) return num;  | 
5347  | 411  |     NENCLOSE(*np)->regnum = num;  | 
5348  | 411  |   }  | 
5349  |  |  | 
5350  | 2.05k  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
5351  | 2.05k  |   r = fetch_token(tok, &p, end, env);  | 
5352  | 2.05k  |   if (r < 0) return r;  | 
5353  | 2.05k  |   r = parse_subexp(&target, tok, term, &p, end, env);  | 
5354  | 2.05k  |   if (r < 0) { | 
5355  | 0  |     onig_node_free(target);  | 
5356  | 0  |     return r;  | 
5357  | 0  |   }  | 
5358  |  |  | 
5359  | 2.05k  |   if (NTYPE(*np) == NT_ANCHOR)  | 
5360  | 0  |     NANCHOR(*np)->target = target;  | 
5361  | 2.05k  |   else { | 
5362  | 2.05k  |     NENCLOSE(*np)->target = target;  | 
5363  | 2.05k  |     if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { | 
5364  |  |       /* Don't move this to previous of parse_subexp() */  | 
5365  | 2.05k  |       r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);  | 
5366  | 2.05k  |       if (r != 0) return r;  | 
5367  | 2.05k  |     }  | 
5368  | 0  |     else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) { | 
5369  | 0  |       if (NTYPE(target) != NT_ALT) { | 
5370  |  |   /* convert (?(cond)yes) to (?(cond)yes|empty) */  | 
5371  | 0  |   work1 = node_new_empty();  | 
5372  | 0  |   if (IS_NULL(work1)) goto err;  | 
5373  | 0  |   work2 = onig_node_new_alt(work1, NULL_NODE);  | 
5374  | 0  |   if (IS_NULL(work2)) goto err;  | 
5375  | 0  |   work1 = onig_node_new_alt(target, work2);  | 
5376  | 0  |   if (IS_NULL(work1)) goto err;  | 
5377  | 0  |   NENCLOSE(*np)->target = work1;  | 
5378  | 0  |       }  | 
5379  | 0  |     }  | 
5380  | 2.05k  |   }  | 
5381  |  |  | 
5382  | 2.05k  |   *src = p;  | 
5383  | 2.05k  |   return 0;  | 
5384  |  |  | 
5385  | 0  |  err:  | 
5386  | 0  |   onig_node_free(work1);  | 
5387  | 0  |   onig_node_free(work2);  | 
5388  | 0  |   onig_node_free(*np);  | 
5389  | 0  |   *np = NULL;  | 
5390  | 0  |   return ONIGERR_MEMORY;  | 
5391  | 2.05k  | }  | 
5392  |  |  | 
/* Spellings of the quantifiers, indexed by the value that set_quantifier()
 * obtains from popular_quantifier_num(). */
static const char* const PopularQStr[] = {
  "?",    /* 0 */
  "*",    /* 1 */
  "+",    /* 2 */
  "??",   /* 3 */
  "*?",   /* 4 */
  "+?"    /* 5 */
};

/* Replacement text used in the nested-repeat warning, indexed by the
 * ReduceTypeTable entry; the first two slots are empty strings. */
static const char* const ReduceQStr[] = {
  "",          /* 0 */
  "",          /* 1 */
  "*",         /* 2 */
  "*?",        /* 3 */
  "??",        /* 4 */
  "+ and ??",  /* 5 */
  "+? and ?"   /* 6 */
};
5400  |  |  | 
5401  |  | static int  | 
5402  |  | set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)  | 
5403  | 12.3k  | { | 
5404  | 12.3k  |   QtfrNode* qn;  | 
5405  |  |  | 
5406  | 12.3k  |   qn = NQTFR(qnode);  | 
5407  | 12.3k  |   if (qn->lower == 1 && qn->upper == 1) { | 
5408  | 0  |     return 1;  | 
5409  | 0  |   }  | 
5410  |  |  | 
5411  | 12.3k  |   switch (NTYPE(target)) { | 
5412  | 411  |   case NT_STR:  | 
5413  | 411  |     if (! group) { | 
5414  | 0  |       StrNode* sn = NSTR(target);  | 
5415  | 0  |       if (str_node_can_be_split(sn, env->enc)) { | 
5416  | 0  |   Node* n = str_node_split_last_char(sn, env->enc);  | 
5417  | 0  |   if (IS_NOT_NULL(n)) { | 
5418  | 0  |     qn->target = n;  | 
5419  | 0  |     return 2;  | 
5420  | 0  |   }  | 
5421  | 0  |       }  | 
5422  | 0  |     }  | 
5423  | 411  |     break;  | 
5424  |  |  | 
5425  | 411  |   case NT_QTFR:  | 
5426  | 0  |     { /* check redundant double repeat. */ | 
5427  |  |       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */  | 
5428  | 0  |       QtfrNode* qnt   = NQTFR(target);  | 
5429  | 0  |       int nestq_num   = popular_quantifier_num(qn);  | 
5430  | 0  |       int targetq_num = popular_quantifier_num(qnt);  | 
5431  |  | 
  | 
5432  | 0  | #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR  | 
5433  | 0  |       if (nestq_num >= 0 && targetq_num >= 0 &&  | 
5434  | 0  |     IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { | 
5435  | 0  |   switch (ReduceTypeTable[targetq_num][nestq_num]) { | 
5436  | 0  |   case RQ_ASIS:  | 
5437  | 0  |     break;  | 
5438  |  |  | 
5439  | 0  |   case RQ_DEL:  | 
5440  | 0  |     if (onig_warn != onig_null_warn) { | 
5441  | 0  |       onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",  | 
5442  | 0  |     PopularQStr[targetq_num]);  | 
5443  | 0  |     }  | 
5444  | 0  |     goto warn_exit;  | 
5445  | 0  |     break;  | 
5446  |  |  | 
5447  | 0  |   default:  | 
5448  | 0  |     if (onig_warn != onig_null_warn) { | 
5449  | 0  |       onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",  | 
5450  | 0  |     PopularQStr[targetq_num], PopularQStr[nestq_num],  | 
5451  | 0  |     ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);  | 
5452  | 0  |     }  | 
5453  | 0  |     goto warn_exit;  | 
5454  | 0  |     break;  | 
5455  | 0  |   }  | 
5456  | 0  |       }  | 
5457  |  |  | 
5458  | 0  |     warn_exit:  | 
5459  | 0  | #endif  | 
5460  | 0  |       if (targetq_num >= 0) { | 
5461  | 0  |   if (nestq_num >= 0) { | 
5462  | 0  |     onig_reduce_nested_quantifier(qnode, target);  | 
5463  | 0  |     goto q_exit;  | 
5464  | 0  |   }  | 
5465  | 0  |   else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ | 
5466  |  |     /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ | 
5467  | 0  |     if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { | 
5468  | 0  |       qn->upper = (qn->lower == 0 ? 1 : qn->lower);  | 
5469  | 0  |     }  | 
5470  | 0  |   }  | 
5471  | 0  |       }  | 
5472  | 0  |     }  | 
5473  | 0  |     break;  | 
5474  |  |  | 
5475  | 11.9k  |   default:  | 
5476  | 11.9k  |     break;  | 
5477  | 12.3k  |   }  | 
5478  |  |  | 
5479  | 12.3k  |   qn->target = target;  | 
5480  | 12.3k  |  q_exit:  | 
5481  | 12.3k  |   return 0;  | 
5482  | 12.3k  | }  | 
5483  |  |  | 
5484  |  |  | 
5485  |  | #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS  | 
5486  |  | static int  | 
5487  |  | clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)  | 
5488  |  | { | 
5489  |  |   BBuf *tbuf;  | 
5490  |  |   int r;  | 
5491  |  |  | 
5492  |  |   if (IS_NCCLASS_NOT(cc)) { | 
5493  |  |     bitset_invert(cc->bs);  | 
5494  |  |  | 
5495  |  |     if (! ONIGENC_IS_SINGLEBYTE(enc)) { | 
5496  |  |       r = not_code_range_buf(enc, cc->mbuf, &tbuf);  | 
5497  |  |       if (r != 0) return r;  | 
5498  |  |  | 
5499  |  |       bbuf_free(cc->mbuf);  | 
5500  |  |       cc->mbuf = tbuf;  | 
5501  |  |     }  | 
5502  |  |  | 
5503  |  |     NCCLASS_CLEAR_NOT(cc);  | 
5504  |  |   }  | 
5505  |  |  | 
5506  |  |   return 0;  | 
5507  |  | }  | 
5508  |  | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */  | 
5509  |  |  | 
5510  |  | typedef struct { | 
5511  |  |   ScanEnv*    env;  | 
5512  |  |   CClassNode* cc;  | 
5513  |  |   CClassNode* asc_cc;  | 
5514  |  |   Node*       alt_root;  | 
5515  |  |   Node**      ptail;  | 
5516  |  | } IApplyCaseFoldArg;  | 
5517  |  |  | 
5518  |  | static int  | 
5519  |  | i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],  | 
5520  |  |       int to_len, void* arg)  | 
5521  | 0  | { | 
5522  | 0  |   IApplyCaseFoldArg* iarg;  | 
5523  | 0  |   ScanEnv* env;  | 
5524  | 0  |   CClassNode* cc;  | 
5525  | 0  |   CClassNode* asc_cc;  | 
5526  | 0  |   BitSetRef bs;  | 
5527  | 0  |   int add_flag, r;  | 
5528  |  | 
  | 
5529  | 0  |   iarg = (IApplyCaseFoldArg* )arg;  | 
5530  | 0  |   env = iarg->env;  | 
5531  | 0  |   cc  = iarg->cc;  | 
5532  | 0  |   asc_cc = iarg->asc_cc;  | 
5533  | 0  |   bs = cc->bs;  | 
5534  |  | 
  | 
5535  | 0  |   if (IS_NULL(asc_cc)) { | 
5536  | 0  |     add_flag = 0;  | 
5537  | 0  |   }  | 
5538  | 0  |   else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) { | 
5539  | 0  |     add_flag = 1;  | 
5540  | 0  |   }  | 
5541  | 0  |   else { | 
5542  | 0  |     add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);  | 
5543  | 0  |     if (IS_NCCLASS_NOT(asc_cc))  | 
5544  | 0  |       add_flag = !add_flag;  | 
5545  | 0  |   }  | 
5546  |  | 
  | 
5547  | 0  |   if (to_len == 1) { | 
5548  | 0  |     int is_in = onig_is_code_in_cc(env->enc, from, cc);  | 
5549  | 0  | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS  | 
5550  | 0  |     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||  | 
5551  | 0  |   (is_in == 0 &&  IS_NCCLASS_NOT(cc))) { | 
5552  | 0  |       if (add_flag) { | 
5553  | 0  |   if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { | 
5554  | 0  |     r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);  | 
5555  | 0  |     if (r < 0) return r;  | 
5556  | 0  |   }  | 
5557  | 0  |   else { | 
5558  | 0  |     BITSET_SET_BIT(bs, *to);  | 
5559  | 0  |   }  | 
5560  | 0  |       }  | 
5561  | 0  |     }  | 
5562  |  | #else  | 
5563  |  |     if (is_in != 0) { | 
5564  |  |       if (add_flag) { | 
5565  |  |   if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { | 
5566  |  |     if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);  | 
5567  |  |     r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);  | 
5568  |  |     if (r < 0) return r;  | 
5569  |  |   }  | 
5570  |  |   else { | 
5571  |  |     if (IS_NCCLASS_NOT(cc)) { | 
5572  |  |       BITSET_CLEAR_BIT(bs, *to);  | 
5573  |  |     }  | 
5574  |  |     else { | 
5575  |  |       BITSET_SET_BIT(bs, *to);  | 
5576  |  |     }  | 
5577  |  |   }  | 
5578  |  |       }  | 
5579  |  |     }  | 
5580  |  | #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */  | 
5581  | 0  |   }  | 
5582  | 0  |   else { | 
5583  | 0  |     int r, i, len;  | 
5584  | 0  |     UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];  | 
5585  | 0  |     Node *snode = NULL_NODE;  | 
5586  |  | 
  | 
5587  | 0  |     if (onig_is_code_in_cc(env->enc, from, cc)  | 
5588  | 0  | #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS  | 
5589  | 0  |   && !IS_NCCLASS_NOT(cc)  | 
5590  | 0  | #endif  | 
5591  | 0  |   ) { | 
5592  | 0  |       for (i = 0; i < to_len; i++) { | 
5593  | 0  |   len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);  | 
5594  | 0  |   if (i == 0) { | 
5595  | 0  |     snode = onig_node_new_str(buf, buf + len);  | 
5596  | 0  |     CHECK_NULL_RETURN_MEMERR(snode);  | 
5597  |  |  | 
5598  |  |     /* char-class expanded multi-char only  | 
5599  |  |        compare with string folded at match time. */  | 
5600  | 0  |     NSTRING_SET_AMBIG(snode);  | 
5601  | 0  |   }  | 
5602  | 0  |   else { | 
5603  | 0  |     r = onig_node_str_cat(snode, buf, buf + len);  | 
5604  | 0  |     if (r < 0) { | 
5605  | 0  |       onig_node_free(snode);  | 
5606  | 0  |       return r;  | 
5607  | 0  |     }  | 
5608  | 0  |   }  | 
5609  | 0  |       }  | 
5610  |  |  | 
5611  | 0  |       *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);  | 
5612  | 0  |       CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));  | 
5613  | 0  |       iarg->ptail = &(NCDR((*(iarg->ptail))));  | 
5614  | 0  |     }  | 
5615  | 0  |   }  | 
5616  |  |  | 
5617  | 0  |   return 0;  | 
5618  | 0  | }  | 
5619  |  |  | 
5620  |  | static int  | 
5621  |  | cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)  | 
5622  | 0  | { | 
5623  | 0  |   int r;  | 
5624  | 0  |   IApplyCaseFoldArg iarg;  | 
5625  |  | 
  | 
5626  | 0  |   iarg.env         = env;  | 
5627  | 0  |   iarg.cc          = cc;  | 
5628  | 0  |   iarg.asc_cc      = asc_cc;  | 
5629  | 0  |   iarg.alt_root    = NULL_NODE;  | 
5630  | 0  |   iarg.ptail       = &(iarg.alt_root);  | 
5631  |  | 
  | 
5632  | 0  |   r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,  | 
5633  | 0  |           i_apply_case_fold, &iarg);  | 
5634  | 0  |   if (r != 0) { | 
5635  | 0  |     onig_node_free(iarg.alt_root);  | 
5636  | 0  |     return r;  | 
5637  | 0  |   }  | 
5638  | 0  |   if (IS_NOT_NULL(iarg.alt_root)) { | 
5639  | 0  |     Node* work = onig_node_new_alt(*np, iarg.alt_root);  | 
5640  | 0  |     if (IS_NULL(work)) { | 
5641  | 0  |       onig_node_free(iarg.alt_root);  | 
5642  | 0  |       return ONIGERR_MEMORY;  | 
5643  | 0  |     }  | 
5644  | 0  |     *np = work;  | 
5645  | 0  |   }  | 
5646  | 0  |   return r;  | 
5647  | 0  | }  | 
5648  |  |  | 
5649  |  | static int  | 
5650  |  | node_linebreak(Node** np, ScanEnv* env)  | 
5651  | 0  | { | 
5652  |  |   /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */ | 
5653  | 0  |   Node* left = NULL;  | 
5654  | 0  |   Node* right = NULL;  | 
5655  | 0  |   Node* target1 = NULL;  | 
5656  | 0  |   Node* target2 = NULL;  | 
5657  | 0  |   CClassNode* cc;  | 
5658  | 0  |   int num1, num2, r;  | 
5659  | 0  |   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];  | 
5660  |  |  | 
5661  |  |   /* \x0D\x0A */  | 
5662  | 0  |   num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);  | 
5663  | 0  |   if (num1 < 0) return num1;  | 
5664  | 0  |   num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);  | 
5665  | 0  |   if (num2 < 0) return num2;  | 
5666  | 0  |   left = node_new_str_raw(buf, buf + num1 + num2);  | 
5667  | 0  |   if (IS_NULL(left)) goto err;  | 
5668  |  |  | 
5669  |  |   /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */ | 
5670  | 0  |   right = node_new_cclass();  | 
5671  | 0  |   if (IS_NULL(right)) goto err;  | 
5672  | 0  |   cc = NCCLASS(right);  | 
5673  | 0  |   if (ONIGENC_MBC_MINLEN(env->enc) > 1) { | 
5674  | 0  |     r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);  | 
5675  | 0  |     if (r != 0) goto err;  | 
5676  | 0  |   }  | 
5677  | 0  |   else { | 
5678  | 0  |     bitset_set_range(env, cc->bs, 0x0A, 0x0D);  | 
5679  | 0  |   }  | 
5680  |  |  | 
5681  |  |   /* TODO: move this block to enc/unicode.c */  | 
5682  | 0  |   if (ONIGENC_IS_UNICODE(env->enc)) { | 
5683  |  |     /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */  | 
5684  | 0  |     r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);  | 
5685  | 0  |     if (r != 0) goto err;  | 
5686  | 0  |     r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);  | 
5687  | 0  |     if (r != 0) goto err;  | 
5688  | 0  |   }  | 
5689  |  |  | 
5690  |  |   /* ...|... */  | 
5691  | 0  |   target1 = onig_node_new_alt(right, NULL_NODE);  | 
5692  | 0  |   if (IS_NULL(target1)) goto err;  | 
5693  | 0  |   right = NULL;  | 
5694  | 0  |   target2 = onig_node_new_alt(left, target1);  | 
5695  | 0  |   if (IS_NULL(target2)) goto err;  | 
5696  | 0  |   left = NULL;  | 
5697  | 0  |   target1 = NULL;  | 
5698  |  |  | 
5699  |  |   /* (?>...) */  | 
5700  | 0  |   *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);  | 
5701  | 0  |   if (IS_NULL(*np)) goto err;  | 
5702  | 0  |   NENCLOSE(*np)->target = target2;  | 
5703  | 0  |   return ONIG_NORMAL;  | 
5704  |  |  | 
5705  | 0  |  err:  | 
5706  | 0  |   onig_node_free(left);  | 
5707  | 0  |   onig_node_free(right);  | 
5708  | 0  |   onig_node_free(target1);  | 
5709  | 0  |   onig_node_free(target2);  | 
5710  | 0  |   return ONIGERR_MEMORY;  | 
5711  | 0  | }  | 
5712  |  |  | 
5713  |  | static int  | 
5714  |  | propname2ctype(ScanEnv* env, const char* propname)  | 
5715  | 0  | { | 
5716  | 0  |   UChar* name = (UChar* )propname;  | 
5717  | 0  |   UChar* name_end = name + strlen(propname);  | 
5718  | 0  |   int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,  | 
5719  | 0  |       name, name_end);  | 
5720  | 0  |   if (ctype < 0) { | 
5721  | 0  |     onig_scan_env_set_error_string(env, ctype, name, name_end);  | 
5722  | 0  |   }  | 
5723  | 0  |   return ctype;  | 
5724  | 0  | }  | 
5725  |  |  | 
5726  |  | static int  | 
5727  |  | add_property_to_cc(CClassNode* cc, const char* propname, int not, ScanEnv* env)  | 
5728  | 0  | { | 
5729  | 0  |   int ctype = propname2ctype(env, propname);  | 
5730  | 0  |   if (ctype < 0) return ctype;  | 
5731  | 0  |   return add_ctype_to_cc(cc, ctype, not, 0, env);  | 
5732  | 0  | }  | 
5733  |  |  | 
5734  |  | /*  | 
5735  |  |  * helper methods for node_extended_grapheme_cluster (/\X/)  | 
5736  |  |  */  | 
5737  |  | static int  | 
5738  |  | create_property_node(Node **np, ScanEnv* env, const char* propname)  | 
5739  | 0  | { | 
5740  | 0  |   int r;  | 
5741  | 0  |   CClassNode* cc;  | 
5742  |  | 
  | 
5743  | 0  |   *np = node_new_cclass();  | 
5744  | 0  |   if (IS_NULL(*np)) return ONIGERR_MEMORY;  | 
5745  | 0  |   cc = NCCLASS(*np);  | 
5746  | 0  |   r = add_property_to_cc(cc, propname, 0, env);  | 
5747  | 0  |   if (r != 0)  | 
5748  | 0  |     onig_node_free(*np);  | 
5749  | 0  |   return r;  | 
5750  | 0  | }  | 
5751  |  |  | 
5752  |  | static int  | 
5753  |  | quantify_node(Node **np, int lower, int upper)  | 
5754  | 0  | { | 
5755  | 0  |   Node* tmp = node_new_quantifier(lower, upper, 0);  | 
5756  | 0  |   if (IS_NULL(tmp)) return ONIGERR_MEMORY;  | 
5757  | 0  |   NQTFR(tmp)->target = *np;  | 
5758  | 0  |   *np = tmp;  | 
5759  | 0  |   return 0;  | 
5760  | 0  | }  | 
5761  |  |  | 
5762  |  | static int  | 
5763  |  | quantify_property_node(Node **np, ScanEnv* env, const char* propname, char repetitions)  | 
5764  | 0  | { | 
5765  | 0  |   int r;  | 
5766  | 0  |   int lower = 0;  | 
5767  | 0  |   int upper = REPEAT_INFINITE;  | 
5768  |  | 
  | 
5769  | 0  |   r = create_property_node(np, env, propname);  | 
5770  | 0  |   if (r != 0) return r;  | 
5771  | 0  |   switch (repetitions) { | 
5772  | 0  |     case '?':  upper = 1;          break;  | 
5773  | 0  |     case '+':  lower = 1;          break;  | 
5774  | 0  |     case '*':                      break;  | 
5775  | 0  |     case '2':  lower = upper = 2;  break;  | 
5776  | 0  |     default :  return ONIGERR_PARSER_BUG;  | 
5777  | 0  |   }  | 
5778  | 0  |   return quantify_node(np, lower, upper);  | 
5779  | 0  | }  | 
5780  |  |  | 
5781  | 0  | #define LIST 0  | 
5782  |  | #define ALT  1  | 
5783  |  |  | 
5784  |  | /* IMPORTANT: Make sure node_array ends with NULL_NODE */  | 
5785  |  | static int  | 
5786  |  | create_node_from_array(int kind, Node **np, Node **node_array)  | 
5787  | 0  | { | 
5788  | 0  |   Node* tmp = NULL_NODE;  | 
5789  | 0  |   int i = 0;  | 
5790  |  | 
  | 
5791  | 0  |   while (node_array[i] != NULL_NODE)  i++;  | 
5792  | 0  |   while (--i >= 0) { | 
5793  | 0  |     *np = kind==LIST ? node_new_list(node_array[i], tmp)  | 
5794  | 0  |                      : onig_node_new_alt(node_array[i], tmp);  | 
5795  | 0  |     if (IS_NULL(*np)) { | 
5796  | 0  |       while (i >= 0) { | 
5797  | 0  |         onig_node_free(node_array[i]);  | 
5798  | 0  |         node_array[i--] = NULL_NODE;  | 
5799  | 0  |       }  | 
5800  | 0  |       onig_node_free(tmp);  | 
5801  | 0  |       return ONIGERR_MEMORY;  | 
5802  | 0  |     }  | 
5803  | 0  |     else  | 
5804  | 0  |       node_array[i] = NULL_NODE;  | 
5805  | 0  |     tmp = *np;  | 
5806  | 0  |   }  | 
5807  | 0  |   return 0;  | 
5808  | 0  | }  | 
5809  |  |  | 
5810  | 0  | #define R_ERR(call) r=(call);if(r!=0)goto err  | 
5811  |  |  | 
5812  |  | /* Memory layout for common node array:  | 
5813  |  |  * The main purpose is to be able to easily free all leftover nodes  | 
5814  |  |  * after an error. As a side effect, we share some memory.  | 
5815  |  |  *  | 
5816  |  |  * The layout is as shown below (each line corresponds to one call of  | 
5817  |  |  * create_node_from_array()). Because create_node_from_array sets all  | 
5818  |  |  * nodes of the source to NULL_NODE, we can overlap the target array  | 
5819  |  |  * as long as we do not override the actual target location.  | 
5820  |  |  *  | 
5821  |  |  * Target       Array name          Index  | 
5822  |  |  *  | 
5823  |  |  *              node_array          0 1 2 3 4 5 6 7 8 9 A B C D E F  | 
5824  |  |  * top_alts     alts[5]             0 1 2 3 4*  | 
5825  |  |  * alts+1       list[4]                   0 1 2 3*  | 
5826  |  |  * list+1       core_alts[7]                  0 1 2 3 4 5 6*  | 
5827  |  |  * core_alts+0  H_list[4]                       0 1 2 3*  | 
5828  |  |  * H_list+1     H_alt2[4]                           0 1 2 3*  | 
5829  |  |  * h_alt2+1     H_list2[3]                              0 1 2*  | 
5830  |  |  * core_alts+4  XP_list[4]                              0 1 2 3*  | 
5831  |  |  * XP_list+1    Ex_list[4]                                  0 1 2 3*  | 
5832  |  |  */  | 
5833  | 0  | #define NODE_COMMON_SIZE 16  | 
5834  |  |  | 
5835  |  | static int  | 
5836  |  | node_extended_grapheme_cluster(Node** np, ScanEnv* env)  | 
5837  | 0  | { | 
5838  | 0  |   Node* tmp = NULL;  | 
5839  | 0  |   Node* np1 = NULL;  | 
5840  | 0  |   Node* top_alt = NULL;  | 
5841  | 0  |   int r = 0;  | 
5842  | 0  |   int num1;  | 
5843  | 0  |   int i;  | 
5844  | 0  |   int any_target_position;  | 
5845  | 0  |   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];  | 
5846  | 0  |   OnigOptionType option;  | 
5847  |  |   /* node_common is function-global so that we can free all nodes  | 
5848  |  |    * in case of error. Unused slots are set to NULL_NODE at all times. */  | 
5849  | 0  |   Node *node_common[NODE_COMMON_SIZE];  | 
5850  | 0  |   Node **alts = node_common+0; /* size: 5 */  | 
5851  |  | 
  | 
5852  | 0  |   for (i=0; i<NODE_COMMON_SIZE; i++)  | 
5853  | 0  |     node_common[i] = NULL_NODE;  | 
5854  |  |  | 
5855  |  |   /* CRLF, common for both Unicode and non-Unicode */  | 
5856  |  |   /* \x0D\x0A */  | 
5857  | 0  |   r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);  | 
5858  | 0  |   if (r < 0) goto err;  | 
5859  | 0  |   num1 = r;  | 
5860  | 0  |   r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);  | 
5861  | 0  |   if (r < 0) goto err;  | 
5862  | 0  |   alts[0] = node_new_str_raw(buf, buf + num1 + r);  | 
5863  | 0  |   if (IS_NULL(alts[0])) goto err;  | 
5864  |  |  | 
5865  | 0  | #ifdef USE_UNICODE_PROPERTIES  | 
5866  | 0  |   if (ONIGENC_IS_UNICODE(env->enc)) {  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ | 
5867  | 0  |     CClassNode* cc;  | 
5868  |  | 
  | 
5869  | 0  |     if (propname2ctype(env, "Grapheme_Cluster_Break=Extend") < 0) goto err;  | 
5870  |  |     /* Unicode 11.0.0  | 
5871  |  |      *   CRLF     (already done)  | 
5872  |  |      * | [Control CR LF]  | 
5873  |  |      * | precore* core postcore*  | 
5874  |  |      * | .      (to catch invalid stuff, because this seems to be spec for String#grapheme_clusters) */  | 
5875  |  |  | 
5876  |  |     /* [Control CR LF]    (CR and LF are not in the spec, but this is a conformed fix) */  | 
5877  | 0  |     alts[1] = node_new_cclass();  | 
5878  | 0  |     if (IS_NULL(alts[1])) goto err;  | 
5879  | 0  |     cc = NCCLASS(alts[1]);  | 
5880  | 0  |     R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));  | 
5881  | 0  |     if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */ | 
5882  | 0  |       R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */  | 
5883  | 0  |       R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */  | 
5884  | 0  |     }  | 
5885  | 0  |     else { | 
5886  | 0  |       BITSET_SET_BIT(cc->bs, 0x0a);  | 
5887  | 0  |       BITSET_SET_BIT(cc->bs, 0x0d);  | 
5888  | 0  |     }  | 
5889  |  |  | 
5890  |  |     /* precore* core postcore* */  | 
5891  | 0  |     { | 
5892  | 0  |       Node **list = alts + 3; /* size: 4 */  | 
5893  |  |  | 
5894  |  |       /* precore*; precore := Prepend */  | 
5895  | 0  |       R_ERR(quantify_property_node(list+0, env, "Grapheme_Cluster_Break=Prepend", '*'));  | 
5896  |  |  | 
5897  |  |       /* core := hangul-syllable  | 
5898  |  |        *       | ri-sequence  | 
5899  |  |        *       | xpicto-sequence  | 
5900  |  |        *       | [^Control CR LF] */  | 
5901  | 0  |       { | 
5902  | 0  |         Node **core_alts = list + 2; /* size: 7 */  | 
5903  |  |  | 
5904  |  |         /* hangul-syllable :=  | 
5905  |  |          *     L* (V+ | LV V* | LVT) T*  | 
5906  |  |          *   | L+  | 
5907  |  |          *   | T+ */  | 
5908  |  |         /* hangul-syllable is an alternative (would be called H_alt)  | 
5909  |  |          * inside an alternative, but we flatten it into core_alts */  | 
5910  |  |  | 
5911  |  |         /* L* (V+ | LV V* | LVT) T* */  | 
5912  | 0  |         { | 
5913  | 0  |           Node **H_list = core_alts + 1; /* size: 4 */  | 
5914  | 0  |           R_ERR(quantify_property_node(H_list+0, env, "Grapheme_Cluster_Break=L", '*'));  | 
5915  |  |  | 
5916  |  |           /* V+ | LV V* | LVT */  | 
5917  | 0  |           { | 
5918  | 0  |             Node **H_alt2 = H_list + 2; /* size: 4 */  | 
5919  | 0  |             R_ERR(quantify_property_node(H_alt2+0, env, "Grapheme_Cluster_Break=V", '+'));  | 
5920  |  |  | 
5921  |  |             /* LV V* */  | 
5922  | 0  |             { | 
5923  | 0  |               Node **H_list2 = H_alt2 + 2; /* size: 3 */  | 
5924  |  | 
  | 
5925  | 0  |               R_ERR(create_property_node(H_list2+0, env, "Grapheme_Cluster_Break=LV"));  | 
5926  | 0  |               R_ERR(quantify_property_node(H_list2+1, env, "Grapheme_Cluster_Break=V", '*'));  | 
5927  | 0  |               R_ERR(create_node_from_array(LIST, H_alt2+1, H_list2));  | 
5928  | 0  |             }  | 
5929  |  |  | 
5930  | 0  |             R_ERR(create_property_node(H_alt2+2, env, "Grapheme_Cluster_Break=LVT"));  | 
5931  | 0  |             R_ERR(create_node_from_array(ALT, H_list+1, H_alt2));  | 
5932  | 0  |           }  | 
5933  |  |  | 
5934  | 0  |           R_ERR(quantify_property_node(H_list+2, env, "Grapheme_Cluster_Break=T", '*'));  | 
5935  | 0  |           R_ERR(create_node_from_array(LIST, core_alts+0, H_list));  | 
5936  | 0  |         }  | 
5937  |  |  | 
5938  | 0  |         R_ERR(quantify_property_node(core_alts+1, env, "Grapheme_Cluster_Break=L", '+'));  | 
5939  | 0  |         R_ERR(quantify_property_node(core_alts+2, env, "Grapheme_Cluster_Break=T", '+'));  | 
5940  |  |         /* end of hangul-syllable */  | 
5941  |  |  | 
5942  |  |         /* ri-sequence := RI RI */  | 
5943  | 0  |         R_ERR(quantify_property_node(core_alts+3, env, "Regional_Indicator", '2'));  | 
5944  |  |  | 
5945  |  |         /* xpicto-sequence := \p{Extended_Pictographic} (Extend* ZWJ \p{Extended_Pictographic})* */ | 
5946  | 0  |         { | 
5947  | 0  |           Node **XP_list = core_alts + 5; /* size: 3 */  | 
5948  | 0  |           R_ERR(create_property_node(XP_list+0, env, "Extended_Pictographic"));  | 
5949  |  |  | 
5950  |  |           /* (Extend* ZWJ \p{Extended_Pictographic})* */ | 
5951  | 0  |           { | 
5952  | 0  |             Node **Ex_list = XP_list + 2; /* size: 4 */  | 
5953  |  |             /* assert(Ex_list+4 == node_common+NODE_COMMON_SIZE); */  | 
5954  | 0  |             R_ERR(quantify_property_node(Ex_list+0, env, "Grapheme_Cluster_Break=Extend", '*'));  | 
5955  |  |  | 
5956  |  |             /* ZWJ (ZERO WIDTH JOINER) */  | 
5957  | 0  |             r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);  | 
5958  | 0  |             if (r < 0) goto err;  | 
5959  | 0  |             Ex_list[1] = node_new_str_raw(buf, buf + r);  | 
5960  | 0  |             if (IS_NULL(Ex_list[1])) goto err;  | 
5961  |  |  | 
5962  | 0  |             R_ERR(create_property_node(Ex_list+2, env, "Extended_Pictographic"));  | 
5963  | 0  |             R_ERR(create_node_from_array(LIST, XP_list+1, Ex_list));  | 
5964  | 0  |           }  | 
5965  | 0  |           R_ERR(quantify_node(XP_list+1, 0, REPEAT_INFINITE)); /* TODO: Check about node freeing */  | 
5966  |  |  | 
5967  | 0  |           R_ERR(create_node_from_array(LIST, core_alts+4, XP_list));  | 
5968  | 0  |         }  | 
5969  |  |  | 
5970  |  |         /* [^Control CR LF] */  | 
5971  | 0  |         core_alts[5] = node_new_cclass();  | 
5972  | 0  |         if (IS_NULL(core_alts[5])) goto err;  | 
5973  | 0  |         cc = NCCLASS(core_alts[5]);  | 
5974  | 0  |         if (ONIGENC_MBC_MINLEN(env->enc) > 1) { /* UTF-16/UTF-32 */ | 
5975  | 0  |           BBuf *inverted_buf = NULL;  | 
5976  |  |  | 
5977  |  |           /* TODO: fix false warning */  | 
5978  | 0  |           const int dup_not_warned = env->warnings_flag | ~ONIG_SYN_WARN_CC_DUP;  | 
5979  | 0  |           env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;  | 
5980  |  |  | 
5981  |  |           /* Start with a positive buffer and invert at the end.  | 
5982  |  |            * Otherwise, adding single-character ranges work the wrong way. */  | 
5983  | 0  |           R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 0, env));  | 
5984  | 0  |           R_ERR(add_code_range(&(cc->mbuf), env, 0x000A, 0x000A)); /* CR */  | 
5985  | 0  |           R_ERR(add_code_range(&(cc->mbuf), env, 0x000D, 0x000D)); /* LF */  | 
5986  | 0  |           R_ERR(not_code_range_buf(env->enc, cc->mbuf, &inverted_buf, env));  | 
5987  | 0  |           cc->mbuf = inverted_buf; /* TODO: check what to do with buffer before inversion */  | 
5988  |  | 
  | 
5989  | 0  |           env->warnings_flag &= dup_not_warned; /* TODO: fix false warning */  | 
5990  | 0  |         }  | 
5991  | 0  |         else { | 
5992  | 0  |           R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=Control", 1, env));  | 
5993  | 0  |           BITSET_CLEAR_BIT(cc->bs, 0x0a);  | 
5994  | 0  |           BITSET_CLEAR_BIT(cc->bs, 0x0d);  | 
5995  | 0  |         }  | 
5996  |  |  | 
5997  | 0  |         R_ERR(create_node_from_array(ALT, list+1, core_alts));  | 
5998  | 0  |       }  | 
5999  |  |  | 
6000  |  |       /* postcore*; postcore = [Extend ZWJ SpacingMark] */  | 
6001  | 0  |       R_ERR(create_property_node(list+2, env, "Grapheme_Cluster_Break=Extend"));  | 
6002  | 0  |       cc = NCCLASS(list[2]);  | 
6003  | 0  |       R_ERR(add_property_to_cc(cc, "Grapheme_Cluster_Break=SpacingMark", 0, env));  | 
6004  | 0  |       R_ERR(add_code_range(&(cc->mbuf), env, 0x200D, 0x200D));  | 
6005  | 0  |       R_ERR(quantify_node(list+2, 0, REPEAT_INFINITE));  | 
6006  |  |  | 
6007  | 0  |       R_ERR(create_node_from_array(LIST, alts+2, list));  | 
6008  | 0  |     }  | 
6009  |  |  | 
6010  | 0  |     any_target_position = 3;  | 
6011  | 0  |   }  | 
6012  | 0  |   else  | 
6013  | 0  | #endif /* USE_UNICODE_PROPERTIES */  | 
6014  | 0  |   { | 
6015  | 0  |     any_target_position = 1;  | 
6016  | 0  |   }  | 
6017  |  |  | 
6018  |  |   /* PerlSyntax: (?s:.), RubySyntax: (?m:.), common for both Unicode and non-Unicode */  | 
6019  |  |   /* Not in Unicode spec (UAX #29), but added to catch invalid stuff,  | 
6020  |  |    * because this is Ruby spec for String#grapheme_clusters. */  | 
6021  | 0  |   np1 = node_new_anychar();  | 
6022  | 0  |   if (IS_NULL(np1)) goto err;  | 
6023  |  |  | 
6024  | 0  |   option = env->option;  | 
6025  | 0  |   ONOFF(option, ONIG_OPTION_MULTILINE, 0);  | 
6026  | 0  |   tmp = node_new_option(option);  | 
6027  | 0  |   if (IS_NULL(tmp)) goto err;  | 
6028  | 0  |   NENCLOSE(tmp)->target = np1;  | 
6029  | 0  |   alts[any_target_position] = tmp;  | 
6030  | 0  |   np1 = NULL;  | 
6031  |  | 
  | 
6032  | 0  |   R_ERR(create_node_from_array(ALT, &top_alt, alts));  | 
6033  |  |  | 
6034  |  |   /* (?>): For efficiency, because there is no text piece  | 
6035  |  |    *       that is not in a grapheme cluster, and there is only one way  | 
6036  |  |    *       to split a string into grapheme clusters. */  | 
6037  | 0  |   tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);  | 
6038  | 0  |   if (IS_NULL(tmp)) goto err;  | 
6039  | 0  |   NENCLOSE(tmp)->target = top_alt;  | 
6040  | 0  |   np1 = tmp;  | 
6041  |  | 
  | 
6042  | 0  | #ifdef USE_UNICODE_PROPERTIES  | 
6043  | 0  |   if (ONIGENC_IS_UNICODE(env->enc)) { | 
6044  |  |     /* Don't ignore case. */  | 
6045  | 0  |     option = env->option;  | 
6046  | 0  |     ONOFF(option, ONIG_OPTION_IGNORECASE, 1);  | 
6047  | 0  |     *np = node_new_option(option);  | 
6048  | 0  |     if (IS_NULL(*np)) goto err;  | 
6049  | 0  |     NENCLOSE(*np)->target = np1;  | 
6050  | 0  |   }  | 
6051  | 0  |   else  | 
6052  | 0  | #endif  | 
6053  | 0  |   { | 
6054  | 0  |     *np = np1;  | 
6055  | 0  |   }  | 
6056  | 0  |   return ONIG_NORMAL;  | 
6057  |  |  | 
6058  | 0  |  err:  | 
6059  | 0  |   onig_node_free(np1);  | 
6060  | 0  |   for (i=0; i<NODE_COMMON_SIZE; i++)  | 
6061  | 0  |     onig_node_free(node_common[i]);  | 
6062  | 0  |   return (r == 0) ? ONIGERR_MEMORY : r;  | 
6063  | 0  | }  | 
6064  |  | #undef R_ERR  | 
6065  |  |  | 
/*
 * Count the bits that are set in the low 32 bits of `bits`.
 */
static int
countbits(unsigned int bits)
{
  unsigned int rest = bits & 0xffffffffU;
  int count = 0;

  /* Each round clears the lowest bit that is still set. */
  while (rest != 0) {
    rest &= rest - 1;
    count++;
  }
  return count;
}
6075  |  |  | 
6076  |  | static int  | 
6077  |  | is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)  | 
6078  | 6.16k  | { | 
6079  | 6.16k  |   const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;  | 
6080  | 6.16k  |   OnigCodePoint c = not_found;  | 
6081  | 6.16k  |   int i;  | 
6082  | 6.16k  |   BBuf *bbuf = cc->mbuf;  | 
6083  |  |  | 
6084  | 6.16k  |   if (IS_NCCLASS_NOT(cc)) return 0;  | 
6085  |  |  | 
6086  |  |   /* check bbuf */  | 
6087  | 3.69k  |   if (IS_NOT_NULL(bbuf)) { | 
6088  | 0  |     OnigCodePoint n, *data;  | 
6089  | 0  |     GET_CODE_POINT(n, bbuf->p);  | 
6090  | 0  |     data = (OnigCodePoint* )(bbuf->p) + 1;  | 
6091  | 0  |     if ((n == 1) && (data[0] == data[1])) { | 
6092  |  |       /* only one char found in the bbuf, save the code point. */  | 
6093  | 0  |       c = data[0];  | 
6094  | 0  |       if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) { | 
6095  |  |   /* skip if c is included in the bitset */  | 
6096  | 0  |   c = not_found;  | 
6097  | 0  |       }  | 
6098  | 0  |     }  | 
6099  | 0  |     else { | 
6100  | 0  |       return 0;  /* the bbuf contains multiple chars */  | 
6101  | 0  |     }  | 
6102  | 0  |   }  | 
6103  |  |  | 
6104  |  |   /* check bitset */  | 
6105  | 6.57k  |   for (i = 0; i < BITSET_SIZE; i++) { | 
6106  | 6.57k  |     Bits b1 = cc->bs[i];  | 
6107  | 6.57k  |     if (b1 != 0) { | 
6108  | 6.57k  |       if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { | 
6109  | 2.87k  |   c = BITS_IN_ROOM * i + countbits(b1 - 1);  | 
6110  | 3.69k  |       } else { | 
6111  | 3.69k  |   return 0;  /* the character class contains multiple chars */  | 
6112  | 3.69k  |       }  | 
6113  | 6.57k  |     }  | 
6114  | 6.57k  |   }  | 
6115  |  |  | 
6116  | 0  |   if (c != not_found) { | 
6117  | 0  |     *code = c;  | 
6118  | 0  |     return 1;  | 
6119  | 0  |   }  | 
6120  |  |  | 
6121  |  |   /* the character class contains no char. */  | 
6122  | 0  |   return 0;  | 
6123  | 0  | }  | 
6124  |  |  | 
6125  |  |  | 
6126  |  | static int  | 
6127  |  | parse_exp(Node** np, OnigToken* tok, int term,  | 
6128  |  |     UChar** src, UChar* end, ScanEnv* env)  | 
6129  | 48.4k  | { | 
6130  | 48.4k  |   int r, len, group = 0;  | 
6131  | 48.4k  |   Node* qn;  | 
6132  | 48.4k  |   Node** targetp;  | 
6133  | 48.4k  |   unsigned int parse_depth;  | 
6134  |  |  | 
6135  | 48.4k  |   *np = NULL;  | 
6136  | 48.4k  |   if (tok->type == (enum TokenSyms )term)  | 
6137  | 411  |     goto end_of_token;  | 
6138  |  |  | 
6139  | 48.0k  |   parse_depth = env->parse_depth;  | 
6140  |  |  | 
6141  | 48.0k  |   switch (tok->type) { | 
6142  | 0  |   case TK_ALT:  | 
6143  | 0  |   case TK_EOT:  | 
6144  | 411  |   end_of_token:  | 
6145  | 411  |     *np = node_new_empty();  | 
6146  | 411  |     return tok->type;  | 
6147  | 0  |     break;  | 
6148  |  |  | 
6149  | 4.93k  |   case TK_SUBEXP_OPEN:  | 
6150  | 4.93k  |     r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);  | 
6151  | 4.93k  |     if (r < 0) return r;  | 
6152  | 4.93k  |     if (r == 1) group = 1;  | 
6153  | 2.46k  |     else if (r == 2) { /* option only */ | 
6154  | 0  |       Node* target;  | 
6155  | 0  |       OnigOptionType prev = env->option;  | 
6156  |  | 
  | 
6157  | 0  |       env->option = NENCLOSE(*np)->option;  | 
6158  | 0  |       r = fetch_token(tok, src, end, env);  | 
6159  | 0  |       if (r < 0) { | 
6160  | 0  |   env->option = prev;  | 
6161  | 0  |   return r;  | 
6162  | 0  |       }  | 
6163  | 0  |       r = parse_subexp(&target, tok, term, src, end, env);  | 
6164  | 0  |       env->option = prev;  | 
6165  | 0  |       if (r < 0) { | 
6166  | 0  |   onig_node_free(target);  | 
6167  | 0  |   return r;  | 
6168  | 0  |       }  | 
6169  | 0  |       NENCLOSE(*np)->target = target;  | 
6170  | 0  |       return tok->type;  | 
6171  | 0  |     }  | 
6172  | 4.93k  |     break;  | 
6173  |  |  | 
6174  | 4.93k  |   case TK_SUBEXP_CLOSE:  | 
6175  | 0  |     if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))  | 
6176  | 0  |       return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;  | 
6177  |  |  | 
6178  | 0  |     if (tok->escaped) goto tk_raw_byte;  | 
6179  | 0  |     else goto tk_byte;  | 
6180  | 0  |     break;  | 
6181  |  |  | 
6182  | 0  |   case TK_LINEBREAK:  | 
6183  | 0  |     r = node_linebreak(np, env);  | 
6184  | 0  |     if (r < 0) return r;  | 
6185  | 0  |     break;  | 
6186  |  |  | 
6187  | 0  |   case TK_EXTENDED_GRAPHEME_CLUSTER:  | 
6188  | 0  |     r = node_extended_grapheme_cluster(np, env);  | 
6189  | 0  |     if (r < 0) return r;  | 
6190  | 0  |     break;  | 
6191  |  |  | 
6192  | 0  |   case TK_KEEP:  | 
6193  | 0  |     *np = onig_node_new_anchor(ANCHOR_KEEP);  | 
6194  | 0  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6195  | 0  |     break;  | 
6196  |  |  | 
6197  | 17.2k  |   case TK_STRING:  | 
6198  | 17.2k  |   tk_byte:  | 
6199  | 17.2k  |     { | 
6200  | 17.2k  |       *np = node_new_str(tok->backp, *src);  | 
6201  | 17.2k  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
6202  |  |  | 
6203  | 17.2k  |     string_loop:  | 
6204  | 151k  |       while (1) { | 
6205  | 151k  |   r = fetch_token(tok, src, end, env);  | 
6206  | 151k  |   if (r < 0) return r;  | 
6207  | 151k  |   if (r == TK_STRING) { | 
6208  | 133k  |     r = onig_node_str_cat(*np, tok->backp, *src);  | 
6209  | 133k  |   }  | 
6210  | 17.2k  | #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG  | 
6211  | 17.2k  |   else if (r == TK_CODE_POINT) { | 
6212  | 0  |     r = node_str_cat_codepoint(*np, env->enc, tok->u.code);  | 
6213  | 0  |   }  | 
6214  | 17.2k  | #endif  | 
6215  | 17.2k  |   else { | 
6216  | 17.2k  |     break;  | 
6217  | 17.2k  |   }  | 
6218  | 133k  |   if (r < 0) return r;  | 
6219  | 133k  |       }  | 
6220  |  |  | 
6221  | 17.2k  |     string_end:  | 
6222  | 17.2k  |       targetp = np;  | 
6223  | 17.2k  |       goto repeat;  | 
6224  | 17.2k  |     }  | 
6225  | 0  |     break;  | 
6226  |  |  | 
6227  | 0  |   case TK_RAW_BYTE:  | 
6228  | 0  |   tk_raw_byte:  | 
6229  | 0  |     { | 
6230  | 0  |       *np = node_new_str_raw_char((UChar )tok->u.c);  | 
6231  | 0  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
6232  | 0  |       len = 1;  | 
6233  | 0  |       while (1) { | 
6234  | 0  |   if (len >= ONIGENC_MBC_MINLEN(env->enc)) { | 
6235  | 0  |     if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { | 
6236  | 0  |       r = fetch_token(tok, src, end, env);  | 
6237  | 0  |       NSTRING_CLEAR_RAW(*np);  | 
6238  | 0  |       goto string_end;  | 
6239  | 0  |     }  | 
6240  | 0  |   }  | 
6241  |  |  | 
6242  | 0  |   r = fetch_token(tok, src, end, env);  | 
6243  | 0  |   if (r < 0) return r;  | 
6244  | 0  |   if (r != TK_RAW_BYTE) { | 
6245  |  |     /* Don't use this, it is wrong for little endian encodings. */  | 
6246  |  | #ifdef USE_PAD_TO_SHORT_BYTE_CHAR  | 
6247  |  |     int rem;  | 
6248  |  |     if (len < ONIGENC_MBC_MINLEN(env->enc)) { | 
6249  |  |       rem = ONIGENC_MBC_MINLEN(env->enc) - len;  | 
6250  |  |       (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);  | 
6251  |  |       if (len + rem == enclen(env->enc, NSTR(*np)->s)) { | 
6252  |  |         NSTRING_CLEAR_RAW(*np);  | 
6253  |  |         goto string_end;  | 
6254  |  |       }  | 
6255  |  |     }  | 
6256  |  | #endif  | 
6257  | 0  |     return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;  | 
6258  | 0  |   }  | 
6259  |  |  | 
6260  | 0  |   r = node_str_cat_char(*np, (UChar )tok->u.c);  | 
6261  | 0  |   if (r < 0) return r;  | 
6262  |  |  | 
6263  | 0  |   len++;  | 
6264  | 0  |       }  | 
6265  | 0  |     }  | 
6266  | 0  |     break;  | 
6267  |  |  | 
6268  | 0  |   case TK_CODE_POINT:  | 
6269  | 0  |     { | 
6270  | 0  |       *np = node_new_empty();  | 
6271  | 0  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
6272  | 0  |       r = node_str_cat_codepoint(*np, env->enc, tok->u.code);  | 
6273  | 0  |       if (r != 0) return r;  | 
6274  |  | #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG  | 
6275  |  |       NSTRING_SET_RAW(*np);  | 
6276  |  | #else  | 
6277  | 0  |       goto string_loop;  | 
6278  | 0  | #endif  | 
6279  | 0  |     }  | 
6280  | 0  |     break;  | 
6281  |  |  | 
6282  | 0  |   case TK_QUOTE_OPEN:  | 
6283  | 0  |     { | 
6284  | 0  |       OnigCodePoint end_op[2];  | 
6285  | 0  |       UChar *qstart, *qend, *nextp;  | 
6286  |  | 
  | 
6287  | 0  |       end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);  | 
6288  | 0  |       end_op[1] = (OnigCodePoint )'E';  | 
6289  | 0  |       qstart = *src;  | 
6290  | 0  |       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);  | 
6291  | 0  |       if (IS_NULL(qend)) { | 
6292  | 0  |   nextp = qend = end;  | 
6293  | 0  |       }  | 
6294  | 0  |       *np = node_new_str(qstart, qend);  | 
6295  | 0  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
6296  | 0  |       *src = nextp;  | 
6297  | 0  |     }  | 
6298  | 0  |     break;  | 
6299  |  |  | 
6300  | 3.69k  |   case TK_CHAR_TYPE:  | 
6301  | 3.69k  |     { | 
6302  | 3.69k  |       switch (tok->u.prop.ctype) { | 
6303  | 0  |       case ONIGENC_CTYPE_WORD:  | 
6304  | 0  |   *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,  | 
6305  | 0  |            IS_ASCII_RANGE(env->option));  | 
6306  | 0  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
6307  | 0  |   break;  | 
6308  |  |  | 
6309  | 2.05k  |       case ONIGENC_CTYPE_SPACE:  | 
6310  | 3.69k  |       case ONIGENC_CTYPE_DIGIT:  | 
6311  | 3.69k  |       case ONIGENC_CTYPE_XDIGIT:  | 
6312  | 3.69k  |   { | 
6313  | 3.69k  |     CClassNode* cc;  | 
6314  |  |  | 
6315  | 3.69k  |     *np = node_new_cclass();  | 
6316  | 3.69k  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6317  | 3.69k  |     cc = NCCLASS(*np);  | 
6318  | 3.69k  |     r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,  | 
6319  | 3.69k  |         IS_ASCII_RANGE(env->option), env);  | 
6320  | 3.69k  |     if (r != 0) return r;  | 
6321  | 3.69k  |     if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);  | 
6322  | 3.69k  |   }  | 
6323  | 0  |   break;  | 
6324  |  |  | 
6325  | 0  |       default:  | 
6326  | 0  |   return ONIGERR_PARSER_BUG;  | 
6327  | 0  |   break;  | 
6328  | 3.69k  |       }  | 
6329  | 3.69k  |     }  | 
6330  | 3.69k  |     break;  | 
6331  |  |  | 
6332  | 3.69k  |   case TK_CHAR_PROPERTY:  | 
6333  | 0  |     r = parse_char_property(np, tok, src, end, env);  | 
6334  | 0  |     if (r != 0) return r;  | 
6335  | 0  |     break;  | 
6336  |  |  | 
6337  | 6.16k  |   case TK_CC_OPEN:  | 
6338  | 6.16k  |     { | 
6339  | 6.16k  |       Node *asc_node;  | 
6340  | 6.16k  |       CClassNode* cc;  | 
6341  | 6.16k  |       OnigCodePoint code;  | 
6342  |  |  | 
6343  | 6.16k  |       r = parse_char_class(np, &asc_node, tok, src, end, env);  | 
6344  | 6.16k  |       if (r != 0) { | 
6345  | 0  |   onig_node_free(asc_node);  | 
6346  | 0  |   return r;  | 
6347  | 0  |       }  | 
6348  |  |  | 
6349  | 6.16k  |       cc = NCCLASS(*np);  | 
6350  | 6.16k  |       if (is_onechar_cclass(cc, &code)) { | 
6351  | 0  |   onig_node_free(*np);  | 
6352  | 0  |   onig_node_free(asc_node);  | 
6353  | 0  |   *np = node_new_empty();  | 
6354  | 0  |   CHECK_NULL_RETURN_MEMERR(*np);  | 
6355  | 0  |   r = node_str_cat_codepoint(*np, env->enc, code);  | 
6356  | 0  |   if (r != 0) return r;  | 
6357  | 0  |   goto string_loop;  | 
6358  | 0  |       }  | 
6359  | 6.16k  |       if (IS_IGNORECASE(env->option)) { | 
6360  | 0  |   r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);  | 
6361  | 0  |   if (r != 0) { | 
6362  | 0  |     onig_node_free(asc_node);  | 
6363  | 0  |     return r;  | 
6364  | 0  |   }  | 
6365  | 0  |       }  | 
6366  | 6.16k  |       onig_node_free(asc_node);  | 
6367  | 6.16k  |     }  | 
6368  | 0  |     break;  | 
6369  |  |  | 
6370  | 4.11k  |   case TK_ANYCHAR:  | 
6371  | 4.11k  |     *np = node_new_anychar();  | 
6372  | 4.11k  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6373  | 4.11k  |     break;  | 
6374  |  |  | 
6375  | 4.11k  |   case TK_ANYCHAR_ANYTIME:  | 
6376  | 0  |     *np = node_new_anychar();  | 
6377  | 0  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6378  | 0  |     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);  | 
6379  | 0  |     CHECK_NULL_RETURN_MEMERR(qn);  | 
6380  | 0  |     NQTFR(qn)->target = *np;  | 
6381  | 0  |     *np = qn;  | 
6382  | 0  |     break;  | 
6383  |  |  | 
6384  | 0  |   case TK_BACKREF:  | 
6385  | 0  |     len = tok->u.backref.num;  | 
6386  | 0  |     *np = node_new_backref(len,  | 
6387  | 0  |        (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),  | 
6388  | 0  |          tok->u.backref.by_name,  | 
6389  | 0  | #ifdef USE_BACKREF_WITH_LEVEL  | 
6390  | 0  |          tok->u.backref.exist_level,  | 
6391  | 0  |          tok->u.backref.level,  | 
6392  | 0  | #endif  | 
6393  | 0  |          env);  | 
6394  | 0  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6395  | 0  |     break;  | 
6396  |  |  | 
6397  | 0  | #ifdef USE_SUBEXP_CALL  | 
6398  | 0  |   case TK_CALL:  | 
6399  | 0  |     { | 
6400  | 0  |       int gnum = tok->u.call.gnum;  | 
6401  |  | 
  | 
6402  | 0  |       if (gnum < 0 || tok->u.call.rel != 0) { | 
6403  | 0  |   if (gnum > 0) gnum--;  | 
6404  | 0  |   gnum = BACKREF_REL_TO_ABS(gnum, env);  | 
6405  | 0  |   if (gnum <= 0)  | 
6406  | 0  |     return ONIGERR_INVALID_BACKREF;  | 
6407  | 0  |       }  | 
6408  | 0  |       *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);  | 
6409  | 0  |       CHECK_NULL_RETURN_MEMERR(*np);  | 
6410  | 0  |       env->num_call++;  | 
6411  | 0  |     }  | 
6412  | 0  |     break;  | 
6413  | 0  | #endif  | 
6414  |  |  | 
6415  | 11.9k  |   case TK_ANCHOR:  | 
6416  | 11.9k  |     *np = onig_node_new_anchor(tok->u.anchor.subtype);  | 
6417  | 11.9k  |     CHECK_NULL_RETURN_MEMERR(*np);  | 
6418  | 11.9k  |     NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;  | 
6419  | 11.9k  |     break;  | 
6420  |  |  | 
6421  | 0  |   case TK_OP_REPEAT:  | 
6422  | 0  |   case TK_INTERVAL:  | 
6423  | 0  |     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { | 
6424  | 0  |       if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))  | 
6425  | 0  |   return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;  | 
6426  | 0  |       else  | 
6427  | 0  |   *np = node_new_empty();  | 
6428  | 0  |     }  | 
6429  | 0  |     else { | 
6430  | 0  |       goto tk_byte;  | 
6431  | 0  |     }  | 
6432  | 0  |     break;  | 
6433  |  |  | 
6434  | 0  |   default:  | 
6435  | 0  |     return ONIGERR_PARSER_BUG;  | 
6436  | 0  |     break;  | 
6437  | 48.0k  |   }  | 
6438  |  |  | 
6439  | 30.8k  |   { | 
6440  | 30.8k  |     targetp = np;  | 
6441  |  |  | 
6442  | 43.1k  |   re_entry:  | 
6443  | 43.1k  |     r = fetch_token(tok, src, end, env);  | 
6444  | 43.1k  |     if (r < 0) return r;  | 
6445  |  |  | 
6446  | 60.4k  |   repeat:  | 
6447  | 60.4k  |     if (r == TK_OP_REPEAT || r == TK_INTERVAL) { | 
6448  | 12.3k  |       if (is_invalid_quantifier_target(*targetp))  | 
6449  | 0  |   return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;  | 
6450  |  |  | 
6451  | 12.3k  |       parse_depth++;  | 
6452  | 12.3k  |       if (parse_depth > ParseDepthLimit)  | 
6453  | 0  |   return ONIGERR_PARSE_DEPTH_LIMIT_OVER;  | 
6454  |  |  | 
6455  | 12.3k  |       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,  | 
6456  | 12.3k  |              (r == TK_INTERVAL ? 1 : 0));  | 
6457  | 12.3k  |       CHECK_NULL_RETURN_MEMERR(qn);  | 
6458  | 12.3k  |       NQTFR(qn)->greedy = tok->u.repeat.greedy;  | 
6459  | 12.3k  |       r = set_quantifier(qn, *targetp, group, env);  | 
6460  | 12.3k  |       if (r < 0) { | 
6461  | 0  |   onig_node_free(qn);  | 
6462  | 0  |   return r;  | 
6463  | 0  |       }  | 
6464  |  |  | 
6465  | 12.3k  |       if (tok->u.repeat.possessive != 0) { | 
6466  | 0  |   Node* en;  | 
6467  | 0  |   en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);  | 
6468  | 0  |   if (IS_NULL(en)) { | 
6469  | 0  |     onig_node_free(qn);  | 
6470  | 0  |     return ONIGERR_MEMORY;  | 
6471  | 0  |   }  | 
6472  | 0  |   NENCLOSE(en)->target = qn;  | 
6473  | 0  |   qn = en;  | 
6474  | 0  |       }  | 
6475  |  |  | 
6476  | 12.3k  |       if (r == 0) { | 
6477  | 12.3k  |   *targetp = qn;  | 
6478  | 12.3k  |       }  | 
6479  | 0  |       else if (r == 1) { | 
6480  | 0  |   onig_node_free(qn);  | 
6481  | 0  |       }  | 
6482  | 0  |       else if (r == 2) { /* split case: /abc+/ */ | 
6483  | 0  |   Node *tmp;  | 
6484  |  | 
  | 
6485  | 0  |   *targetp = node_new_list(*targetp, NULL);  | 
6486  | 0  |   if (IS_NULL(*targetp)) { | 
6487  | 0  |     onig_node_free(qn);  | 
6488  | 0  |     return ONIGERR_MEMORY;  | 
6489  | 0  |   }  | 
6490  | 0  |   tmp = NCDR(*targetp) = node_new_list(qn, NULL);  | 
6491  | 0  |   if (IS_NULL(tmp)) { | 
6492  | 0  |     onig_node_free(qn);  | 
6493  | 0  |     return ONIGERR_MEMORY;  | 
6494  | 0  |   }  | 
6495  | 0  |   targetp = &(NCAR(tmp));  | 
6496  | 0  |       }  | 
6497  | 12.3k  |       goto re_entry;  | 
6498  | 12.3k  |     }  | 
6499  | 60.4k  |   }  | 
6500  |  |  | 
6501  | 48.0k  |   return r;  | 
6502  | 60.4k  | }  | 
6503  |  |  | 
6504  |  | static int  | 
6505  |  | parse_branch(Node** top, OnigToken* tok, int term,  | 
6506  |  |        UChar** src, UChar* end, ScanEnv* env)  | 
6507  | 17.6k  | { | 
6508  | 17.6k  |   int r;  | 
6509  | 17.6k  |   Node *node, **headp;  | 
6510  |  |  | 
6511  | 17.6k  |   *top = NULL;  | 
6512  | 17.6k  |   r = parse_exp(&node, tok, term, src, end, env);  | 
6513  | 17.6k  |   if (r < 0) { | 
6514  | 0  |     onig_node_free(node);  | 
6515  | 0  |     return r;  | 
6516  | 0  |   }  | 
6517  |  |  | 
6518  | 17.6k  |   if (r == TK_EOT || r == term || r == TK_ALT) { | 
6519  | 7.80k  |     *top = node;  | 
6520  | 7.80k  |   }  | 
6521  | 9.86k  |   else { | 
6522  | 9.86k  |     *top  = node_new_list(node, NULL);  | 
6523  | 9.86k  |     headp = &(NCDR(*top));  | 
6524  | 40.6k  |     while (r != TK_EOT && r != term && r != TK_ALT) { | 
6525  | 30.8k  |       r = parse_exp(&node, tok, term, src, end, env);  | 
6526  | 30.8k  |       if (r < 0) { | 
6527  | 0  |   onig_node_free(node);  | 
6528  | 0  |   return r;  | 
6529  | 0  |       }  | 
6530  |  |  | 
6531  | 30.8k  |       if (NTYPE(node) == NT_LIST) { | 
6532  | 0  |   *headp = node;  | 
6533  | 0  |   while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);  | 
6534  | 0  |   headp = &(NCDR(node));  | 
6535  | 0  |       }  | 
6536  | 30.8k  |       else { | 
6537  | 30.8k  |   *headp = node_new_list(node, NULL);  | 
6538  | 30.8k  |   headp = &(NCDR(*headp));  | 
6539  | 30.8k  |       }  | 
6540  | 30.8k  |     }  | 
6541  | 9.86k  |   }  | 
6542  |  |  | 
6543  | 17.6k  |   return r;  | 
6544  | 17.6k  | }  | 
6545  |  |  | 
6546  |  | /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */  | 
6547  |  | static int  | 
6548  |  | parse_subexp(Node** top, OnigToken* tok, int term,  | 
6549  |  |        UChar** src, UChar* end, ScanEnv* env)  | 
6550  | 14.3k  | { | 
6551  | 14.3k  |   int r;  | 
6552  | 14.3k  |   Node *node, **headp;  | 
6553  |  |  | 
6554  | 14.3k  |   *top = NULL;  | 
6555  | 14.3k  |   env->parse_depth++;  | 
6556  | 14.3k  |   if (env->parse_depth > ParseDepthLimit)  | 
6557  | 0  |     return ONIGERR_PARSE_DEPTH_LIMIT_OVER;  | 
6558  | 14.3k  |   r = parse_branch(&node, tok, term, src, end, env);  | 
6559  | 14.3k  |   if (r < 0) { | 
6560  | 0  |     onig_node_free(node);  | 
6561  | 0  |     return r;  | 
6562  | 0  |   }  | 
6563  |  |  | 
6564  | 14.3k  |   if (r == term) { | 
6565  | 11.9k  |     *top = node;  | 
6566  | 11.9k  |   }  | 
6567  | 2.46k  |   else if (r == TK_ALT) { | 
6568  | 2.46k  |     *top  = onig_node_new_alt(node, NULL);  | 
6569  | 2.46k  |     headp = &(NCDR(*top));  | 
6570  | 5.75k  |     while (r == TK_ALT) { | 
6571  | 3.28k  |       r = fetch_token(tok, src, end, env);  | 
6572  | 3.28k  |       if (r < 0) return r;  | 
6573  | 3.28k  |       r = parse_branch(&node, tok, term, src, end, env);  | 
6574  | 3.28k  |       if (r < 0) { | 
6575  | 0  |   onig_node_free(node);  | 
6576  | 0  |   return r;  | 
6577  | 0  |       }  | 
6578  |  |  | 
6579  | 3.28k  |       *headp = onig_node_new_alt(node, NULL);  | 
6580  | 3.28k  |       headp = &(NCDR(*headp));  | 
6581  | 3.28k  |     }  | 
6582  |  |  | 
6583  | 2.46k  |     if (tok->type != (enum TokenSyms )term)  | 
6584  | 0  |       goto err;  | 
6585  | 2.46k  |   }  | 
6586  | 0  |   else { | 
6587  | 0  |     onig_node_free(node);  | 
6588  | 0  |   err:  | 
6589  | 0  |     if (term == TK_SUBEXP_CLOSE)  | 
6590  | 0  |       return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;  | 
6591  | 0  |     else  | 
6592  | 0  |       return ONIGERR_PARSER_BUG;  | 
6593  | 0  |   }  | 
6594  |  |  | 
6595  | 14.3k  |   env->parse_depth--;  | 
6596  | 14.3k  |   return r;  | 
6597  | 14.3k  | }  | 
6598  |  |  | 
6599  |  | static int  | 
6600  |  | parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)  | 
6601  | 9.45k  | { | 
6602  | 9.45k  |   int r;  | 
6603  | 9.45k  |   OnigToken tok;  | 
6604  |  |  | 
6605  | 9.45k  |   r = fetch_token(&tok, src, end, env);  | 
6606  | 9.45k  |   if (r < 0) return r;  | 
6607  | 9.45k  |   r = parse_subexp(top, &tok, TK_EOT, src, end, env);  | 
6608  | 9.45k  |   if (r < 0) return r;  | 
6609  |  |  | 
6610  | 9.45k  | #ifdef USE_SUBEXP_CALL  | 
6611  | 9.45k  |   if (env->num_call > 0) { | 
6612  |  |     /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */  | 
6613  | 0  |     const int num = 0;  | 
6614  | 0  |     Node* np;  | 
6615  | 0  |     np = node_new_enclose_memory(env->option, 0);  | 
6616  | 0  |     CHECK_NULL_RETURN_MEMERR(np);  | 
6617  | 0  |     NENCLOSE(np)->regnum = num;  | 
6618  | 0  |     NENCLOSE(np)->target = *top;  | 
6619  | 0  |     r = scan_env_set_mem_node(env, num, np);  | 
6620  | 0  |     if (r != 0) { | 
6621  | 0  |   onig_node_free(np);  | 
6622  | 0  |   return r;  | 
6623  | 0  |     }  | 
6624  | 0  |     *top = np;  | 
6625  | 0  |   }  | 
6626  | 9.45k  | #endif  | 
6627  | 9.45k  |   return 0;  | 
6628  | 9.45k  | }  | 
6629  |  |  | 
6630  |  | extern int  | 
6631  |  | onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,  | 
6632  |  |          regex_t* reg, ScanEnv* env)  | 
6633  | 9.45k  | { | 
6634  | 9.45k  |   int r;  | 
6635  | 9.45k  |   UChar* p;  | 
6636  |  |  | 
6637  | 9.45k  | #ifdef USE_NAMED_GROUP  | 
6638  | 9.45k  |   names_clear(reg);  | 
6639  | 9.45k  | #endif  | 
6640  |  |  | 
6641  | 9.45k  |   scan_env_clear(env);  | 
6642  | 9.45k  |   env->option         = reg->options;  | 
6643  | 9.45k  |   env->case_fold_flag = reg->case_fold_flag;  | 
6644  | 9.45k  |   env->enc            = reg->enc;  | 
6645  | 9.45k  |   env->syntax         = reg->syntax;  | 
6646  | 9.45k  |   env->pattern        = (UChar* )pattern;  | 
6647  | 9.45k  |   env->pattern_end    = (UChar* )end;  | 
6648  | 9.45k  |   env->reg            = reg;  | 
6649  |  |  | 
6650  | 9.45k  |   *root = NULL;  | 
6651  | 9.45k  |   p = (UChar* )pattern;  | 
6652  | 9.45k  |   r = parse_regexp(root, &p, (UChar* )end, env);  | 
6653  | 9.45k  |   reg->num_mem = env->num_mem;  | 
6654  | 9.45k  |   return r;  | 
6655  | 9.45k  | }  | 
6656  |  |  | 
6657  |  | extern void  | 
6658  |  | onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,  | 
6659  |  |         UChar* arg, UChar* arg_end)  | 
6660  | 0  | { | 
6661  | 0  |   env->error     = arg;  | 
6662  | 0  |   env->error_end = arg_end;  | 
6663  | 0  | }  |