Coverage Report

Created: 2024-02-25 06:12

/src/oniguruma/src/regparse.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regparse.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#ifdef DEBUG_ND_FREE
31
#ifndef NEED_TO_INCLUDE_STDIO
32
#define NEED_TO_INCLUDE_STDIO
33
#endif
34
#endif
35
36
#include "regparse.h"
37
#include "st.h"
38
39
430
/* Initial element count for tag-name storage (consumer is outside this view). */
#define INIT_TAG_NAMES_ALLOC_NUM   5

/* Buffer size used for warning messages (consumer is outside this view). */
#define WARN_BUFSIZE    256

#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS

/*
 * Character predicates for callout names / callout tag names:
 * true for ASCII letters, ASCII digits and '_'.
 * The argument is fully parenthesized so that an expression argument
 * (e.g. a conditional expression) is classified by its value; it is
 * still evaluated several times, so it must be free of side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')

/* Each OPTON_xxx(option) is non-zero when the corresponding bit(s) are set. */
#define OPTON_SINGLELINE(option)     ((option) & ONIG_OPTION_SINGLELINE)
#define OPTON_MULTILINE(option)      ((option) & ONIG_OPTION_MULTILINE)
#define OPTON_IGNORECASE(option)     ((option) & ONIG_OPTION_IGNORECASE)
#define OPTON_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
/* The three xxx_ASCII tests below are also satisfied by POSIX_IS_ASCII. */
#define OPTON_WORD_ASCII(option) \
  ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_DIGIT_ASCII(option) \
  ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_SPACE_ASCII(option) \
  ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII))
#define OPTON_POSIX_ASCII(option)    ((option) & ONIG_OPTION_POSIX_IS_ASCII)
#define OPTON_TEXT_SEGMENT_WORD(option)  ((option) & ONIG_OPTION_TEXT_SEGMENT_WORD)

/*
 * True when ctype is non-negative and either
 *  - ctype is below ONIGENC_CTYPE_ASCII and POSIX_IS_ASCII is set, or
 *  - ctype is WORD / DIGIT / SPACE and the matching xxx_ASCII test holds.
 */
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
  ((ctype) >= 0 && \
  (((ctype) < ONIGENC_CTYPE_ASCII  && OPTON_POSIX_ASCII(options)) ||\
   ((ctype) == ONIGENC_CTYPE_WORD  && OPTON_WORD_ASCII(options))  ||\
   ((ctype) == ONIGENC_CTYPE_DIGIT && OPTON_DIGIT_ASCII(options)) ||\
   ((ctype) == ONIGENC_CTYPE_SPACE && OPTON_SPACE_ASCII(options))))
69
70
71
OnigSyntaxType OnigSyntaxOniguruma = {
72
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
73
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
74
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
75
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
76
     ONIG_SYN_OP_ESC_C_CONTROL )
77
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
78
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
79
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
80
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
81
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
82
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
83
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
84
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |
85
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
86
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
87
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
88
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
89
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
90
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
91
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
92
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
93
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
94
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
95
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
96
  , ( SYN_GNU_REGEX_BV |
97
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
98
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
99
      ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
100
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
101
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
102
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
103
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
104
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
105
#ifdef USE_WHOLE_OPTIONS
106
      ONIG_SYN_WHOLE_OPTIONS |
107
#endif
108
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
109
    )
110
  , ONIG_OPTION_NONE
111
  ,
112
  {
113
      (OnigCodePoint )'\\'                       /* esc */
114
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
115
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
116
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
117
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
118
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
119
  }
120
};
121
122
OnigSyntaxType OnigSyntaxRuby = {
123
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
124
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
125
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
126
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
127
     ONIG_SYN_OP_ESC_C_CONTROL )
128
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
129
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
130
      ONIG_SYN_OP2_OPTION_RUBY |
131
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
132
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
133
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
134
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
135
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
136
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
137
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
138
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
139
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
140
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
141
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
142
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
143
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
144
  , ( SYN_GNU_REGEX_BV |
145
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
146
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
147
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
148
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
149
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
150
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
151
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
152
  , ONIG_OPTION_NONE
153
  ,
154
  {
155
      (OnigCodePoint )'\\'                       /* esc */
156
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
157
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
158
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
159
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
160
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
161
  }
162
};
163
164
/* Library-wide default syntax pointer; initialised to ONIG_SYNTAX_ONIGURUMA. */
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
165
166
167
222k
/*
 * Byte buffer (BBuf) helper macros.
 *
 * The growing macros contain `return(ONIGERR_MEMORY)` and therefore may
 * only be used inside functions returning an int error code.
 * On allocation failure the buffer is left untouched (pointer, alloc and
 * used keep their previous values), so the owner can still free it.
 */
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))

/*
 * Grow the buffer: double the capacity at least once and keep doubling
 * until it is >= low.  A zero capacity grows to 1 first, and a capacity
 * that can no longer be doubled without wrapping is reported as
 * ONIGERR_MEMORY instead of looping forever.
 */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_alloc_ = (buf)->alloc;\
  UChar* bb_p_;\
  do {\
    if (bb_alloc_ > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    bb_alloc_ = (bb_alloc_ == 0 ? 1 : bb_alloc_ * 2);\
  } while (bb_alloc_ < (unsigned int )(low));\
  bb_p_ = (UChar* )xrealloc((buf)->p, bb_alloc_);\
  if (IS_NULL(bb_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_p_;\
  (buf)->alloc = bb_alloc_;\
} while (0)

/* Make sure the capacity is >= size; reallocates only when it must grow. */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int new_alloc = (buf)->alloc;\
  while (new_alloc < (unsigned int )(size)) {\
    if (new_alloc > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    new_alloc = (new_alloc == 0 ? 1 : new_alloc * 2);\
  }\
  if ((buf)->alloc != new_alloc) {\
    UChar* bb_p_ = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(bb_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bb_p_;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)

/* Copy n bytes to offset pos, growing the buffer and `used` as needed. */
#define BB_WRITE(buf,pos,bytes,n) do{\
  int used = (pos) + (n);\
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Store one byte at offset pos, growing the buffer and `used` as needed. */
#define BB_WRITE1(buf,pos,byte) do{\
  int used = (pos) + 1;\
  if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\
  (buf)->p[(pos)] = (byte);\
  if ((buf)->used < (unsigned int )used) (buf)->used = used;\
} while (0)

/* Append at the current end of the used region. */
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)

/* from < to */
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* from > to */
#define BB_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* from > to; moves the tail starting at `from` down to `to` and shrinks `used` */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert n bytes at pos, shifting any existing bytes at/after pos right. */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE(buf,pos,bytes,n);\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

#define BB_GET_BYTE(buf, pos) ((buf)->p[(pos)])
233
234
235
/*
 * State tag enumeration; its users are outside the visible part of this
 * file.  Values are the implicit 0..3 of the original declaration order.
 */
typedef enum {
  CS_VALUE    = 0,
  CS_RANGE    = 1,
  CS_COMPLETE = 2,
  CS_START    = 3
} CSTATE;

/*
 * Value-kind tag enumeration; its users are outside the visible part of
 * this file.  Values are the implicit 0..3 of the original declaration order.
 */
typedef enum {
  CV_UNDEF = 0,
  CV_SB    = 1,
  CV_MB    = 2,
  CV_CPROP = 3
} CVAL;
248
249
0
/* Warning handler that ignores its message; used as the "no handler" value. */
extern void onig_null_warn(const char* s ARG_UNUSED) { }
250
251
#ifdef DEFAULT_WARN_FUNCTION
252
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
253
#else
254
static OnigWarnFunc onig_warn = onig_null_warn;
255
#endif
256
257
#ifdef DEFAULT_VERB_WARN_FUNCTION
258
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
259
#else
260
static OnigWarnFunc onig_verb_warn = onig_null_warn;
261
#endif
262
263
/*
 * Install the warning handler.
 * A NULL handler is stored as onig_null_warn, because callers such as
 * onig_warning() only skip the call when the handler equals
 * onig_null_warn and would otherwise call through a NULL pointer.
 */
extern void onig_set_warn_func(OnigWarnFunc f)
{
  onig_warn = IS_NULL(f) ? onig_null_warn : f;
}
267
268
/*
 * Install the verbose warning handler.
 * A NULL handler is stored as onig_null_warn so that the stored pointer
 * is always callable.
 */
extern void onig_set_verb_warn_func(OnigWarnFunc f)
{
  onig_verb_warn = IS_NULL(f) ? onig_null_warn : f;
}
272
273
extern void
274
onig_warning(const char* s)
275
25.6k
{
276
25.6k
  if (onig_warn == onig_null_warn) return ;
277
278
0
  (*onig_warn)(s);
279
0
}
280
281
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Upper bound on capture groups; changed by onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-group limit.
 * Returns 0 on success, -1 (limit unchanged) when num is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
293
294
/* Maximum parse nesting depth checked by INC_PARSE_DEPTH(). */
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;

/* Return the current parse depth limit. */
extern unsigned int
onig_get_parse_depth_limit(void)
{
  const unsigned int limit = ParseDepthLimit;

  return limit;
}
301
302
extern int
303
onig_set_parse_depth_limit(unsigned int depth)
304
25.6k
{
305
25.6k
  if (depth == 0)
306
0
    ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
307
25.6k
  else
308
25.6k
    ParseDepthLimit = depth;
309
25.6k
  return 0;
310
25.6k
}
311
312
/*
 * INC_PARSE_DEPTH(d): increment d and make the enclosing function return
 * ONIGERR_PARSE_DEPTH_LIMIT_OVER when d exceeds ParseDepthLimit.
 * With ONIG_DEBUG_PARSE the high-water mark is also recorded in
 * env->max_parse_depth (so `env` must be in scope at the use site).
 */
#ifndef ONIG_DEBUG_PARSE
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#else
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if (env->max_parse_depth < (d)) env->max_parse_depth = d;\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#endif

/* DEC_PARSE_DEPTH(d): undo one INC_PARSE_DEPTH(d). */
#define DEC_PARSE_DEPTH(d)  (d)--
328
329
330
static int
331
bbuf_init(BBuf* buf, int size)
332
222k
{
333
222k
  if (size <= 0) {
334
0
    size   = 0;
335
0
    buf->p = NULL;
336
0
  }
337
222k
  else {
338
222k
    buf->p = (UChar* )xmalloc(size);
339
222k
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
340
222k
  }
341
342
222k
  buf->alloc = size;
343
222k
  buf->used  = 0;
344
222k
  return 0;
345
222k
}
346
347
static void
348
bbuf_free(BBuf* bbuf)
349
225k
{
350
225k
  if (IS_NOT_NULL(bbuf)) {
351
222k
    if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
352
222k
    xfree(bbuf);
353
222k
  }
354
225k
}
355
356
static int
357
bbuf_clone(BBuf** rto, BBuf* from)
358
5.11k
{
359
5.11k
  int r;
360
5.11k
  BBuf *to;
361
362
5.11k
  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
363
5.11k
  CHECK_NULL_RETURN_MEMERR(to);
364
5.11k
  r = BB_INIT(to, from->alloc);
365
5.11k
  if (r != 0) {
366
0
    bbuf_free(to);
367
0
    *rto = 0;
368
0
    return r;
369
0
  }
370
5.11k
  to->used = from->used;
371
5.11k
  xmemcpy(to->p, from->p, from->used);
372
5.11k
  return 0;
373
5.11k
}
374
375
static int
376
backref_rel_to_abs(int rel_no, ParseEnv* env)
377
3.02k
{
378
3.02k
  if (rel_no > 0) {
379
752
    if (rel_no > ONIG_INT_MAX - env->num_mem)
380
4
      return ONIGERR_INVALID_BACKREF;
381
748
    return env->num_mem + rel_no;
382
752
  }
383
2.27k
  else {
384
2.27k
    return env->num_mem + 1 + rel_no;
385
2.27k
  }
386
3.02k
}
387
388
/* Set / clear flag bits f in option variable v. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/*
 * Clear f in v when `negative` is non-zero, otherwise set it.
 * The whole expansion is parenthesized so the macro can be embedded in a
 * larger expression without the conditional operator capturing its
 * neighbours.
 */
#define OPTION_NEGATE(v,f,negative) \
  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))

/* First code point of the multi-byte range: 0 when the encoding's minimum
   character length is > 1, else 0x80. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Add the range [MBCODE_START_POS(enc), max code point] to *pbuf. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For non single-byte encodings, add the whole multi-byte range to mbuf.
   Needs an int `r` in scope and returns from the caller on error. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)


/* Store 1 in `empty` when every word of bitset bs is zero, else 0. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
416
417
static void
418
bitset_set_range(BitSetRef bs, int from, int to)
419
5.88k
{
420
5.88k
  int i;
421
403k
  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
422
397k
    BITSET_SET_BIT(bs, i);
423
397k
  }
424
5.88k
}
425
426
static void
427
bitset_invert(BitSetRef bs)
428
0
{
429
0
  int i;
430
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
431
0
}
432
433
static void
434
bitset_invert_to(BitSetRef from, BitSetRef to)
435
1.05k
{
436
1.05k
  int i;
437
9.45k
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
438
1.05k
}
439
440
static void
441
bitset_and(BitSetRef dest, BitSetRef bs)
442
1.42k
{
443
1.42k
  int i;
444
12.8k
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
445
1.42k
}
446
447
static void
448
bitset_or(BitSetRef dest, BitSetRef bs)
449
2.89k
{
450
2.89k
  int i;
451
26.0k
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
452
2.89k
}
453
454
static void
455
bitset_copy(BitSetRef dest, BitSetRef bs)
456
0
{
457
0
  int i;
458
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
459
0
}
460
461
extern int
462
onig_strncmp(const UChar* s1, const UChar* s2, int n)
463
0
{
464
0
  int x;
465
466
0
  while (n-- > 0) {
467
0
    x = *s2++ - *s1++;
468
0
    if (x) return x;
469
0
  }
470
0
  return 0;
471
0
}
472
473
extern void
474
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
475
640k
{
476
640k
  int len = (int )(end - src);
477
640k
  if (len > 0) {
478
639k
    xmemcpy(dest, src, len);
479
639k
    dest[len] = (UChar )0;
480
639k
  }
481
640k
}
482
483
/*
 * scan pattern methods
 *
 * These macros expect the local names `p` (current position), `end`
 * (end of pattern) and `enc` (encoding) at the use site; the non-_S
 * variants also need `pfetch_prev`, declared with PFETCH_READY.
 */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
#define PEND         (p < end ?  0 : 1)
#define PUNFETCH     p = pfetch_prev
#define PPREV        pfetch_prev
/* skip one character, remembering where it started */
#define PINC       do { \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into c and advance, remembering where it started */
#define PFETCH(c)  do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  pfetch_prev = p; \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* _S variants: same as above but do not touch pfetch_prev */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* current character without advancing; PEND_VALUE at end of pattern */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* the argument is parenthesized so the cast applies to the whole expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
510
511
/*
 * Resize the heap string `dest` (or allocate a new one when dest is NULL)
 * to capa + 1 bytes and copy [src, src_end) behind the existing content
 * [dest, dest_end).  Returns the (possibly moved) buffer; a failed
 * allocation is handled by CHECK_NULL_RETURN.
 */
static UChar*
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
            int capa)
{
  ptrdiff_t used = dest_end - dest;   /* taken before dest may be moved */
  UChar* buf;

  buf = dest ? (UChar* )xrealloc(dest, capa + 1)
             : (UChar* )xmalloc(capa + 1);

  CHECK_NULL_RETURN(buf);
  onig_strcpy(buf + used, src, src_end);
  return buf;
}
527
528
/* dest on static area */
529
/*
 * Allocate capa + 1 bytes and fill them with [dest, dest_end) followed by
 * [src, src_end).  `dest` itself is only read, never resized or freed.
 * Returns the new buffer; a failed allocation is handled by
 * CHECK_NULL_RETURN.
 */
static UChar*
strcat_capa_from_static(UChar* dest, UChar* dest_end,
                        const UChar* src, const UChar* src_end, int capa)
{
  UChar* buf = (UChar* )xmalloc(capa + 1);

  CHECK_NULL_RETURN(buf);
  onig_strcpy(buf, dest, dest_end);
  onig_strcpy(buf + (dest_end - dest), src, src_end);
  return buf;
}
541
542
543
#ifdef USE_ST_LIBRARY
544
545
typedef struct {
546
  UChar* s;
547
  UChar* end;
548
} st_str_end_key;
549
550
static int
551
str_end_cmp(st_data_t ax, st_data_t ay)
552
6.58k
{
553
6.58k
  st_str_end_key* x = (st_str_end_key* )ax;
554
6.58k
  st_str_end_key* y = (st_str_end_key* )ay;
555
6.58k
  UChar *p, *q;
556
6.58k
  int c;
557
558
6.58k
  if ((x->end - x->s) != (y->end - y->s))
559
264
    return 1;
560
561
6.31k
  p = x->s;
562
6.31k
  q = y->s;
563
13.3k
  while (p < x->end) {
564
7.05k
    c = (int )*p - (int )*q;
565
7.05k
    if (c != 0) return c;
566
567
7.05k
    p++; q++;
568
7.05k
  }
569
570
6.31k
  return 0;
571
6.31k
}
572
573
static int
574
str_end_hash(st_data_t ax)
575
10.1k
{
576
10.1k
  st_str_end_key* x = (st_str_end_key* )ax;
577
10.1k
  UChar *p;
578
10.1k
  unsigned val = 0;
579
580
10.1k
  p = x->s;
581
27.1k
  while (p < x->end) {
582
17.0k
    val = val * 997 + (unsigned )*p++;
583
17.0k
  }
584
585
10.1k
  return (int) (val + (val >> 5));
586
10.1k
}
587
588
extern hash_table_type
589
onig_st_init_strend_table_with_size(int size)
590
1.92k
{
591
1.92k
  static struct st_hash_type hashType = {
592
1.92k
    str_end_cmp,
593
1.92k
    str_end_hash,
594
1.92k
  };
595
596
1.92k
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
597
1.92k
}
598
599
extern int
600
onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
601
                      const UChar* end_key, hash_data_type *value)
602
7.47k
{
603
7.47k
  st_str_end_key key;
604
605
7.47k
  key.s   = (UChar* )str_key;
606
7.47k
  key.end = (UChar* )end_key;
607
608
7.47k
  return onig_st_lookup(table, (st_data_t )(&key), value);
609
7.47k
}
610
611
extern int
612
onig_st_insert_strend(hash_table_type table, const UChar* str_key,
613
                      const UChar* end_key, hash_data_type value)
614
2.64k
{
615
2.64k
  st_str_end_key* key;
616
2.64k
  int result;
617
618
2.64k
  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
619
2.64k
  CHECK_NULL_RETURN_MEMERR(key);
620
621
2.64k
  key->s   = (UChar* )str_key;
622
2.64k
  key->end = (UChar* )end_key;
623
2.64k
  result = onig_st_insert(table, (st_data_t )key, value);
624
2.64k
  if (result) {
625
0
    xfree(key);
626
0
  }
627
2.64k
  return result;
628
2.64k
}
629
630
631
#ifdef USE_CALLOUT
632
633
typedef struct {
634
  OnigEncoding enc;
635
  int    type; /* callout type: single or not */
636
  UChar* s;
637
  UChar* end;
638
} st_callout_name_key;
639
640
static int
641
callout_name_table_cmp(st_data_t ax, st_data_t ay)
642
3.15k
{
643
3.15k
  st_callout_name_key* x = (st_callout_name_key* )ax;
644
3.15k
  st_callout_name_key* y = (st_callout_name_key* )ay;
645
3.15k
  UChar *p, *q;
646
3.15k
  int c;
647
648
3.15k
  if (x->enc  != y->enc)  return 1;
649
3.15k
  if (x->type != y->type) return 1;
650
3.15k
  if ((x->end - x->s) != (y->end - y->s))
651
0
    return 1;
652
653
3.15k
  p = x->s;
654
3.15k
  q = y->s;
655
13.8k
  while (p < x->end) {
656
10.6k
    c = (int )*p - (int )*q;
657
10.6k
    if (c != 0) return c;
658
659
10.6k
    p++; q++;
660
10.6k
  }
661
662
3.15k
  return 0;
663
3.15k
}
664
665
static int
666
callout_name_table_hash(st_data_t ax)
667
325k
{
668
325k
  st_callout_name_key* x = (st_callout_name_key* )ax;
669
325k
  UChar *p;
670
325k
  unsigned int val = 0;
671
672
325k
  p = x->s;
673
2.25M
  while (p < x->end) {
674
1.92M
    val = val * 997 + (unsigned int )*p++;
675
1.92M
  }
676
677
  /* use intptr_t for escape warning in Windows */
678
325k
  return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
679
325k
}
680
681
extern hash_table_type
682
onig_st_init_callout_name_table_with_size(int size)
683
24.5k
{
684
24.5k
  static struct st_hash_type hashType = {
685
24.5k
    callout_name_table_cmp,
686
24.5k
    callout_name_table_hash,
687
24.5k
  };
688
689
24.5k
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
690
24.5k
}
691
692
extern int
693
onig_st_lookup_callout_name_table(hash_table_type table,
694
                                  OnigEncoding enc,
695
                                  int type,
696
                                  const UChar* str_key,
697
                                  const UChar* end_key,
698
                                  hash_data_type *value)
699
153k
{
700
153k
  st_callout_name_key key;
701
702
153k
  key.enc  = enc;
703
153k
  key.type = type;
704
153k
  key.s    = (UChar* )str_key;
705
153k
  key.end  = (UChar* )end_key;
706
707
153k
  return onig_st_lookup(table, (st_data_t )(&key), value);
708
153k
}
709
710
static int
711
st_insert_callout_name_table(hash_table_type table,
712
                             OnigEncoding enc, int type,
713
                             UChar* str_key, UChar* end_key,
714
                             hash_data_type value)
715
171k
{
716
171k
  st_callout_name_key* key;
717
171k
  int result;
718
719
171k
  key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
720
171k
  CHECK_NULL_RETURN_MEMERR(key);
721
722
  /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
723
171k
  key->enc  = enc;
724
171k
  key->type = type;
725
171k
  key->s    = str_key;
726
171k
  key->end  = end_key;
727
171k
  result = onig_st_insert(table, (st_data_t )key, value);
728
171k
  if (result) {
729
0
    xfree(key);
730
0
  }
731
171k
  return result;
732
171k
}
733
#endif
734
735
#endif /* USE_ST_LIBRARY */
736
737
738
1.04k
#define INIT_NAME_BACKREFS_ALLOC_NUM   8
739
740
typedef struct {
741
  UChar* name;
742
  int    name_len;   /* byte length */
743
  int    back_num;   /* number of backrefs */
744
  int    back_alloc;
745
  int    back_ref1;
746
  int*   back_refs;
747
} NameEntry;
748
749
#ifdef USE_ST_LIBRARY
750
751
26.0k
#define INIT_NAMES_ALLOC_NUM    5
752
753
typedef st_table  NameTable;
754
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
755
756
#define NAMEBUF_SIZE    24
757
#define NAMEBUF_SIZE_1  25
758
759
#ifdef ONIG_DEBUG
760
static int
761
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
762
{
763
  int i;
764
  FILE* fp = (FILE* )arg;
765
766
  fprintf(fp, "%s: ", e->name);
767
  if (e->back_num == 0)
768
    fputs("-", fp);
769
  else if (e->back_num == 1)
770
    fprintf(fp, "%d", e->back_ref1);
771
  else {
772
    for (i = 0; i < e->back_num; i++) {
773
      if (i > 0) fprintf(fp, ", ");
774
      fprintf(fp, "%d", e->back_refs[i]);
775
    }
776
  }
777
  fputs("\n", fp);
778
  return ST_CONTINUE;
779
}
780
781
extern int
782
onig_print_names(FILE* fp, regex_t* reg)
783
{
784
  NameTable* t = (NameTable* )reg->name_table;
785
786
  if (IS_NOT_NULL(t)) {
787
    fprintf(fp, "name table\n");
788
    onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
789
    fputs("\n", fp);
790
  }
791
  return 0;
792
}
793
#endif /* ONIG_DEBUG */
794
795
static int
796
i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
797
2.17k
{
798
2.17k
  xfree(e->name);
799
2.17k
  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
800
2.17k
  xfree(key);
801
2.17k
  xfree(e);
802
2.17k
  return ST_DELETE;
803
2.17k
}
804
805
static int
806
names_clear(regex_t* reg)
807
102k
{
808
102k
  NameTable* t = (NameTable* )reg->name_table;
809
810
102k
  if (IS_NOT_NULL(t)) {
811
1.49k
    onig_st_foreach(t, i_free_name_entry, 0);
812
1.49k
  }
813
102k
  return 0;
814
102k
}
815
816
extern int
817
onig_names_free(regex_t* reg)
818
51.3k
{
819
51.3k
  int r;
820
51.3k
  NameTable* t;
821
822
51.3k
  r = names_clear(reg);
823
51.3k
  if (r != 0) return r;
824
825
51.3k
  t = (NameTable* )reg->name_table;
826
51.3k
  if (IS_NOT_NULL(t)) onig_st_free_table(t);
827
51.3k
  reg->name_table = (void* )NULL;
828
51.3k
  return 0;
829
51.3k
}
830
831
static NameEntry*
832
name_find(regex_t* reg, const UChar* name, const UChar* name_end)
833
8.34k
{
834
8.34k
  NameEntry* e;
835
8.34k
  NameTable* t = (NameTable* )reg->name_table;
836
837
8.34k
  e = (NameEntry* )NULL;
838
8.34k
  if (IS_NOT_NULL(t)) {
839
6.80k
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
840
6.80k
  }
841
8.34k
  return e;
842
8.34k
}
843
844
typedef struct {
845
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
846
  regex_t* reg;
847
  void* arg;
848
  int ret;
849
  OnigEncoding enc;
850
} INamesArg;
851
852
static int
853
i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
854
0
{
855
0
  int r = (*(arg->func))(e->name,
856
0
                         e->name + e->name_len,
857
0
                         e->back_num,
858
0
                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
859
0
                         arg->reg, arg->arg);
860
0
  if (r != 0) {
861
0
    arg->ret = r;
862
0
    return ST_STOP;
863
0
  }
864
0
  return ST_CONTINUE;
865
0
}
866
867
extern int
868
onig_foreach_name(regex_t* reg,
869
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
870
0
{
871
0
  INamesArg narg;
872
0
  NameTable* t = (NameTable* )reg->name_table;
873
874
0
  narg.ret = 0;
875
0
  if (IS_NOT_NULL(t)) {
876
0
    narg.func = func;
877
0
    narg.reg  = reg;
878
0
    narg.arg  = arg;
879
0
    narg.enc  = reg->enc; /* should be pattern encoding. */
880
0
    onig_st_foreach(t, i_names, (HashDataType )&narg);
881
0
  }
882
0
  return narg.ret;
883
0
}
884
885
static int
886
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
887
508
{
888
508
  int i;
889
890
508
  if (e->back_num > 1) {
891
940
    for (i = 0; i < e->back_num; i++) {
892
740
      e->back_refs[i] = map[e->back_refs[i]].new_val;
893
740
    }
894
200
  }
895
308
  else if (e->back_num == 1) {
896
308
    e->back_ref1 = map[e->back_ref1].new_val;
897
308
  }
898
899
508
  return ST_CONTINUE;
900
508
}
901
902
extern int
903
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
904
320
{
905
320
  NameTable* t = (NameTable* )reg->name_table;
906
907
320
  if (IS_NOT_NULL(t)) {
908
320
    onig_st_foreach(t, i_renumber_name, (HashDataType )map);
909
320
  }
910
320
  return 0;
911
320
}
912
913
914
extern int
915
onig_number_of_names(regex_t* reg)
916
0
{
917
0
  NameTable* t = (NameTable* )reg->name_table;
918
919
0
  if (IS_NOT_NULL(t))
920
0
    return t->num_entries;
921
0
  else
922
0
    return 0;
923
0
}
924
925
#else  /* USE_ST_LIBRARY */
926
927
#define INIT_NAMES_ALLOC_NUM    8
928
929
typedef struct {
930
  NameEntry* e;
931
  int        num;
932
  int        alloc;
933
} NameTable;
934
935
#ifdef ONIG_DEBUG
936
extern int
937
onig_print_names(FILE* fp, regex_t* reg)
938
{
939
  int i, j;
940
  NameEntry* e;
941
  NameTable* t = (NameTable* )reg->name_table;
942
943
  if (IS_NOT_NULL(t) && t->num > 0) {
944
    fprintf(fp, "name table\n");
945
    for (i = 0; i < t->num; i++) {
946
      e = &(t->e[i]);
947
      fprintf(fp, "%s: ", e->name);
948
      if (e->back_num == 0) {
949
        fputs("-", fp);
950
      }
951
      else if (e->back_num == 1) {
952
        fprintf(fp, "%d", e->back_ref1);
953
      }
954
      else {
955
        for (j = 0; j < e->back_num; j++) {
956
          if (j > 0) fprintf(fp, ", ");
957
          fprintf(fp, "%d", e->back_refs[j]);
958
        }
959
      }
960
      fputs("\n", fp);
961
    }
962
    fputs("\n", fp);
963
  }
964
  return 0;
965
}
966
#endif
967
968
static int
969
names_clear(regex_t* reg)
970
{
971
  int i;
972
  NameEntry* e;
973
  NameTable* t = (NameTable* )reg->name_table;
974
975
  if (IS_NOT_NULL(t)) {
976
    for (i = 0; i < t->num; i++) {
977
      e = &(t->e[i]);
978
      if (IS_NOT_NULL(e->name)) {
979
        xfree(e->name);
980
        e->name       = NULL;
981
        e->name_len   = 0;
982
        e->back_num   = 0;
983
        e->back_alloc = 0;
984
        if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
985
        e->back_refs = (int* )NULL;
986
      }
987
    }
988
    if (IS_NOT_NULL(t->e)) {
989
      xfree(t->e);
990
      t->e = NULL;
991
    }
992
    t->num = 0;
993
  }
994
  return 0;
995
}
996
997
extern int
998
onig_names_free(regex_t* reg)
999
{
1000
  int r;
1001
  NameTable* t;
1002
1003
  r = names_clear(reg);
1004
  if (r != 0) return r;
1005
1006
  t = (NameTable* )reg->name_table;
1007
  if (IS_NOT_NULL(t)) xfree(t);
1008
  reg->name_table = NULL;
1009
  return 0;
1010
}
1011
1012
static NameEntry*
1013
name_find(regex_t* reg, UChar* name, UChar* name_end)
1014
{
1015
  int i, len;
1016
  NameEntry* e;
1017
  NameTable* t = (NameTable* )reg->name_table;
1018
1019
  if (IS_NOT_NULL(t)) {
1020
    len = name_end - name;
1021
    for (i = 0; i < t->num; i++) {
1022
      e = &(t->e[i]);
1023
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1024
        return e;
1025
    }
1026
  }
1027
  return (NameEntry* )NULL;
1028
}
1029
1030
extern int
1031
onig_foreach_name(regex_t* reg,
1032
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1033
{
1034
  int i, r;
1035
  NameEntry* e;
1036
  NameTable* t = (NameTable* )reg->name_table;
1037
1038
  if (IS_NOT_NULL(t)) {
1039
    for (i = 0; i < t->num; i++) {
1040
      e = &(t->e[i]);
1041
      r = (*func)(e->name, e->name + e->name_len, e->back_num,
1042
                  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1043
                  reg, arg);
1044
      if (r != 0) return r;
1045
    }
1046
  }
1047
  return 0;
1048
}
1049
1050
extern int
1051
onig_number_of_names(regex_t* reg)
1052
{
1053
  NameTable* t = (NameTable* )reg->name_table;
1054
1055
  if (IS_NOT_NULL(t))
1056
    return t->num;
1057
  else
1058
    return 0;
1059
}
1060
1061
#endif /* else USE_ST_LIBRARY */
1062
1063
static int
1064
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1065
6.52k
{
1066
6.52k
  int r;
1067
6.52k
  int alloc;
1068
6.52k
  NameEntry* e;
1069
6.52k
  NameTable* t = (NameTable* )reg->name_table;
1070
1071
6.52k
  if (name_end - name <= 0)
1072
0
    return ONIGERR_EMPTY_GROUP_NAME;
1073
1074
6.52k
  e = name_find(reg, name, name_end);
1075
6.52k
  if (IS_NULL(e)) {
1076
2.17k
#ifdef USE_ST_LIBRARY
1077
2.17k
    if (IS_NULL(t)) {
1078
1.49k
      t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1079
1.49k
      CHECK_NULL_RETURN_MEMERR(t);
1080
1.49k
      reg->name_table = (void* )t;
1081
1.49k
    }
1082
2.17k
    e = (NameEntry* )xmalloc(sizeof(NameEntry));
1083
2.17k
    CHECK_NULL_RETURN_MEMERR(e);
1084
1085
2.17k
    e->name = onigenc_strdup(reg->enc, name, name_end);
1086
2.17k
    if (IS_NULL(e->name)) {
1087
0
      xfree(e);  return ONIGERR_MEMORY;
1088
0
    }
1089
2.17k
    r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1090
2.17k
                              (HashDataType )e);
1091
2.17k
    if (r < 0) return r;
1092
1093
2.17k
    e->name_len   = (int )(name_end - name);
1094
2.17k
    e->back_num   = 0;
1095
2.17k
    e->back_alloc = 0;
1096
2.17k
    e->back_refs  = (int* )NULL;
1097
1098
#else
1099
1100
    if (IS_NULL(t)) {
1101
      alloc = INIT_NAMES_ALLOC_NUM;
1102
      t = (NameTable* )xmalloc(sizeof(NameTable));
1103
      CHECK_NULL_RETURN_MEMERR(t);
1104
      t->e     = NULL;
1105
      t->alloc = 0;
1106
      t->num   = 0;
1107
1108
      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1109
      if (IS_NULL(t->e)) {
1110
        xfree(t);
1111
        return ONIGERR_MEMORY;
1112
      }
1113
      t->alloc = alloc;
1114
      reg->name_table = t;
1115
      goto clear;
1116
    }
1117
    else if (t->num == t->alloc) {
1118
      int i;
1119
1120
      alloc = t->alloc * 2;
1121
      t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1122
      CHECK_NULL_RETURN_MEMERR(t->e);
1123
      t->alloc = alloc;
1124
1125
    clear:
1126
      for (i = t->num; i < t->alloc; i++) {
1127
        t->e[i].name       = NULL;
1128
        t->e[i].name_len   = 0;
1129
        t->e[i].back_num   = 0;
1130
        t->e[i].back_alloc = 0;
1131
        t->e[i].back_refs  = (int* )NULL;
1132
      }
1133
    }
1134
    e = &(t->e[t->num]);
1135
    t->num++;
1136
    e->name = onigenc_strdup(reg->enc, name, name_end);
1137
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1138
    e->name_len = name_end - name;
1139
#endif
1140
2.17k
  }
1141
1142
6.52k
  if (e->back_num >= 1 &&
1143
6.52k
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1144
2
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1145
2
                                   name, name_end);
1146
2
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
1147
2
  }
1148
1149
6.52k
  e->back_num++;
1150
6.52k
  if (e->back_num == 1) {
1151
2.17k
    e->back_ref1 = backref;
1152
2.17k
  }
1153
4.34k
  else {
1154
4.34k
    if (e->back_num == 2) {
1155
1.04k
      alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1156
1.04k
      e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1157
1.04k
      CHECK_NULL_RETURN_MEMERR(e->back_refs);
1158
1.04k
      e->back_alloc = alloc;
1159
1.04k
      e->back_refs[0] = e->back_ref1;
1160
1.04k
      e->back_refs[1] = backref;
1161
1.04k
    }
1162
3.30k
    else {
1163
3.30k
      if (e->back_num > e->back_alloc) {
1164
246
        alloc = e->back_alloc * 2;
1165
246
        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1166
246
        CHECK_NULL_RETURN_MEMERR(e->back_refs);
1167
246
        e->back_alloc = alloc;
1168
246
      }
1169
3.30k
      e->back_refs[e->back_num - 1] = backref;
1170
3.30k
    }
1171
4.34k
  }
1172
1173
6.52k
  return 0;
1174
6.52k
}
1175
1176
extern int
1177
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1178
                           const UChar* name_end, int** nums)
1179
88
{
1180
88
  NameEntry* e = name_find(reg, name, name_end);
1181
1182
88
  if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1183
1184
50
  switch (e->back_num) {
1185
0
  case 0:
1186
0
    break;
1187
46
  case 1:
1188
46
    *nums = &(e->back_ref1);
1189
46
    break;
1190
4
  default:
1191
4
    *nums = e->back_refs;
1192
4
    break;
1193
50
  }
1194
50
  return e->back_num;
1195
50
}
1196
1197
static int
1198
name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1199
                      int** nums)
1200
1.72k
{
1201
1.72k
  regex_t* reg;
1202
1.72k
  NameEntry* e;
1203
1204
1.72k
  reg = env->reg;
1205
1.72k
  e = name_find(reg, name, name_end);
1206
1207
1.72k
  if (IS_NULL(e)) {
1208
8
    onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1209
8
                                   (UChar* )name, (UChar* )name_end);
1210
8
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
1211
8
  }
1212
1213
1.71k
  switch (e->back_num) {
1214
0
  case 0:
1215
0
    break;
1216
352
  case 1:
1217
352
    *nums = &(e->back_ref1);
1218
352
    break;
1219
1.36k
  default:
1220
1.36k
    *nums = e->back_refs;
1221
1.36k
    break;
1222
1.71k
  }
1223
1.71k
  return e->back_num;
1224
1.71k
}
1225
1226
extern int
1227
onig_name_to_backref_number(regex_t* reg, const UChar* name,
1228
                            const UChar* name_end, OnigRegion *region)
1229
0
{
1230
0
  int i, n, *nums;
1231
1232
0
  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1233
0
  if (n < 0)
1234
0
    return n;
1235
0
  else if (n == 0)
1236
0
    return ONIGERR_PARSER_BUG;
1237
0
  else if (n == 1)
1238
0
    return nums[0];
1239
0
  else {
1240
0
    if (IS_NOT_NULL(region)) {
1241
0
      for (i = n - 1; i >= 0; i--) {
1242
0
        if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1243
0
          return nums[i];
1244
0
      }
1245
0
    }
1246
0
    return nums[n - 1];
1247
0
  }
1248
0
}
1249
1250
extern int
1251
onig_noname_group_capture_is_active(regex_t* reg)
1252
0
{
1253
0
  if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1254
0
    return 0;
1255
1256
0
  if (onig_number_of_names(reg) > 0 &&
1257
0
      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1258
0
      ! OPTON_CAPTURE_GROUP(reg->options)) {
1259
0
    return 0;
1260
0
  }
1261
1262
0
  return 1;
1263
0
}
1264
1265
#ifdef USE_CALLOUT
1266
1267
/*
 * One registered callout-of-name, stored in GlobalCalloutNameList->v and
 * indexed by the callout name id.  All fields are filled in by
 * onig_set_callout_of_name():
 *   type          callout type passed at registration
 *   in            the `in` argument; registration requires at least one of
 *                 ONIG_CALLOUT_IN_PROGRESS / ONIG_CALLOUT_IN_RETRACTION
 *   start_func,
 *   end_func      callback functions (at least one is non-zero)
 *   arg_num       total number of arguments
 *   opt_arg_num   number of trailing optional arguments
 *   arg_types     type of each argument, first arg_num slots used
 *   opt_defaults  defaults of the optional arguments, stored at the
 *                 argument's own index; ONIG_TYPE_STRING defaults are
 *                 heap copies released by free_callout_func_list()
 */
typedef struct {
1268
  OnigCalloutType type;
1269
  int             in;
1270
  OnigCalloutFunc start_func;
1271
  OnigCalloutFunc end_func;
1272
  int             arg_num;
1273
  int             opt_arg_num;
1274
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1275
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1276
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
1277
} CalloutNameListEntry;
1278
1279
/*
 * Growable array of CalloutNameListEntry:
 *   n      number of slots in use
 *   alloc  number of slots allocated in v
 *   v      the slot array (doubled by callout_func_list_add() when full)
 */
typedef struct {
1280
  int  n;
1281
  int  alloc;
1282
  CalloutNameListEntry* v;
1283
} CalloutNameListType;
1284
1285
/* Process-wide list of registered callouts of name; created on first use by
   onig_set_callout_of_name() and released by
   onig_global_callout_names_free(). */
static CalloutNameListType* GlobalCalloutNameList;
1286
1287
static int
1288
make_callout_func_list(CalloutNameListType** rs, int init_size)
1289
24.5k
{
1290
24.5k
  CalloutNameListType* s;
1291
24.5k
  CalloutNameListEntry* v;
1292
1293
24.5k
  *rs = 0;
1294
1295
24.5k
  s = xmalloc(sizeof(*s));
1296
24.5k
  if (IS_NULL(s)) return ONIGERR_MEMORY;
1297
1298
24.5k
  v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1299
24.5k
  if (IS_NULL(v)) {
1300
0
    xfree(s);
1301
0
    return ONIGERR_MEMORY;
1302
0
  }
1303
1304
24.5k
  s->n = 0;
1305
24.5k
  s->alloc = init_size;
1306
24.5k
  s->v = v;
1307
1308
24.5k
  *rs = s;
1309
24.5k
  return ONIG_NORMAL;
1310
24.5k
}
1311
1312
static void
1313
free_callout_func_list(CalloutNameListType* s)
1314
25.6k
{
1315
25.6k
  if (IS_NOT_NULL(s)) {
1316
24.5k
    if (IS_NOT_NULL(s->v)) {
1317
24.5k
      int i, j;
1318
1319
221k
      for (i = 0; i < s->n; i++) {
1320
196k
        CalloutNameListEntry* e = s->v + i;
1321
294k
        for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1322
98.2k
          if (e->arg_types[j] == ONIG_TYPE_STRING) {
1323
0
            UChar* p = e->opt_defaults[j].s.start;
1324
0
            if (IS_NOT_NULL(p)) xfree(p);
1325
0
          }
1326
98.2k
        }
1327
196k
      }
1328
24.5k
      xfree(s->v);
1329
24.5k
    }
1330
24.5k
    xfree(s);
1331
24.5k
  }
1332
25.6k
}
1333
1334
static int
1335
callout_func_list_add(CalloutNameListType* s, int* rid)
1336
196k
{
1337
196k
  if (s->n >= s->alloc) {
1338
0
    int new_size = s->alloc * 2;
1339
0
    CalloutNameListEntry* nv = (CalloutNameListEntry* )
1340
0
      xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1341
0
    if (IS_NULL(nv)) return ONIGERR_MEMORY;
1342
1343
0
    s->alloc = new_size;
1344
0
    s->v = nv;
1345
0
  }
1346
1347
196k
  *rid = s->n;
1348
1349
196k
  xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1350
196k
  s->n++;
1351
196k
  return ONIG_NORMAL;
1352
196k
}
1353
1354
1355
/*
 * One entry of the global callout name table:
 *   name      heap copy of the callout name (made with onigenc_strdup)
 *   name_len  length of the name
 *   id        callout name id, taken from CalloutNameIDCounter when the
 *             entry is created
 */
typedef struct {
1356
  UChar* name;
1357
  int    name_len;   /* byte length */
1358
  int    id;
1359
} CalloutNameEntry;
1360
1361
/*
 * Container type of the callout name table: an st hash table when
 * USE_ST_LIBRARY is defined, otherwise a growable array where
 *   e      is the entry array,
 *   num    the number of entries in use,
 *   alloc  the number of entries allocated.
 */
#ifdef USE_ST_LIBRARY
1362
typedef st_table  CalloutNameTable;
1363
#else
1364
typedef struct {
1365
  CalloutNameEntry* e;
1366
  int               num;
1367
  int               alloc;
1368
} CalloutNameTable;
1369
#endif
1370
1371
/* Process-wide callout name table; created lazily by callout_name_entry()
   and released by global_callout_name_table_free(). */
static CalloutNameTable* GlobalCalloutNameTable;
1372
/* Last callout name id handed out.  It is incremented before each new entry
   is given its id, and reset to 0 when the global table is freed. */
static int CalloutNameIDCounter;
1373
1374
#ifdef USE_ST_LIBRARY
1375
1376
static int
1377
i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1378
                          void* arg ARG_UNUSED)
1379
171k
{
1380
171k
  if (IS_NOT_NULL(e)) {
1381
171k
    xfree(e->name);
1382
171k
  }
1383
  /*xfree(key->s); */ /* is same as e->name */
1384
171k
  xfree(key);
1385
171k
  xfree(e);
1386
171k
  return ST_DELETE;
1387
171k
}
1388
1389
static int
1390
callout_name_table_clear(CalloutNameTable* t)
1391
24.5k
{
1392
24.5k
  if (IS_NOT_NULL(t)) {
1393
24.5k
    onig_st_foreach(t, i_free_callout_name_entry, 0);
1394
24.5k
  }
1395
24.5k
  return 0;
1396
24.5k
}
1397
1398
static int
1399
global_callout_name_table_free(void)
1400
25.6k
{
1401
25.6k
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1402
24.5k
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1403
24.5k
    if (r != 0) return r;
1404
1405
24.5k
    onig_st_free_table(GlobalCalloutNameTable);
1406
24.5k
    GlobalCalloutNameTable = 0;
1407
24.5k
    CalloutNameIDCounter = 0;
1408
24.5k
  }
1409
1410
25.6k
  return 0;
1411
25.6k
}
1412
1413
static CalloutNameEntry*
1414
callout_name_find(OnigEncoding enc, int is_not_single,
1415
                  const UChar* name, const UChar* name_end)
1416
175k
{
1417
175k
  int r;
1418
175k
  CalloutNameEntry* e;
1419
175k
  CalloutNameTable* t = GlobalCalloutNameTable;
1420
1421
175k
  e = (CalloutNameEntry* )NULL;
1422
175k
  if (IS_NOT_NULL(t)) {
1423
150k
    r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1424
150k
                                          (HashDataType* )((void* )(&e)));
1425
150k
    if (r == 0) { /* not found */
1426
150k
      if (enc != ONIG_ENCODING_ASCII &&
1427
150k
          ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1428
3.10k
        enc = ONIG_ENCODING_ASCII;
1429
3.10k
        onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1430
3.10k
                                          (HashDataType* )((void* )(&e)));
1431
3.10k
      }
1432
150k
    }
1433
150k
  }
1434
175k
  return e;
1435
175k
}
1436
1437
#else
1438
1439
static int
1440
callout_name_table_clear(CalloutNameTable* t)
1441
{
1442
  int i;
1443
  CalloutNameEntry* e;
1444
1445
  if (IS_NOT_NULL(t)) {
1446
    for (i = 0; i < t->num; i++) {
1447
      e = &(t->e[i]);
1448
      if (IS_NOT_NULL(e->name)) {
1449
        xfree(e->name);
1450
        e->name     = NULL;
1451
        e->name_len = 0;
1452
        e->id       = 0;
1453
        e->func     = 0;
1454
      }
1455
    }
1456
    if (IS_NOT_NULL(t->e)) {
1457
      xfree(t->e);
1458
      t->e = NULL;
1459
    }
1460
    t->num = 0;
1461
  }
1462
  return 0;
1463
}
1464
1465
static int
1466
global_callout_name_table_free(void)
1467
{
1468
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1469
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1470
    if (r != 0) return r;
1471
1472
    xfree(GlobalCalloutNameTable);
1473
    GlobalCalloutNameTable = 0;
1474
    CalloutNameIDCounter = 0;
1475
  }
1476
  return 0;
1477
}
1478
1479
static CalloutNameEntry*
1480
callout_name_find(UChar* name, UChar* name_end)
1481
{
1482
  int i, len;
1483
  CalloutNameEntry* e;
1484
  CalloutNameTable* t = Calloutnames;
1485
1486
  if (IS_NOT_NULL(t)) {
1487
    len = name_end - name;
1488
    for (i = 0; i < t->num; i++) {
1489
      e = &(t->e[i]);
1490
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1491
        return e;
1492
    }
1493
  }
1494
  return (CalloutNameEntry* )NULL;
1495
}
1496
1497
#endif
1498
1499
/* name string must be single byte char string. */
1500
static int
1501
callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1502
                   int is_not_single, UChar* name, UChar* name_end)
1503
171k
{
1504
171k
  int r;
1505
171k
  CalloutNameEntry* e;
1506
171k
  CalloutNameTable* t = GlobalCalloutNameTable;
1507
1508
171k
  *rentry = 0;
1509
171k
  if (name_end - name <= 0)
1510
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1511
1512
171k
  e = callout_name_find(enc, is_not_single, name, name_end);
1513
171k
  if (IS_NULL(e)) {
1514
171k
#ifdef USE_ST_LIBRARY
1515
171k
    if (IS_NULL(t)) {
1516
24.5k
      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1517
24.5k
      CHECK_NULL_RETURN_MEMERR(t);
1518
24.5k
      GlobalCalloutNameTable = t;
1519
24.5k
    }
1520
171k
    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1521
171k
    CHECK_NULL_RETURN_MEMERR(e);
1522
1523
171k
    e->name = onigenc_strdup(enc, name, name_end);
1524
171k
    if (IS_NULL(e->name)) {
1525
0
      xfree(e);  return ONIGERR_MEMORY;
1526
0
    }
1527
1528
171k
    r = st_insert_callout_name_table(t, enc, is_not_single,
1529
171k
                                     e->name, (e->name + (name_end - name)),
1530
171k
                                     (HashDataType )e);
1531
171k
    if (r < 0) return r;
1532
1533
#else
1534
1535
    int alloc;
1536
1537
    if (IS_NULL(t)) {
1538
      alloc = INIT_NAMES_ALLOC_NUM;
1539
      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1540
      CHECK_NULL_RETURN_MEMERR(t);
1541
      t->e     = NULL;
1542
      t->alloc = 0;
1543
      t->num   = 0;
1544
1545
      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1546
      if (IS_NULL(t->e)) {
1547
        xfree(t);
1548
        return ONIGERR_MEMORY;
1549
      }
1550
      t->alloc = alloc;
1551
      GlobalCalloutNameTable = t;
1552
      goto clear;
1553
    }
1554
    else if (t->num == t->alloc) {
1555
      int i;
1556
1557
      alloc = t->alloc * 2;
1558
      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1559
      CHECK_NULL_RETURN_MEMERR(t->e);
1560
      t->alloc = alloc;
1561
1562
    clear:
1563
      for (i = t->num; i < t->alloc; i++) {
1564
        t->e[i].name       = NULL;
1565
        t->e[i].name_len   = 0;
1566
        t->e[i].id         = 0;
1567
      }
1568
    }
1569
    e = &(t->e[t->num]);
1570
    t->num++;
1571
    e->name = onigenc_strdup(enc, name, name_end);
1572
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1573
#endif
1574
1575
171k
    CalloutNameIDCounter++;
1576
171k
    e->id = CalloutNameIDCounter;
1577
171k
    e->name_len = (int )(name_end - name);
1578
171k
  }
1579
1580
171k
  *rentry = e;
1581
171k
  return e->id;
1582
171k
}
1583
1584
static int
1585
is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1586
179k
{
1587
179k
  UChar* p;
1588
179k
  OnigCodePoint c;
1589
1590
179k
  if (name >= name_end) return 0;
1591
1592
179k
  p = name;
1593
1.16M
  while (p < name_end) {
1594
984k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1595
984k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1596
118
      return 0;
1597
1598
983k
    if (p == name) {
1599
179k
      if (c >= '0' && c <= '9') return 0;
1600
179k
    }
1601
1602
983k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1603
983k
  }
1604
1605
179k
  return 1;
1606
179k
}
1607
1608
static int
1609
is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1610
1.34k
{
1611
1.34k
  UChar* p;
1612
1.34k
  OnigCodePoint c;
1613
1614
1.34k
  if (name >= name_end) return 0;
1615
1616
1.29k
  p = name;
1617
4.06k
  while (p < name_end) {
1618
3.20k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1619
3.20k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1620
312
      return 0;
1621
1622
2.89k
    if (p == name) {
1623
1.02k
      if (c >= '0' && c <= '9') return 0;
1624
1.02k
    }
1625
1626
2.77k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1627
2.77k
  }
1628
1629
860
  return 1;
1630
1.29k
}
1631
1632
extern int
1633
onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1634
                         UChar* name, UChar* name_end, int in,
1635
                         OnigCalloutFunc start_func,
1636
                         OnigCalloutFunc end_func,
1637
                         int arg_num, unsigned int arg_types[],
1638
                         int opt_arg_num, OnigValue opt_defaults[])
1639
171k
{
1640
171k
  int r;
1641
171k
  int i;
1642
171k
  int j;
1643
171k
  int id;
1644
171k
  int is_not_single;
1645
171k
  CalloutNameEntry* e;
1646
171k
  CalloutNameListEntry* fe;
1647
1648
171k
  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1649
0
    return ONIGERR_INVALID_ARGUMENT;
1650
1651
171k
  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1652
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1653
1654
171k
  if (opt_arg_num < 0 || opt_arg_num > arg_num)
1655
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1656
1657
171k
  if (start_func == 0 && end_func == 0)
1658
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1659
1660
171k
  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1661
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1662
1663
368k
  for (i = 0; i < arg_num; i++) {
1664
196k
    unsigned int t = arg_types[i];
1665
196k
    if (t == ONIG_TYPE_VOID)
1666
0
      return ONIGERR_INVALID_CALLOUT_ARG;
1667
196k
    else {
1668
196k
      if (i >= arg_num - opt_arg_num) {
1669
98.2k
        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1670
98.2k
            t != ONIG_TYPE_TAG)
1671
0
          return ONIGERR_INVALID_CALLOUT_ARG;
1672
98.2k
      }
1673
98.2k
      else {
1674
98.2k
        if (t != ONIG_TYPE_LONG) {
1675
98.2k
          t = t & ~ONIG_TYPE_LONG;
1676
98.2k
          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1677
0
            return ONIGERR_INVALID_CALLOUT_ARG;
1678
98.2k
        }
1679
98.2k
      }
1680
196k
    }
1681
196k
  }
1682
1683
171k
  if (! is_allowed_callout_name(enc, name, name_end)) {
1684
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1685
0
  }
1686
1687
171k
  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1688
171k
  id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1689
171k
  if (id < 0) return id;
1690
1691
171k
  r = ONIG_NORMAL;
1692
171k
  if (IS_NULL(GlobalCalloutNameList)) {
1693
24.5k
    r = make_callout_func_list(&GlobalCalloutNameList, 10);
1694
24.5k
    if (r != ONIG_NORMAL) return r;
1695
24.5k
  }
1696
1697
368k
  while (id >= GlobalCalloutNameList->n) {
1698
196k
    int rid;
1699
196k
    r = callout_func_list_add(GlobalCalloutNameList, &rid);
1700
196k
    if (r != ONIG_NORMAL) return r;
1701
196k
  }
1702
1703
171k
  fe = GlobalCalloutNameList->v + id;
1704
171k
  fe->type         = callout_type;
1705
171k
  fe->in           = in;
1706
171k
  fe->start_func   = start_func;
1707
171k
  fe->end_func     = end_func;
1708
171k
  fe->arg_num      = arg_num;
1709
171k
  fe->opt_arg_num  = opt_arg_num;
1710
171k
  fe->name         = e->name;
1711
1712
368k
  for (i = 0; i < arg_num; i++) {
1713
196k
    fe->arg_types[i] = arg_types[i];
1714
196k
  }
1715
270k
  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1716
98.2k
    if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1717
98.2k
    if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1718
0
      OnigValue* val;
1719
0
      UChar* ds;
1720
1721
0
      val = opt_defaults + j;
1722
0
      ds = onigenc_strdup(enc, val->s.start, val->s.end);
1723
0
      CHECK_NULL_RETURN_MEMERR(ds);
1724
1725
0
      fe->opt_defaults[i].s.start = ds;
1726
0
      fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start);
1727
0
    }
1728
98.2k
    else {
1729
98.2k
      fe->opt_defaults[i] = opt_defaults[j];
1730
98.2k
    }
1731
98.2k
  }
1732
1733
171k
  r = id;
1734
171k
  return r;
1735
171k
}
1736
1737
static int
1738
get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1739
                            UChar* name, UChar* name_end, int* rid)
1740
3.38k
{
1741
3.38k
  int r;
1742
3.38k
  CalloutNameEntry* e;
1743
1744
3.38k
  if (! is_allowed_callout_name(enc, name, name_end)) {
1745
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1746
0
  }
1747
1748
3.38k
  e = callout_name_find(enc, is_not_single, name, name_end);
1749
3.38k
  if (IS_NULL(e)) {
1750
234
    return ONIGERR_UNDEFINED_CALLOUT_NAME;
1751
234
  }
1752
1753
3.15k
  r = ONIG_NORMAL;
1754
3.15k
  *rid = e->id;
1755
1756
3.15k
  return r;
1757
3.38k
}
1758
1759
extern OnigCalloutFunc
1760
onig_get_callout_start_func(regex_t* reg, int callout_num)
1761
838k
{
1762
  /* If used for callouts of contents, return 0. */
1763
838k
  CalloutListEntry* e;
1764
1765
838k
  e = onig_reg_callout_list_at(reg, callout_num);
1766
838k
  CHECK_NULL_RETURN(e);
1767
838k
  return e->start_func;
1768
838k
}
1769
1770
extern const UChar*
1771
onig_get_callout_tag_start(regex_t* reg, int callout_num)
1772
0
{
1773
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1774
0
  CHECK_NULL_RETURN(e);
1775
0
  return e->tag_start;
1776
0
}
1777
1778
extern const UChar*
1779
onig_get_callout_tag_end(regex_t* reg, int callout_num)
1780
0
{
1781
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1782
0
  CHECK_NULL_RETURN(e);
1783
0
  return e->tag_end;
1784
0
}
1785
1786
1787
extern OnigCalloutType
1788
onig_get_callout_type_by_name_id(int name_id)
1789
2.72k
{
1790
2.72k
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1791
0
    return 0;
1792
1793
2.72k
  return GlobalCalloutNameList->v[name_id].type;
1794
2.72k
}
1795
1796
extern OnigCalloutFunc
1797
onig_get_callout_start_func_by_name_id(int name_id)
1798
2.72k
{
1799
2.72k
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1800
0
    return 0;
1801
1802
2.72k
  return GlobalCalloutNameList->v[name_id].start_func;
1803
2.72k
}
1804
1805
extern OnigCalloutFunc
1806
onig_get_callout_end_func_by_name_id(int name_id)
1807
2.72k
{
1808
2.72k
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1809
0
    return 0;
1810
1811
2.72k
  return GlobalCalloutNameList->v[name_id].end_func;
1812
2.72k
}
1813
1814
extern int
1815
onig_get_callout_in_by_name_id(int name_id)
1816
2.85k
{
1817
2.85k
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1818
0
    return 0;
1819
1820
2.85k
  return GlobalCalloutNameList->v[name_id].in;
1821
2.85k
}
1822
1823
static int
1824
get_callout_arg_num_by_name_id(int name_id)
1825
3.15k
{
1826
3.15k
  return GlobalCalloutNameList->v[name_id].arg_num;
1827
3.15k
}
1828
1829
static int
1830
get_callout_opt_arg_num_by_name_id(int name_id)
1831
2.85k
{
1832
2.85k
  return GlobalCalloutNameList->v[name_id].opt_arg_num;
1833
2.85k
}
1834
1835
static unsigned int
1836
get_callout_arg_type_by_name_id(int name_id, int index)
1837
6.37k
{
1838
6.37k
  return GlobalCalloutNameList->v[name_id].arg_types[index];
1839
6.37k
}
1840
1841
static OnigValue
1842
get_callout_opt_default_by_name_id(int name_id, int index)
1843
616
{
1844
616
  return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1845
616
}
1846
1847
extern UChar*
1848
onig_get_callout_name_by_name_id(int name_id)
1849
0
{
1850
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1851
0
    return 0;
1852
1853
0
  return GlobalCalloutNameList->v[name_id].name;
1854
0
}
1855
1856
extern int
1857
onig_global_callout_names_free(void)
1858
25.6k
{
1859
25.6k
  free_callout_func_list(GlobalCalloutNameList);
1860
25.6k
  GlobalCalloutNameList = 0;
1861
1862
25.6k
  global_callout_name_table_free();
1863
25.6k
  return ONIG_NORMAL;
1864
25.6k
}
1865
1866
1867
/* Per-regex callout tag table: an st hash table keyed by the tag name. */
typedef st_table   CalloutTagTable;
1868
/* Value stored for a tag: the callout number (callout list index + 1). */
typedef intptr_t   CalloutTagVal;
1869
1870
426
/* Bit set in a callout list entry's `flag` when a tag refers to it. */
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
1871
1872
static int
1873
i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1874
426
{
1875
426
  int num;
1876
426
  RegexExt* ext = (RegexExt* )arg;
1877
1878
426
  num = (int )e - 1;
1879
426
  ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1880
426
  return ST_CONTINUE;
1881
426
}
1882
1883
static int
1884
setup_ext_callout_list_values(regex_t* reg)
1885
2.24k
{
1886
2.24k
  int i, j;
1887
2.24k
  RegexExt* ext;
1888
1889
2.24k
  ext = reg->extp;
1890
2.24k
  if (IS_NOT_NULL(ext->tag_table)) {
1891
404
    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1892
404
                    (st_data_t )ext);
1893
404
  }
1894
1895
5.19k
  for (i = 0; i < ext->callout_num; i++) {
1896
2.96k
    CalloutListEntry* e = ext->callout_list + i;
1897
2.96k
    if (e->of == ONIG_CALLOUT_OF_NAME) {
1898
7.54k
      for (j = 0; j < e->u.arg.num; j++) {
1899
5.12k
        if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1900
198
          UChar* start;
1901
198
          UChar* end;
1902
198
          int num;
1903
198
          start = e->u.arg.vals[j].s.start;
1904
198
          end   = e->u.arg.vals[j].s.end;
1905
198
          num = onig_get_callout_num_by_tag(reg, start, end);
1906
198
          if (num < 0) return num;
1907
186
          e->u.arg.vals[j].tag = num;
1908
186
        }
1909
5.12k
      }
1910
2.43k
    }
1911
2.96k
  }
1912
1913
2.23k
  return ONIG_NORMAL;
1914
2.24k
}
1915
1916
extern int
1917
onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1918
0
{
1919
0
  RegexExt* ext = reg->extp;
1920
1921
0
  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1922
0
  if (callout_num > ext->callout_num) return 0;
1923
1924
0
  return (ext->callout_list[callout_num].flag &
1925
0
          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1926
0
}
1927
1928
static int
1929
i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1930
462
{
1931
462
  xfree(key);
1932
462
  return ST_DELETE;
1933
462
}
1934
1935
static int
1936
callout_tag_table_clear(CalloutTagTable* t)
1937
430
{
1938
430
  if (IS_NOT_NULL(t)) {
1939
430
    onig_st_foreach(t, i_free_callout_tag_entry, 0);
1940
430
  }
1941
430
  return 0;
1942
430
}
1943
1944
extern int
1945
onig_callout_tag_table_free(void* table)
1946
430
{
1947
430
  CalloutTagTable* t = (CalloutTagTable* )table;
1948
1949
430
  if (IS_NOT_NULL(t)) {
1950
430
    int r = callout_tag_table_clear(t);
1951
430
    if (r != 0) return r;
1952
1953
430
    onig_st_free_table(t);
1954
430
  }
1955
1956
430
  return 0;
1957
430
}
1958
1959
extern int
1960
onig_get_callout_num_by_tag(regex_t* reg,
1961
                            const UChar* tag, const UChar* tag_end)
1962
198
{
1963
198
  int r;
1964
198
  RegexExt* ext;
1965
198
  CalloutTagVal e;
1966
1967
198
  ext = reg->extp;
1968
198
  if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1969
10
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1970
1971
188
  r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1972
188
                            (HashDataType* )((void* )(&e)));
1973
188
  if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1974
186
  return (int )e;
1975
188
}
1976
1977
static CalloutTagVal
1978
callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1979
476
{
1980
476
  CalloutTagVal e;
1981
1982
476
  e = -1;
1983
476
  if (IS_NOT_NULL(t)) {
1984
476
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1985
476
  }
1986
476
  return e;
1987
476
}
1988
1989
static int
1990
callout_tag_table_new(CalloutTagTable** rt)
1991
430
{
1992
430
  CalloutTagTable* t;
1993
1994
430
  *rt = 0;
1995
430
  t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1996
430
  CHECK_NULL_RETURN_MEMERR(t);
1997
1998
430
  *rt = t;
1999
430
  return ONIG_NORMAL;
2000
430
}
2001
2002
static int
2003
callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
2004
                      UChar* name_end, CalloutTagVal entry_val)
2005
476
{
2006
476
  int r;
2007
476
  CalloutTagVal val;
2008
2009
476
  if (name_end - name <= 0)
2010
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2011
2012
476
  val = callout_tag_find(t, name, name_end);
2013
476
  if (val >= 0) {
2014
14
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2015
14
                                   name, name_end);
2016
14
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
2017
14
  }
2018
2019
462
  r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2020
462
  if (r < 0) return r;
2021
2022
462
  return ONIG_NORMAL;
2023
462
}
2024
2025
static int
2026
ext_ensure_tag_table(regex_t* reg)
2027
476
{
2028
476
  int r;
2029
476
  RegexExt* ext;
2030
476
  CalloutTagTable* t;
2031
2032
476
  ext = onig_get_regex_ext(reg);
2033
476
  CHECK_NULL_RETURN_MEMERR(ext);
2034
2035
476
  if (IS_NULL(ext->tag_table)) {
2036
430
    r = callout_tag_table_new(&t);
2037
430
    if (r != ONIG_NORMAL) return r;
2038
2039
430
    ext->tag_table = t;
2040
430
  }
2041
2042
476
  return ONIG_NORMAL;
2043
476
}
2044
2045
static int
2046
callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2047
                  CalloutTagVal entry_val)
2048
476
{
2049
476
  int r;
2050
476
  RegexExt* ext;
2051
476
  CalloutListEntry* e;
2052
2053
476
  r = ext_ensure_tag_table(reg);
2054
476
  if (r != ONIG_NORMAL) return r;
2055
2056
476
  ext = onig_get_regex_ext(reg);
2057
476
  CHECK_NULL_RETURN_MEMERR(ext);
2058
476
  r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2059
2060
476
  e = onig_reg_callout_list_at(reg, (int )entry_val);
2061
476
  CHECK_NULL_RETURN_MEMERR(e);
2062
476
  e->tag_start = name;
2063
476
  e->tag_end   = name_end;
2064
2065
476
  return r;
2066
476
}
2067
2068
#endif /* USE_CALLOUT */
2069
2070
2071
844
#define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2072
2073
static void
2074
scan_env_clear(ParseEnv* env)
2075
51.3k
{
2076
51.3k
  MEM_STATUS_CLEAR(env->cap_history);
2077
51.3k
  MEM_STATUS_CLEAR(env->backtrack_mem);
2078
51.3k
  MEM_STATUS_CLEAR(env->backrefed_mem);
2079
51.3k
  env->error      = (UChar* )NULL;
2080
51.3k
  env->error_end  = (UChar* )NULL;
2081
51.3k
  env->num_call   = 0;
2082
2083
51.3k
#ifdef USE_CALL
2084
51.3k
  env->unset_addr_list = NULL;
2085
51.3k
#endif
2086
2087
51.3k
  env->num_mem    = 0;
2088
51.3k
  env->num_named  = 0;
2089
51.3k
  env->mem_alloc  = 0;
2090
51.3k
  env->mem_env_dynamic = (MemEnv* )NULL;
2091
2092
51.3k
  xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2093
2094
51.3k
  env->parse_depth      = 0;
2095
#ifdef ONIG_DEBUG_PARSE
2096
  env->max_parse_depth  = 0;
2097
#endif
2098
51.3k
  env->backref_num      = 0;
2099
51.3k
  env->keep_num         = 0;
2100
51.3k
  env->id_num           = 0;
2101
51.3k
  env->save_alloc_num   = 0;
2102
51.3k
  env->saves            = 0;
2103
51.3k
  env->flags            = 0;
2104
51.3k
}
2105
2106
static int
2107
scan_env_add_mem_entry(ParseEnv* env)
2108
35.0k
{
2109
35.0k
  int i, need, alloc;
2110
35.0k
  MemEnv* p;
2111
2112
35.0k
  need = env->num_mem + 1;
2113
35.0k
  if (need > MaxCaptureNum && MaxCaptureNum != 0)
2114
0
    return ONIGERR_TOO_MANY_CAPTURES;
2115
2116
35.0k
  if (need >= PARSEENV_MEMENV_SIZE) {
2117
14.0k
    if (env->mem_alloc <= need) {
2118
1.73k
      if (IS_NULL(env->mem_env_dynamic)) {
2119
844
        alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2120
844
        p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2121
844
        CHECK_NULL_RETURN_MEMERR(p);
2122
844
        xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2123
844
      }
2124
886
      else {
2125
886
        alloc = env->mem_alloc * 2;
2126
886
        p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2127
886
        CHECK_NULL_RETURN_MEMERR(p);
2128
886
      }
2129
2130
29.1k
      for (i = env->num_mem + 1; i < alloc; i++) {
2131
27.3k
        p[i].mem_node = NULL_NODE;
2132
27.3k
        p[i].empty_repeat_node = NULL_NODE;
2133
27.3k
      }
2134
2135
1.73k
      env->mem_env_dynamic = p;
2136
1.73k
      env->mem_alloc = alloc;
2137
1.73k
    }
2138
14.0k
  }
2139
2140
35.0k
  env->num_mem++;
2141
35.0k
  return env->num_mem;
2142
35.0k
}
2143
2144
static int
2145
scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2146
36.2k
{
2147
36.2k
  if (env->num_mem >= num)
2148
36.2k
    PARSEENV_MEMENV(env)[num].mem_node = node;
2149
0
  else
2150
0
    return ONIGERR_PARSER_BUG;
2151
36.2k
  return 0;
2152
36.2k
}
2153
2154
static void
2155
node_free_body(Node* node)
2156
1.21M
{
2157
1.21M
  if (IS_NULL(node)) return ;
2158
2159
1.21M
  switch (ND_TYPE(node)) {
2160
310k
  case ND_STRING:
2161
310k
    if (STR_(node)->capacity != 0 &&
2162
310k
        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2163
2.41k
      xfree(STR_(node)->s);
2164
2.41k
    }
2165
310k
    break;
2166
2167
263k
  case ND_LIST:
2168
293k
  case ND_ALT:
2169
293k
    onig_node_free(ND_CAR(node));
2170
293k
    node = ND_CDR(node);
2171
907k
    while (IS_NOT_NULL(node)) {
2172
614k
      Node* next = ND_CDR(node);
2173
614k
      onig_node_free(ND_CAR(node));
2174
614k
      xfree(node);
2175
614k
      node = next;
2176
614k
    }
2177
293k
    break;
2178
2179
323k
  case ND_CCLASS:
2180
323k
    {
2181
323k
      CClassNode* cc = CCLASS_(node);
2182
2183
323k
      if (cc->mbuf)
2184
220k
        bbuf_free(cc->mbuf);
2185
323k
    }
2186
323k
    break;
2187
2188
12.5k
  case ND_BACKREF:
2189
12.5k
    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2190
348
      xfree(BACKREF_(node)->back_dynamic);
2191
12.5k
    break;
2192
2193
67.2k
  case ND_BAG:
2194
67.2k
    if (ND_BODY(node))
2195
64.6k
      onig_node_free(ND_BODY(node));
2196
2197
67.2k
    {
2198
67.2k
      BagNode* en = BAG_(node);
2199
67.2k
      if (en->type == BAG_IF_ELSE) {
2200
6.96k
        onig_node_free(en->te.Then);
2201
6.96k
        onig_node_free(en->te.Else);
2202
6.96k
      }
2203
67.2k
    }
2204
67.2k
    break;
2205
2206
87.8k
  case ND_QUANT:
2207
87.8k
    if (ND_BODY(node))
2208
79.8k
      onig_node_free(ND_BODY(node));
2209
87.8k
    break;
2210
2211
40.6k
  case ND_ANCHOR:
2212
40.6k
    if (ND_BODY(node))
2213
16.7k
      onig_node_free(ND_BODY(node));
2214
40.6k
    if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
2215
9.87k
      onig_node_free(ANCHOR_(node)->lead_node);
2216
40.6k
    break;
2217
2218
40.9k
  case ND_CTYPE:
2219
49.7k
  case ND_CALL:
2220
81.6k
  case ND_GIMMICK:
2221
81.6k
    break;
2222
1.21M
  }
2223
1.21M
}
2224
2225
extern void
2226
onig_node_free(Node* node)
2227
1.26M
{
2228
1.26M
  if (IS_NULL(node)) return ;
2229
2230
#ifdef DEBUG_ND_FREE
2231
  fprintf(stderr, "onig_node_free: %p\n", node);
2232
#endif
2233
2234
1.21M
  node_free_body(node);
2235
1.21M
  xfree(node);
2236
1.21M
}
2237
2238
static void
2239
cons_node_free_alone(Node* node)
2240
918
{
2241
918
  ND_CAR(node) = 0;
2242
918
  ND_CDR(node) = 0;
2243
918
  onig_node_free(node);
2244
918
}
2245
2246
static Node*
2247
node_new(void)
2248
1.83M
{
2249
1.83M
  Node* node;
2250
2251
1.83M
  node = (Node* )xmalloc(sizeof(Node));
2252
1.83M
  CHECK_NULL_RETURN(node);
2253
1.83M
  xmemset(node, 0, sizeof(*node));
2254
2255
#ifdef DEBUG_ND_FREE
2256
  fprintf(stderr, "node_new: %p\n", node);
2257
#endif
2258
1.83M
  return node;
2259
1.83M
}
2260
2261
extern int
2262
onig_node_copy(Node** rcopy, Node* from)
2263
19.7k
{
2264
19.7k
  int r;
2265
19.7k
  Node* copy;
2266
2267
19.7k
  *rcopy = NULL_NODE;
2268
2269
19.7k
  switch (ND_TYPE(from)) {
2270
0
  case ND_LIST:
2271
0
  case ND_ALT:
2272
9.86k
  case ND_ANCHOR:
2273
    /* These node's link to other nodes are processed by caller. */
2274
9.86k
    break;
2275
4.24k
  case ND_STRING:
2276
9.76k
  case ND_CCLASS:
2277
9.87k
  case ND_CTYPE:
2278
    /* Fixed contents after copy. */
2279
9.87k
    break;
2280
0
  default:
2281
    /* Not supported yet. */
2282
0
    return ONIGERR_TYPE_BUG;
2283
0
    break;
2284
19.7k
  }
2285
2286
19.7k
  copy = node_new();
2287
19.7k
  CHECK_NULL_RETURN_MEMERR(copy);
2288
19.7k
  xmemcpy(copy, from, sizeof(*copy));
2289
2290
19.7k
  switch (ND_TYPE(copy)) {
2291
4.24k
  case ND_STRING:
2292
4.24k
    r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2293
4.24k
    if (r != 0) {
2294
0
    err:
2295
0
      onig_node_free(copy);
2296
0
      return r;
2297
0
    }
2298
4.24k
    break;
2299
2300
5.52k
  case ND_CCLASS:
2301
5.52k
    {
2302
5.52k
      CClassNode *fcc, *tcc;
2303
2304
5.52k
      fcc = CCLASS_(from);
2305
5.52k
      tcc = CCLASS_(copy);
2306
5.52k
      if (IS_NOT_NULL(fcc->mbuf)) {
2307
4.37k
        r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2308
4.37k
        if (r != 0) goto err;
2309
4.37k
      }
2310
5.52k
    }
2311
5.52k
    break;
2312
2313
9.96k
  default:
2314
9.96k
    break;
2315
19.7k
  }
2316
2317
19.7k
  *rcopy = copy;
2318
19.7k
  return ONIG_NORMAL;
2319
19.7k
}
2320
2321
2322
static void
2323
initialize_cclass(CClassNode* cc)
2324
319k
{
2325
319k
  BITSET_CLEAR(cc->bs);
2326
319k
  cc->flags = 0;
2327
319k
  cc->mbuf  = NULL;
2328
319k
}
2329
2330
static Node*
2331
node_new_cclass(void)
2332
318k
{
2333
318k
  Node* node = node_new();
2334
318k
  CHECK_NULL_RETURN(node);
2335
2336
318k
  ND_SET_TYPE(node, ND_CCLASS);
2337
318k
  initialize_cclass(CCLASS_(node));
2338
318k
  return node;
2339
318k
}
2340
2341
static Node*
2342
node_new_ctype(int type, int not, OnigOptionType options)
2343
40.8k
{
2344
40.8k
  Node* node = node_new();
2345
40.8k
  CHECK_NULL_RETURN(node);
2346
2347
40.8k
  ND_SET_TYPE(node, ND_CTYPE);
2348
40.8k
  CTYPE_(node)->ctype   = type;
2349
40.8k
  CTYPE_(node)->not     = not;
2350
40.8k
  CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2351
40.8k
  return node;
2352
40.8k
}
2353
2354
static Node*
2355
node_new_anychar(OnigOptionType options)
2356
33.2k
{
2357
33.2k
  Node* node;
2358
2359
33.2k
  node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2360
33.2k
  CHECK_NULL_RETURN(node);
2361
2362
33.2k
  if (OPTON_MULTILINE(options))
2363
21.5k
    ND_STATUS_ADD(node, MULTILINE);
2364
33.2k
  return node;
2365
33.2k
}
2366
2367
static int
2368
node_new_no_newline(Node** node, ParseEnv* env)
2369
370
{
2370
370
  Node* n;
2371
2372
370
  n = node_new_anychar(ONIG_OPTION_NONE);
2373
370
  CHECK_NULL_RETURN_MEMERR(n);
2374
370
  *node = n;
2375
370
  return 0;
2376
370
}
2377
2378
static int
2379
node_new_true_anychar(Node** node)
2380
15.1k
{
2381
15.1k
  Node* n;
2382
2383
15.1k
  n = node_new_anychar(ONIG_OPTION_MULTILINE);
2384
15.1k
  CHECK_NULL_RETURN_MEMERR(n);
2385
15.1k
  *node = n;
2386
15.1k
  return 0;
2387
15.1k
}
2388
2389
/* Create one ND_LIST cons cell with `left` as CAR and `right` as CDR. */
static Node*
node_new_list(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_LIST);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;

  return cell;
}
2400
2401
/* Exported wrapper around node_new_list(). */
extern Node*
onig_node_new_list(Node* left, Node* right)
{
  Node* cell = node_new_list(left, right);

  return cell;
}
2406
2407
/* Create one ND_ALT cons cell with `left` as CAR and `right` as CDR. */
extern Node*
onig_node_new_alt(Node* left, Node* right)
{
  Node* cell;

  cell = node_new();
  CHECK_NULL_RETURN(cell);

  ND_SET_TYPE(cell, ND_ALT);
  ND_CDR(cell) = right;
  ND_CAR(cell) = left;

  return cell;
}
2418
2419
static Node*
2420
make_list_or_alt(NodeType type, int n, Node* ns[])
2421
331k
{
2422
331k
  Node* r;
2423
2424
331k
  if (n <= 0) return NULL_NODE;
2425
2426
331k
  if (n == 1) {
2427
156k
    r = node_new();
2428
156k
    CHECK_NULL_RETURN(r);
2429
156k
    ND_SET_TYPE(r, type);
2430
156k
    ND_CAR(r) = ns[0];
2431
156k
    ND_CDR(r) = NULL_NODE;
2432
156k
  }
2433
174k
  else {
2434
174k
    Node* right;
2435
2436
174k
    r = node_new();
2437
174k
    CHECK_NULL_RETURN(r);
2438
2439
174k
    right = make_list_or_alt(type, n - 1, ns + 1);
2440
174k
    if (IS_NULL(right)) {
2441
0
      onig_node_free(r);
2442
0
      return NULL_NODE;
2443
0
    }
2444
2445
174k
    ND_SET_TYPE(r, type);
2446
174k
    ND_CAR(r) = ns[0];
2447
174k
    ND_CDR(r) = right;
2448
174k
  }
2449
2450
331k
  return r;
2451
331k
}
2452
2453
/* Chain ns[0..n-1] into an ND_LIST; see make_list_or_alt(). */
static Node*
make_list(int n, Node* ns[])
{
  Node* head = make_list_or_alt(ND_LIST, n, ns);

  return head;
}
2458
2459
/* Chain ns[0..n-1] into an ND_ALT; see make_list_or_alt(). */
static Node*
make_alt(int n, Node* ns[])
{
  Node* head = make_list_or_alt(ND_ALT, n, ns);

  return head;
}
2464
2465
static Node*
2466
node_new_anchor(int type)
2467
30.8k
{
2468
30.8k
  Node* node;
2469
2470
30.8k
  node = node_new();
2471
30.8k
  CHECK_NULL_RETURN(node);
2472
2473
30.8k
  ND_SET_TYPE(node, ND_ANCHOR);
2474
30.8k
  ANCHOR_(node)->type       = type;
2475
30.8k
  ANCHOR_(node)->char_min_len = 0;
2476
30.8k
  ANCHOR_(node)->char_max_len = INFINITE_LEN;
2477
30.8k
  ANCHOR_(node)->ascii_mode = 0;
2478
30.8k
  ANCHOR_(node)->lead_node  = NULL_NODE;
2479
30.8k
  return node;
2480
30.8k
}
2481
2482
static Node*
2483
node_new_anchor_with_options(int type, OnigOptionType options)
2484
23.8k
{
2485
23.8k
  int ascii_mode;
2486
23.8k
  Node* node;
2487
2488
23.8k
  node = node_new_anchor(type);
2489
23.8k
  CHECK_NULL_RETURN(node);
2490
2491
23.8k
  ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2492
23.8k
  ANCHOR_(node)->ascii_mode = ascii_mode;
2493
2494
23.8k
  if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2495
23.8k
      type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2496
8.24k
    if (OPTON_TEXT_SEGMENT_WORD(options))
2497
3.46k
      ND_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2498
8.24k
  }
2499
2500
23.8k
  return node;
2501
23.8k
}
2502
2503
static Node*
2504
node_new_backref(int back_num, int* backrefs, int by_name,
2505
#ifdef USE_BACKREF_WITH_LEVEL
2506
                 int exist_level, int nest_level,
2507
#endif
2508
                 ParseEnv* env)
2509
12.5k
{
2510
12.5k
  int i;
2511
12.5k
  Node* node;
2512
2513
12.5k
  node = node_new();
2514
12.5k
  CHECK_NULL_RETURN(node);
2515
2516
12.5k
  ND_SET_TYPE(node, ND_BACKREF);
2517
12.5k
  BACKREF_(node)->back_num = back_num;
2518
12.5k
  BACKREF_(node)->back_dynamic = (int* )NULL;
2519
12.5k
  if (by_name != 0)
2520
1.71k
    ND_STATUS_ADD(node, BY_NAME);
2521
2522
12.5k
  if (OPTON_IGNORECASE(env->options))
2523
7.12k
    ND_STATUS_ADD(node, IGNORECASE);
2524
2525
12.5k
#ifdef USE_BACKREF_WITH_LEVEL
2526
12.5k
  if (exist_level != 0) {
2527
940
    ND_STATUS_ADD(node, NEST_LEVEL);
2528
940
    BACKREF_(node)->nest_level  = nest_level;
2529
940
  }
2530
12.5k
#endif
2531
2532
28.7k
  for (i = 0; i < back_num; i++) {
2533
17.9k
    if (backrefs[i] <= env->num_mem &&
2534
17.9k
        IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2535
1.72k
      ND_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */
2536
1.72k
      break;
2537
1.72k
    }
2538
17.9k
  }
2539
2540
12.5k
  if (back_num <= ND_BACKREFS_SIZE) {
2541
25.8k
    for (i = 0; i < back_num; i++)
2542
13.6k
      BACKREF_(node)->back_static[i] = backrefs[i];
2543
12.2k
  }
2544
348
  else {
2545
348
    int* p = (int* )xmalloc(sizeof(int) * back_num);
2546
348
    if (IS_NULL(p)) {
2547
0
      onig_node_free(node);
2548
0
      return NULL;
2549
0
    }
2550
348
    BACKREF_(node)->back_dynamic = p;
2551
4.79k
    for (i = 0; i < back_num; i++)
2552
4.45k
      p[i] = backrefs[i];
2553
348
  }
2554
2555
12.5k
  env->backref_num++;
2556
12.5k
  return node;
2557
12.5k
}
2558
2559
static Node*
2560
node_new_backref_checker(int back_num, int* backrefs, int by_name,
2561
#ifdef USE_BACKREF_WITH_LEVEL
2562
                         int exist_level, int nest_level,
2563
#endif
2564
                         ParseEnv* env)
2565
1.16k
{
2566
1.16k
  Node* node;
2567
2568
1.16k
  node = node_new_backref(back_num, backrefs, by_name,
2569
1.16k
#ifdef USE_BACKREF_WITH_LEVEL
2570
1.16k
                          exist_level, nest_level,
2571
1.16k
#endif
2572
1.16k
                          env);
2573
1.16k
  CHECK_NULL_RETURN(node);
2574
2575
1.16k
  ND_STATUS_ADD(node, CHECKER);
2576
1.16k
  return node;
2577
1.16k
}
2578
2579
#ifdef USE_CALL
2580
static Node*
2581
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2582
8.76k
{
2583
8.76k
  Node* node = node_new();
2584
8.76k
  CHECK_NULL_RETURN(node);
2585
2586
8.76k
  ND_SET_TYPE(node, ND_CALL);
2587
8.76k
  CALL_(node)->by_number   = by_number;
2588
8.76k
  CALL_(node)->name        = name;
2589
8.76k
  CALL_(node)->name_end    = name_end;
2590
8.76k
  CALL_(node)->called_gnum = gnum;
2591
8.76k
  CALL_(node)->entry_count = 1;
2592
8.76k
  return node;
2593
8.76k
}
2594
#endif
2595
2596
static Node*
2597
node_new_quantifier(int lower, int upper, int by_number)
2598
88.7k
{
2599
88.7k
  Node* node = node_new();
2600
88.7k
  CHECK_NULL_RETURN(node);
2601
2602
88.7k
  ND_SET_TYPE(node, ND_QUANT);
2603
88.7k
  QUANT_(node)->lower            = lower;
2604
88.7k
  QUANT_(node)->upper            = upper;
2605
88.7k
  QUANT_(node)->greedy           = 1;
2606
88.7k
  QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY;
2607
88.7k
  QUANT_(node)->head_exact       = NULL_NODE;
2608
88.7k
  QUANT_(node)->next_head_exact  = NULL_NODE;
2609
88.7k
  QUANT_(node)->include_referred = 0;
2610
88.7k
  QUANT_(node)->empty_status_mem = 0;
2611
88.7k
  if (by_number != 0)
2612
23.2k
    ND_STATUS_ADD(node, BY_NUMBER);
2613
2614
88.7k
  return node;
2615
88.7k
}
2616
2617
static Node*
2618
node_new_bag(enum BagType type)
2619
67.2k
{
2620
67.2k
  Node* node = node_new();
2621
67.2k
  CHECK_NULL_RETURN(node);
2622
2623
67.2k
  ND_SET_TYPE(node, ND_BAG);
2624
67.2k
  BAG_(node)->type = type;
2625
2626
67.2k
  switch (type) {
2627
37.1k
  case BAG_MEMORY:
2628
37.1k
    BAG_(node)->m.regnum       =  0;
2629
37.1k
    BAG_(node)->m.called_addr  = -1;
2630
37.1k
    BAG_(node)->m.entry_count  =  1;
2631
37.1k
    BAG_(node)->m.called_state =  0;
2632
37.1k
    break;
2633
2634
1.13k
  case BAG_OPTION:
2635
1.13k
    BAG_(node)->o.options =  0;
2636
1.13k
    break;
2637
2638
21.9k
  case BAG_STOP_BACKTRACK:
2639
21.9k
    break;
2640
2641
6.96k
  case BAG_IF_ELSE:
2642
6.96k
    BAG_(node)->te.Then = 0;
2643
6.96k
    BAG_(node)->te.Else = 0;
2644
6.96k
    break;
2645
67.2k
  }
2646
2647
67.2k
  BAG_(node)->opt_count = 0;
2648
67.2k
  return node;
2649
67.2k
}
2650
2651
extern Node*
2652
onig_node_new_bag(enum BagType type)
2653
6.86k
{
2654
6.86k
  return node_new_bag(type);
2655
6.86k
}
2656
2657
/*
 * Create a BAG_IF_ELSE node: `cond` becomes the body, Then/Else become
 * the two branches.
 */
static Node*
node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
{
  Node* bag = node_new_bag(BAG_IF_ELSE);

  CHECK_NULL_RETURN(bag);

  BAG_(bag)->te.Else = Else;
  BAG_(bag)->te.Then = Then;
  ND_BODY(bag)       = cond;

  return bag;
}
2669
2670
static Node*
2671
node_new_memory(int is_named)
2672
37.1k
{
2673
37.1k
  Node* node = node_new_bag(BAG_MEMORY);
2674
37.1k
  CHECK_NULL_RETURN(node);
2675
37.1k
  if (is_named != 0)
2676
6.52k
    ND_STATUS_ADD(node, NAMED_GROUP);
2677
2678
37.1k
  return node;
2679
37.1k
}
2680
2681
static Node*
2682
node_new_option(OnigOptionType option)
2683
1.13k
{
2684
1.13k
  Node* node = node_new_bag(BAG_OPTION);
2685
1.13k
  CHECK_NULL_RETURN(node);
2686
1.13k
  BAG_(node)->o.options = option;
2687
1.13k
  return node;
2688
1.13k
}
2689
2690
/*
 * Wrap `content` in a one-element list: an ND_LIST cell whose CAR is
 * content and whose CDR is NULL_NODE.
 */
static Node*
node_new_group(Node* content)
{
  return node_new_list(content, NULL_NODE);
}
2703
2704
/*
 * Undo node_new_group(): detach and return the content, then free the
 * group cell.
 */
static Node*
node_drop_group(Node* group)
{
  Node* inner = ND_CAR(group);

  /* detach first so that freeing the cell does not free the content */
  ND_CAR(group) = NULL_NODE;
  onig_node_free(group);

  return inner;
}
2714
2715
static int
2716
node_set_fail(Node* node)
2717
7.22k
{
2718
7.22k
  ND_SET_TYPE(node, ND_GIMMICK);
2719
7.22k
  GIMMICK_(node)->type = GIMMICK_FAIL;
2720
7.22k
  return ONIG_NORMAL;
2721
7.22k
}
2722
2723
static int
2724
node_new_fail(Node** node, ParseEnv* env)
2725
6.39k
{
2726
6.39k
  *node = node_new();
2727
6.39k
  CHECK_NULL_RETURN_MEMERR(*node);
2728
2729
6.39k
  return node_set_fail(*node);
2730
6.39k
}
2731
2732
extern int
2733
onig_node_reset_fail(Node* node)
2734
834
{
2735
834
  node_free_body(node);
2736
834
  return node_set_fail(node);
2737
834
}
2738
2739
static int
2740
node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2741
10.8k
{
2742
10.8k
  int id;
2743
2744
10.8k
  ID_ENTRY(env, id);
2745
2746
10.8k
  *node = node_new();
2747
10.8k
  CHECK_NULL_RETURN_MEMERR(*node);
2748
2749
10.8k
  ND_SET_TYPE(*node, ND_GIMMICK);
2750
10.8k
  GIMMICK_(*node)->id   = id;
2751
10.8k
  GIMMICK_(*node)->type = GIMMICK_SAVE;
2752
10.8k
  GIMMICK_(*node)->detail_type = (int )save_type;
2753
2754
10.8k
  return ONIG_NORMAL;
2755
10.8k
}
2756
2757
static int
2758
node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2759
                            int id, ParseEnv* env)
2760
10.2k
{
2761
10.2k
  *node = node_new();
2762
10.2k
  CHECK_NULL_RETURN_MEMERR(*node);
2763
2764
10.2k
  ND_SET_TYPE(*node, ND_GIMMICK);
2765
10.2k
  GIMMICK_(*node)->id   = id;
2766
10.2k
  GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2767
10.2k
  GIMMICK_(*node)->detail_type = (int )update_var_type;
2768
2769
10.2k
  return ONIG_NORMAL;
2770
10.2k
}
2771
2772
static int
2773
node_new_keep(Node** node, ParseEnv* env)
2774
3.22k
{
2775
3.22k
  int r;
2776
2777
3.22k
  r = node_new_save_gimmick(node, SAVE_KEEP, env);
2778
3.22k
  if (r != 0) return r;
2779
2780
3.22k
  env->keep_num++;
2781
3.22k
  return ONIG_NORMAL;
2782
3.22k
}
2783
2784
#ifdef USE_CALLOUT
2785
2786
extern void
2787
onig_free_reg_callout_list(int n, CalloutListEntry* list)
2788
2.34k
{
2789
2.34k
  int i;
2790
2.34k
  int j;
2791
2792
2.34k
  if (IS_NULL(list)) return ;
2793
2794
5.88k
  for (i = 0; i < n; i++) {
2795
3.53k
    if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2796
7.47k
      for (j = 0; j < list[i].u.arg.passed_num; j++) {
2797
4.74k
        if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2798
1.19k
          if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2799
1.19k
            xfree(list[i].u.arg.vals[j].s.start);
2800
1.19k
        }
2801
4.74k
      }
2802
2.72k
    }
2803
812
    else { /* ONIG_CALLOUT_OF_CONTENTS */
2804
812
      if (IS_NOT_NULL(list[i].u.content.start)) {
2805
798
        xfree((void* )list[i].u.content.start);
2806
798
      }
2807
812
    }
2808
3.53k
  }
2809
2810
2.34k
  xfree(list);
2811
2.34k
}
2812
2813
extern CalloutListEntry*
2814
onig_reg_callout_list_at(regex_t* reg, int num)
2815
3.72M
{
2816
3.72M
  RegexExt* ext = reg->extp;
2817
3.72M
  CHECK_NULL_RETURN(ext);
2818
2819
3.72M
  if (num <= 0 || num > ext->callout_num)
2820
0
    return 0;
2821
2822
3.72M
  num--;
2823
3.72M
  return ext->callout_list + num;
2824
3.72M
}
2825
2826
static int
2827
reg_callout_list_entry(ParseEnv* env, int* rnum)
2828
3.53k
{
2829
4.68k
#define INIT_CALLOUT_LIST_NUM  3
2830
2831
3.53k
  int num;
2832
3.53k
  CalloutListEntry* list;
2833
3.53k
  CalloutListEntry* e;
2834
3.53k
  RegexExt* ext;
2835
2836
3.53k
  ext = onig_get_regex_ext(env->reg);
2837
3.53k
  CHECK_NULL_RETURN_MEMERR(ext);
2838
2839
3.53k
  if (IS_NULL(ext->callout_list)) {
2840
2.34k
    list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2841
2.34k
    CHECK_NULL_RETURN_MEMERR(list);
2842
2843
2.34k
    ext->callout_list = list;
2844
2.34k
    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2845
2.34k
    ext->callout_num = 0;
2846
2.34k
  }
2847
2848
3.53k
  num = ext->callout_num + 1;
2849
3.53k
  if (num > ext->callout_list_alloc) {
2850
222
    int alloc = ext->callout_list_alloc * 2;
2851
222
    list = (CalloutListEntry* )xrealloc(ext->callout_list,
2852
222
                                        sizeof(CalloutListEntry) * alloc);
2853
222
    CHECK_NULL_RETURN_MEMERR(list);
2854
2855
222
    ext->callout_list       = list;
2856
222
    ext->callout_list_alloc = alloc;
2857
222
  }
2858
2859
3.53k
  e = ext->callout_list + (num - 1);
2860
2861
3.53k
  e->flag             = 0;
2862
3.53k
  e->of               = 0;
2863
3.53k
  e->in               = ONIG_CALLOUT_OF_CONTENTS;
2864
3.53k
  e->type             = 0;
2865
3.53k
  e->tag_start        = 0;
2866
3.53k
  e->tag_end          = 0;
2867
3.53k
  e->start_func       = 0;
2868
3.53k
  e->end_func         = 0;
2869
3.53k
  e->u.arg.num        = 0;
2870
3.53k
  e->u.arg.passed_num = 0;
2871
2872
3.53k
  ext->callout_num = num;
2873
3.53k
  *rnum = num;
2874
3.53k
  return ONIG_NORMAL;
2875
3.53k
}
2876
2877
static int
2878
node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2879
                 ParseEnv* env)
2880
3.52k
{
2881
3.52k
  *node = node_new();
2882
3.52k
  CHECK_NULL_RETURN_MEMERR(*node);
2883
2884
3.52k
  ND_SET_TYPE(*node, ND_GIMMICK);
2885
3.52k
  GIMMICK_(*node)->id          = id;
2886
3.52k
  GIMMICK_(*node)->num         = num;
2887
3.52k
  GIMMICK_(*node)->type        = GIMMICK_CALLOUT;
2888
3.52k
  GIMMICK_(*node)->detail_type = (int )callout_of;
2889
2890
3.52k
  return ONIG_NORMAL;
2891
3.52k
}
2892
#endif
2893
2894
static int
2895
make_text_segment(Node** node, ParseEnv* env)
2896
5.99k
{
2897
5.99k
  int r;
2898
5.99k
  int i;
2899
5.99k
  Node* x;
2900
5.99k
  Node* ns[2];
2901
2902
  /* \X == (?>\O(?:\Y\O)*) */
2903
2904
5.99k
  ns[1] = NULL_NODE;
2905
2906
5.99k
  r = ONIGERR_MEMORY;
2907
5.99k
  ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
2908
5.99k
  if (IS_NULL(ns[0])) goto err;
2909
2910
5.99k
  r = node_new_true_anychar(&ns[1]);
2911
5.99k
  if (r != 0) goto err1;
2912
2913
5.99k
  x = make_list(2, ns);
2914
5.99k
  if (IS_NULL(x)) goto err;
2915
5.99k
  ns[0] = x;
2916
5.99k
  ns[1] = NULL_NODE;
2917
2918
5.99k
  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
2919
5.99k
  if (IS_NULL(x)) goto err;
2920
2921
5.99k
  ND_BODY(x) = ns[0];
2922
5.99k
  ns[0] = NULL_NODE;
2923
5.99k
  ns[1] = x;
2924
2925
5.99k
  r = node_new_true_anychar(&ns[0]);
2926
5.99k
  if (r != 0) goto err1;
2927
2928
5.99k
  x = make_list(2, ns);
2929
5.99k
  if (IS_NULL(x)) goto err;
2930
2931
5.99k
  ns[0] = x;
2932
5.99k
  ns[1] = NULL_NODE;
2933
2934
5.99k
  x = node_new_bag(BAG_STOP_BACKTRACK);
2935
5.99k
  if (IS_NULL(x)) goto err;
2936
2937
5.99k
  ND_BODY(x) = ns[0];
2938
2939
5.99k
  *node = x;
2940
5.99k
  return ONIG_NORMAL;
2941
2942
0
 err:
2943
0
  r = ONIGERR_MEMORY;
2944
0
 err1:
2945
0
  for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2946
0
  return r;
2947
0
}
2948
2949
static int
2950
make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2951
                   Node* step_one, int lower, int upper, int possessive,
2952
                   int is_range_cutter, ParseEnv* env)
2953
2.54k
{
2954
2.54k
  int r;
2955
2.54k
  int i;
2956
2.54k
  int id;
2957
2.54k
  Node* x;
2958
2.54k
  Node* ns[4];
2959
2960
12.7k
  for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2961
2962
2.54k
  ns[1] = absent;
2963
2.54k
  ns[3] = step_one; /* for err */
2964
2.54k
  r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2965
2.54k
  if (r != 0) goto err;
2966
2967
2.54k
  id = GIMMICK_(ns[0])->id;
2968
2.54k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2969
2.54k
                                  id, env);
2970
2.54k
  if (r != 0) goto err;
2971
2972
2.54k
  if (is_range_cutter != 0)
2973
240
    ND_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2974
2975
2.54k
  r = node_new_fail(&ns[3], env);
2976
2.54k
  if (r != 0) goto err;
2977
2978
2.54k
  x = make_list(4, ns);
2979
2.54k
  if (IS_NULL(x)) goto err0;
2980
2981
2.54k
  ns[0] = x;
2982
2.54k
  ns[1] = step_one;
2983
2.54k
  ns[2] = ns[3] = NULL_NODE;
2984
2985
2.54k
  x = make_alt(2, ns);
2986
2.54k
  if (IS_NULL(x)) goto err0;
2987
2988
2.54k
  ns[0] = x;
2989
2990
2.54k
  x = node_new_quantifier(lower, upper, FALSE);
2991
2.54k
  if (IS_NULL(x)) goto err0;
2992
2993
2.54k
  ND_BODY(x) = ns[0];
2994
2.54k
  ns[0] = x;
2995
2996
2.54k
  if (possessive != 0) {
2997
1.27k
    x = node_new_bag(BAG_STOP_BACKTRACK);
2998
1.27k
    if (IS_NULL(x)) goto err0;
2999
3000
1.27k
    ND_BODY(x) = ns[0];
3001
1.27k
    ns[0] = x;
3002
1.27k
  }
3003
3004
2.54k
  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3005
2.54k
                                  pre_save_right_id, env);
3006
2.54k
  if (r != 0) goto err;
3007
3008
2.54k
  r = node_new_fail(&ns[2], env);
3009
2.54k
  if (r != 0) goto err;
3010
3011
2.54k
  x = make_list(2, ns + 1);
3012
2.54k
  if (IS_NULL(x)) goto err0;
3013
3014
2.54k
  ns[1] = x; ns[2] = NULL_NODE;
3015
3016
2.54k
  x = make_alt(2, ns);
3017
2.54k
  if (IS_NULL(x)) goto err0;
3018
3019
2.54k
  if (is_range_cutter != FALSE)
3020
240
    ND_STATUS_ADD(x, SUPER);
3021
3022
2.54k
  *node = x;
3023
2.54k
  return ONIG_NORMAL;
3024
3025
0
 err0:
3026
0
  r = ONIGERR_MEMORY;
3027
0
 err:
3028
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3029
0
  return r;
3030
0
}
3031
3032
static int
3033
make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
3034
                 ParseEnv* env)
3035
1.00k
{
3036
1.00k
  int r;
3037
1.00k
  int id;
3038
1.00k
  Node* save;
3039
1.00k
  Node* x;
3040
1.00k
  Node* ns[2];
3041
3042
1.00k
  *node1 = *node2 = NULL_NODE;
3043
1.00k
  save = ns[0] = ns[1] = NULL_NODE;
3044
3045
1.00k
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3046
1.00k
  if (r != 0) goto err;
3047
3048
1.00k
  id = GIMMICK_(save)->id;
3049
1.00k
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3050
1.00k
                                  id, env);
3051
1.00k
  if (r != 0) goto err;
3052
3053
1.00k
  r = node_new_fail(&ns[1], env);
3054
1.00k
  if (r != 0) goto err;
3055
3056
1.00k
  x = make_list(2, ns);
3057
1.00k
  if (IS_NULL(x)) goto err0;
3058
3059
1.00k
  ns[0] = NULL_NODE; ns[1] = x;
3060
3061
1.00k
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3062
1.00k
                                  pre_save_right_id, env);
3063
1.00k
  if (r != 0) goto err;
3064
3065
1.00k
  x = make_alt(2, ns);
3066
1.00k
  if (IS_NULL(x)) goto err0;
3067
3068
1.00k
  *node1 = save;
3069
1.00k
  *node2 = x;
3070
1.00k
  return ONIG_NORMAL;
3071
3072
0
 err0:
3073
0
  r = ONIGERR_MEMORY;
3074
0
 err:
3075
0
  onig_node_free(save);
3076
0
  onig_node_free(ns[0]);
3077
0
  onig_node_free(ns[1]);
3078
0
  return r;
3079
0
}
3080
3081
static int
3082
make_range_clear(Node** node, ParseEnv* env)
3083
312
{
3084
312
  int r;
3085
312
  int id;
3086
312
  Node* save;
3087
312
  Node* x;
3088
312
  Node* ns[2];
3089
3090
312
  *node = NULL_NODE;
3091
312
  save = ns[0] = ns[1] = NULL_NODE;
3092
3093
312
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3094
312
  if (r != 0) goto err;
3095
3096
312
  id = GIMMICK_(save)->id;
3097
312
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3098
312
                                  id, env);
3099
312
  if (r != 0) goto err;
3100
3101
312
  r = node_new_fail(&ns[1], env);
3102
312
  if (r != 0) goto err;
3103
3104
312
  x = make_list(2, ns);
3105
312
  if (IS_NULL(x)) goto err0;
3106
3107
312
  ns[0] = NULL_NODE; ns[1] = x;
3108
3109
312
#define ID_NOT_USED_DONT_CARE_ME   0
3110
3111
312
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
3112
312
                                  ID_NOT_USED_DONT_CARE_ME, env);
3113
312
  if (r != 0) goto err;
3114
312
  ND_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);
3115
3116
312
  x = make_alt(2, ns);
3117
312
  if (IS_NULL(x)) goto err0;
3118
3119
312
  ND_STATUS_ADD(x, SUPER);
3120
3121
312
  ns[0] = save;
3122
312
  ns[1] = x;
3123
312
  save = NULL_NODE;
3124
312
  x = make_list(2, ns);
3125
312
  if (IS_NULL(x)) goto err0;
3126
3127
312
  *node = x;
3128
312
  return ONIG_NORMAL;
3129
3130
0
 err0:
3131
0
  r = ONIGERR_MEMORY;
3132
0
 err:
3133
0
  onig_node_free(save);
3134
0
  onig_node_free(ns[0]);
3135
0
  onig_node_free(ns[1]);
3136
0
  return r;
3137
0
}
3138
3139
static int
3140
is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
3141
                          int* is_possessive, ParseEnv* env)
3142
1.14k
{
3143
1.14k
  Node* quant;
3144
1.14k
  Node* body;
3145
3146
1.14k
  *rquant = *rbody = 0;
3147
1.14k
  *is_possessive = 0;
3148
3149
1.14k
  if (ND_TYPE(node) == ND_QUANT) {
3150
304
    quant = node;
3151
304
  }
3152
836
  else {
3153
836
    if (ND_TYPE(node) == ND_BAG) {
3154
138
      BagNode* en = BAG_(node);
3155
138
      if (en->type == BAG_STOP_BACKTRACK) {
3156
108
        *is_possessive = 1;
3157
108
        quant = ND_BAG_BODY(en);
3158
108
        if (ND_TYPE(quant) != ND_QUANT)
3159
62
          return 0;
3160
108
      }
3161
30
      else
3162
30
        return 0;
3163
138
    }
3164
698
    else
3165
698
      return 0;
3166
836
  }
3167
3168
350
  if (QUANT_(quant)->greedy == 0)
3169
30
    return 0;
3170
3171
320
  body = ND_BODY(quant);
3172
320
  switch (ND_TYPE(body)) {
3173
152
  case ND_STRING:
3174
152
    {
3175
152
      int len;
3176
152
      StrNode* sn = STR_(body);
3177
152
      UChar *s = sn->s;
3178
3179
152
      len = 0;
3180
446
      while (s < sn->end) {
3181
294
        s += enclen(env->enc, s);
3182
294
        len++;
3183
294
      }
3184
152
      if (len != 1)
3185
74
        return 0;
3186
152
    }
3187
3188
140
  case ND_CCLASS:
3189
140
    break;
3190
3191
106
  default:
3192
106
    return 0;
3193
0
    break;
3194
320
  }
3195
3196
140
  if (node != quant) {
3197
34
    ND_BODY(node) = 0;
3198
34
    onig_node_free(node);
3199
34
  }
3200
140
  ND_BODY(quant) = NULL_NODE;
3201
140
  *rquant = quant;
3202
140
  *rbody  = body;
3203
140
  return 1;
3204
320
}
3205
3206
static int
3207
make_absent_tree_for_simple_one_char_repeat(Node** node,
3208
  Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
3209
1.30k
{
3210
1.30k
  int r;
3211
1.30k
  int i;
3212
1.30k
  int id1;
3213
1.30k
  int lower, upper;
3214
1.30k
  Node* x;
3215
1.30k
  Node* ns[4];
3216
3217
1.30k
  *node = NULL_NODE;
3218
1.30k
  r = ONIGERR_MEMORY;
3219
1.30k
  ns[0] = ns[1] = NULL_NODE;
3220
1.30k
  ns[2] = body, ns[3] = absent;
3221
3222
1.30k
  lower = QUANT_(quant)->lower;
3223
1.30k
  upper = QUANT_(quant)->upper;
3224
3225
1.30k
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3226
1.30k
  if (r != 0) goto err;
3227
3228
1.30k
  id1 = GIMMICK_(ns[0])->id;
3229
3230
1.30k
  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3231
1.30k
                         FALSE, env);
3232
1.30k
  if (r != 0) goto err;
3233
3234
1.30k
  ns[2] = ns[3] = NULL_NODE;
3235
3236
1.30k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3237
1.30k
                                  id1, env);
3238
1.30k
  if (r != 0) goto err;
3239
3240
1.30k
  x = make_list(3, ns);
3241
1.30k
  if (IS_NULL(x)) goto err0;
3242
3243
1.30k
  *node = x;
3244
1.30k
  return ONIG_NORMAL;
3245
3246
0
 err0:
3247
0
  r = ONIGERR_MEMORY;
3248
0
 err:
3249
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3250
0
  return r;
3251
0
}
3252
3253
static int
3254
make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3255
                 ParseEnv* env)
3256
2.54k
{
3257
2.54k
  int r;
3258
2.54k
  int i;
3259
2.54k
  int id1, id2;
3260
2.54k
  int possessive;
3261
2.54k
  Node* x;
3262
2.54k
  Node* ns[7];
3263
3264
2.54k
  r = ONIGERR_MEMORY;
3265
20.3k
  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3266
2.54k
  ns[4] = expr; ns[5] = absent;
3267
3268
2.54k
  if (is_range_cutter == 0) {
3269
2.30k
    Node* quant;
3270
2.30k
    Node* body;
3271
3272
2.30k
    if (expr == NULL_NODE) {
3273
      /* default expr \O* */
3274
1.16k
      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
3275
1.16k
      if (IS_NULL(quant)) goto err0;
3276
3277
1.16k
      r = node_new_true_anychar(&body);
3278
1.16k
      if (r != 0) {
3279
0
        onig_node_free(quant);
3280
0
        goto err;
3281
0
      }
3282
1.16k
      possessive = 0;
3283
1.16k
      goto simple;
3284
1.16k
    }
3285
1.14k
    else {
3286
1.14k
      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3287
1.30k
      simple:
3288
1.30k
        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3289
1.30k
                                                        body, possessive, env);
3290
1.30k
        onig_node_free(quant);
3291
1.30k
        if (r != 0) {
3292
0
          ns[4] = NULL_NODE;
3293
0
          onig_node_free(body);
3294
0
          goto err;
3295
0
        }
3296
3297
1.30k
        return ONIG_NORMAL;
3298
1.30k
      }
3299
1.14k
    }
3300
2.30k
  }
3301
3302
1.24k
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3303
1.24k
  if (r != 0) goto err;
3304
3305
1.24k
  id1 = GIMMICK_(ns[0])->id;
3306
3307
1.24k
  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3308
1.24k
  if (r != 0) goto err;
3309
3310
1.24k
  id2 = GIMMICK_(ns[1])->id;
3311
3312
1.24k
  r = node_new_true_anychar(&ns[3]);
3313
1.24k
  if (r != 0) goto err;
3314
3315
1.24k
  possessive = 1;
3316
1.24k
  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3317
1.24k
                         possessive, is_range_cutter, env);
3318
1.24k
  if (r != 0) goto err;
3319
3320
1.24k
  ns[3] = NULL_NODE;
3321
1.24k
  ns[5] = NULL_NODE;
3322
3323
1.24k
  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3324
1.24k
  if (r != 0) goto err;
3325
3326
1.24k
  if (is_range_cutter != 0) {
3327
240
    x = make_list(4, ns);
3328
240
    if (IS_NULL(x)) goto err0;
3329
240
  }
3330
1.00k
  else {
3331
1.00k
    r = make_absent_tail(&ns[5], &ns[6], id1, env);
3332
1.00k
    if (r != 0) goto err;
3333
3334
1.00k
    x = make_list(7, ns);
3335
1.00k
    if (IS_NULL(x)) goto err0;
3336
1.00k
  }
3337
3338
1.24k
  *node = x;
3339
1.24k
  return ONIG_NORMAL;
3340
3341
0
 err0:
3342
0
  r = ONIGERR_MEMORY;
3343
0
 err:
3344
0
  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3345
0
  return r;
3346
0
}
3347
3348
extern int
3349
onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3350
723k
{
3351
723k
  int addlen = (int )(end - s);
3352
3353
723k
  if (addlen > 0) {
3354
637k
    int len  = (int )(STR_(node)->end - STR_(node)->s);
3355
3356
637k
    if (STR_(node)->capacity > 0 || (len + addlen > ND_STRING_BUF_SIZE - 1)) {
3357
62.9k
      UChar* p;
3358
62.9k
      int capa = len + addlen + ND_STRING_MARGIN;
3359
3360
62.9k
      if (capa <= STR_(node)->capacity) {
3361
0
        onig_strcpy(STR_(node)->s + len, s, end);
3362
0
      }
3363
62.9k
      else {
3364
62.9k
        if (STR_(node)->s == STR_(node)->buf)
3365
2.41k
          p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3366
2.41k
                                      s, end, capa);
3367
60.5k
        else
3368
60.5k
          p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3369
3370
62.9k
        CHECK_NULL_RETURN_MEMERR(p);
3371
62.9k
        STR_(node)->s        = p;
3372
62.9k
        STR_(node)->capacity = capa;
3373
62.9k
      }
3374
62.9k
    }
3375
574k
    else {
3376
574k
      onig_strcpy(STR_(node)->s + len, s, end);
3377
574k
    }
3378
637k
    STR_(node)->end = STR_(node)->s + len + addlen;
3379
637k
  }
3380
3381
723k
  return 0;
3382
723k
}
3383
3384
extern int
3385
onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3386
4.24k
{
3387
4.24k
  onig_node_str_clear(node, need_free);
3388
4.24k
  return onig_node_str_cat(node, s, end);
3389
4.24k
}
3390
3391
static int
3392
node_str_cat_char(Node* node, UChar c)
3393
142
{
3394
142
  UChar s[1];
3395
3396
142
  s[0] = c;
3397
142
  return onig_node_str_cat(node, s, s + 1);
3398
142
}
3399
3400
extern void
3401
onig_node_str_clear(Node* node, int need_free)
3402
4.24k
{
3403
4.24k
  if (need_free != 0 &&
3404
4.24k
      STR_(node)->capacity != 0 &&
3405
4.24k
      IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3406
0
    xfree(STR_(node)->s);
3407
0
  }
3408
3409
4.24k
  STR_(node)->flag     = 0;
3410
4.24k
  STR_(node)->s        = STR_(node)->buf;
3411
4.24k
  STR_(node)->end      = STR_(node)->buf;
3412
4.24k
  STR_(node)->capacity = 0;
3413
4.24k
}
3414
3415
static int
3416
node_set_str(Node* node, const UChar* s, const UChar* end)
3417
305k
{
3418
305k
  int r;
3419
3420
305k
  ND_SET_TYPE(node, ND_STRING);
3421
305k
  STR_(node)->flag     = 0;
3422
305k
  STR_(node)->s        = STR_(node)->buf;
3423
305k
  STR_(node)->end      = STR_(node)->buf;
3424
305k
  STR_(node)->capacity = 0;
3425
3426
305k
  r = onig_node_str_cat(node, s, end);
3427
305k
  return r;
3428
305k
}
3429
3430
static Node*
3431
node_new_str(const UChar* s, const UChar* end)
3432
305k
{
3433
305k
  int r;
3434
305k
  Node* node = node_new();
3435
305k
  CHECK_NULL_RETURN(node);
3436
3437
305k
  r = node_set_str(node, s, end);
3438
305k
  if (r != 0) {
3439
0
    onig_node_free(node);
3440
0
    return NULL;
3441
0
  }
3442
3443
305k
  return node;
3444
305k
}
3445
3446
static int
3447
node_reset_str(Node* node, const UChar* s, const UChar* end)
3448
522
{
3449
522
  node_free_body(node);
3450
522
  return node_set_str(node, s, end);
3451
522
}
3452
3453
extern int
3454
onig_node_reset_empty(Node* node)
3455
522
{
3456
522
  return node_reset_str(node, NULL, NULL);
3457
522
}
3458
3459
extern Node*
3460
onig_node_new_str(const UChar* s, const UChar* end)
3461
114k
{
3462
114k
  return node_new_str(s, end);
3463
114k
}
3464
3465
static Node*
3466
node_new_str_with_options(const UChar* s, const UChar* end,
3467
                          OnigOptionType options)
3468
112k
{
3469
112k
  Node* node;
3470
112k
  node = node_new_str(s, end);
3471
3472
112k
  if (OPTON_IGNORECASE(options))
3473
64.4k
    ND_STATUS_ADD(node, IGNORECASE);
3474
3475
112k
  return node;
3476
112k
}
3477
3478
static Node*
3479
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3480
5.61k
{
3481
5.61k
  Node* node = node_new_str_with_options(s, end, options);
3482
5.61k
  CHECK_NULL_RETURN(node);
3483
5.61k
  ND_STRING_SET_CRUDE(node);
3484
5.61k
  return node;
3485
5.61k
}
3486
3487
static Node*
3488
node_new_empty(void)
3489
30.7k
{
3490
30.7k
  return node_new_str(NULL, NULL);
3491
30.7k
}
3492
3493
static Node*
3494
node_new_str_crude_char(UChar c, OnigOptionType options)
3495
1.98k
{
3496
1.98k
  int i;
3497
1.98k
  UChar p[1];
3498
1.98k
  Node* node;
3499
3500
1.98k
  p[0] = c;
3501
1.98k
  node = node_new_str_crude(p, p + 1, options);
3502
1.98k
  CHECK_NULL_RETURN(node);
3503
3504
  /* clear buf tail */
3505
47.5k
  for (i = 1; i < ND_STRING_BUF_SIZE; i++)
3506
45.5k
    STR_(node)->buf[i] = '\0';
3507
3508
1.98k
  return node;
3509
1.98k
}
3510
3511
static Node*
3512
str_node_split_last_char(Node* node, OnigEncoding enc)
3513
11.6k
{
3514
11.6k
  const UChar *p;
3515
11.6k
  Node* rn;
3516
11.6k
  StrNode* sn;
3517
3518
11.6k
  sn = STR_(node);
3519
11.6k
  rn = NULL_NODE;
3520
11.6k
  if (sn->end > sn->s) {
3521
11.6k
    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3522
11.6k
    if (p && p > sn->s) { /* can be split. */
3523
11.5k
      rn = node_new_str(p, sn->end);
3524
11.5k
      CHECK_NULL_RETURN(rn);
3525
3526
11.5k
      sn->end = (UChar* )p;
3527
11.5k
      STR_(rn)->flag = sn->flag;
3528
11.5k
      ND_STATUS(rn) = ND_STATUS(node);
3529
11.5k
    }
3530
11.6k
  }
3531
3532
11.6k
  return rn;
3533
11.6k
}
3534
3535
static int
3536
str_node_can_be_split(Node* node, OnigEncoding enc)
3537
30.6k
{
3538
30.6k
  StrNode* sn = STR_(node);
3539
30.6k
  if (sn->end > sn->s) {
3540
30.5k
    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
3541
30.5k
  }
3542
140
  return 0;
3543
30.6k
}
3544
3545
static int
3546
scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3547
51.1k
{
3548
51.1k
  int num, val;
3549
51.1k
  OnigCodePoint c;
3550
51.1k
  UChar* p;
3551
51.1k
  PFETCH_READY;
3552
3553
51.1k
  p = *src;
3554
51.1k
  num = 0;
3555
127k
  while (! PEND) {
3556
114k
    PFETCH(c);
3557
114k
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
3558
76.1k
      val = (int )DIGITVAL(c);
3559
76.1k
      if ((ONIG_INT_MAX - val) / 10 < num)
3560
132
        return -1;  /* overflow */
3561
3562
76.0k
      num = num * 10 + val;
3563
76.0k
    }
3564
38.7k
    else {
3565
38.7k
      PUNFETCH;
3566
38.7k
      break;
3567
38.7k
    }
3568
114k
  }
3569
51.0k
  *src = p;
3570
51.0k
  return num;
3571
51.1k
}
3572
3573
static int
3574
scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3575
                        OnigEncoding enc, OnigCodePoint* rcode)
3576
13.7k
{
3577
13.7k
  OnigCodePoint code;
3578
13.7k
  OnigCodePoint c;
3579
13.7k
  unsigned int val;
3580
13.7k
  int n;
3581
13.7k
  UChar* p;
3582
13.7k
  PFETCH_READY;
3583
3584
13.7k
  p = *src;
3585
13.7k
  code = 0;
3586
13.7k
  n = 0;
3587
44.9k
  while (! PEND && n < maxlen) {
3588
41.7k
    PFETCH(c);
3589
41.7k
    if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3590
31.1k
      n++;
3591
31.1k
      val = (unsigned int )XDIGITVAL(enc, c);
3592
31.1k
      if ((UINT_MAX - val) / 16UL < code)
3593
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3594
3595
31.1k
      code = (code << 4) + val;
3596
31.1k
    }
3597
10.5k
    else {
3598
10.5k
      PUNFETCH;
3599
10.5k
      break;
3600
10.5k
    }
3601
41.7k
  }
3602
3603
13.7k
  if (n < minlen)
3604
320
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3605
3606
13.4k
  *rcode = code;
3607
13.4k
  *src = p;
3608
13.4k
  return ONIG_NORMAL;
3609
13.7k
}
3610
3611
static int
3612
scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3613
                  OnigEncoding enc, OnigCodePoint* rcode)
3614
9.52k
{
3615
9.52k
  OnigCodePoint code;
3616
9.52k
  OnigCodePoint c;
3617
9.52k
  unsigned int val;
3618
9.52k
  int n;
3619
9.52k
  UChar* p;
3620
9.52k
  PFETCH_READY;
3621
3622
9.52k
  p = *src;
3623
9.52k
  code = 0;
3624
9.52k
  n = 0;
3625
19.6k
  while (! PEND && n < maxlen) {
3626
18.8k
    PFETCH(c);
3627
18.8k
    if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3628
10.0k
      n++;
3629
10.0k
      val = (unsigned int )ODIGITVAL(c);
3630
10.0k
      if ((UINT_MAX - val) / 8UL < code)
3631
6
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3632
3633
10.0k
      code = (code << 3) + val;
3634
10.0k
    }
3635
8.77k
    else {
3636
8.77k
      PUNFETCH;
3637
8.77k
      break;
3638
8.77k
    }
3639
18.8k
  }
3640
3641
9.52k
  if (n < minlen)
3642
194
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3643
3644
9.32k
  *rcode = code;
3645
9.32k
  *src = p;
3646
9.32k
  return ONIG_NORMAL;
3647
9.52k
}
3648
3649
static int
3650
scan_number_of_base(UChar** src, UChar* end, int minlen,
3651
                    OnigEncoding enc, OnigCodePoint* rcode, int base)
3652
8.68k
{
3653
8.68k
  int r;
3654
3655
8.68k
  if (base == 16)
3656
6.95k
    r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3657
1.73k
  else if (base == 8)
3658
1.73k
    r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3659
0
  else
3660
0
    r = ONIGERR_INVALID_CODE_POINT_VALUE;
3661
3662
8.68k
  return r;
3663
8.68k
}
3664
3665
24.9k
/* True when c separates the entries of a code point sequence:
   a space or a newline. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')
3666
3667
/* State tracked by check_code_point_sequence_cc() to validate the use of
   '-' between code points. */
enum CPS_STATE {
3668
  CPS_EMPTY = 0,  /* set after the end value of a range; '-' is rejected */
3669
  CPS_START = 1,  /* set after a code point not ending a range; '-' is allowed */
3670
  CPS_RANGE = 2   /* set after '-'; '}' is rejected until a code point follows */
3671
};
3672
3673
static int
3674
check_code_point_sequence_cc(UChar* p, UChar* end, int base,
3675
                             OnigEncoding enc, int state)
3676
1.03k
{
3677
1.03k
  int r;
3678
1.03k
  int n;
3679
1.03k
  int end_digit;
3680
1.03k
  OnigCodePoint code;
3681
1.03k
  OnigCodePoint c;
3682
1.03k
  PFETCH_READY;
3683
3684
1.03k
  end_digit = FALSE;
3685
1.03k
  n = 0;
3686
4.36k
  while (! PEND) {
3687
5.86k
  start:
3688
5.86k
    PFETCH(c);
3689
5.86k
    if (c == '}') {
3690
664
    end_char:
3691
664
      if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
3692
660
      return n;
3693
664
    }
3694
3695
5.33k
    if (IS_CODE_POINT_DIVIDE(c)) {
3696
2.82k
      while (! PEND) {
3697
2.80k
        PFETCH(c);
3698
2.80k
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3699
2.80k
      }
3700
2.11k
      if (IS_CODE_POINT_DIVIDE(c))
3701
16
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3702
2.11k
    }
3703
3.21k
    else if (c == '-') {
3704
1.53k
    range:
3705
1.53k
      if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
3706
1.52k
      if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
3707
1.52k
      end_digit = FALSE;
3708
1.52k
      state = CPS_RANGE;
3709
1.52k
      goto start;
3710
1.52k
    }
3711
1.72k
    else if (end_digit == TRUE) {
3712
142
      if (base == 16) {
3713
64
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3714
2
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3715
64
      }
3716
78
      else if (base == 8) {
3717
78
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3718
2
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3719
78
      }
3720
3721
138
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3722
142
    }
3723
3724
3.67k
    if (c == '}') goto end_char;
3725
3.54k
    if (c == '-') goto range;
3726
3727
3.51k
    PUNFETCH;
3728
3.51k
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3729
3.51k
    if (r != 0) return r;
3730
3.32k
    n++;
3731
3.32k
    end_digit = TRUE;
3732
3.32k
    state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
3733
3.32k
  }
3734
3735
20
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3736
1.03k
}
3737
3738
static int
3739
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3740
680
{
3741
680
  int r;
3742
680
  int n;
3743
680
  int end_digit;
3744
680
  OnigCodePoint code;
3745
680
  OnigCodePoint c;
3746
680
  PFETCH_READY;
3747
3748
680
  end_digit = FALSE;
3749
680
  n = 0;
3750
1.94k
  while (! PEND) {
3751
1.92k
    PFETCH(c);
3752
1.92k
    if (c == '}') {
3753
234
    end_char:
3754
234
      return n;
3755
168
    }
3756
3757
1.75k
    if (IS_CODE_POINT_DIVIDE(c)) {
3758
2.07k
      while (! PEND) {
3759
2.06k
        PFETCH(c);
3760
2.06k
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3761
2.06k
      }
3762
1.42k
      if (IS_CODE_POINT_DIVIDE(c))
3763
18
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3764
1.42k
    }
3765
328
    else if (end_digit == TRUE) {
3766
146
      if (base == 16) {
3767
56
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3768
2
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3769
56
      }
3770
90
      else if (base == 8) {
3771
90
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3772
2
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3773
90
      }
3774
3775
142
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3776
146
    }
3777
3778
1.58k
    if (c == '}') goto end_char;
3779
3780
1.52k
    PUNFETCH;
3781
1.52k
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3782
1.52k
    if (r != 0) return r;
3783
1.26k
    n++;
3784
1.26k
    end_digit = TRUE;
3785
1.26k
  }
3786
3787
26
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3788
680
}
3789
3790
static int
3791
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3792
5.85k
{
3793
5.85k
  int r;
3794
5.85k
  OnigCodePoint c;
3795
5.85k
  UChar* p;
3796
5.85k
  PFETCH_READY;
3797
3798
5.85k
  p = *src;
3799
9.46k
  while (! PEND) {
3800
9.46k
    PFETCH(c);
3801
9.46k
    if (! IS_CODE_POINT_DIVIDE(c)) {
3802
5.85k
      if (c == '}') {
3803
812
        *src = p;
3804
812
        return 1; /* end of sequence */
3805
812
      }
3806
5.04k
      else if (c == '-' && in_cc == TRUE) {
3807
1.39k
        *src = p;
3808
1.39k
        return 2; /* range */
3809
1.39k
      }
3810
3.65k
      PUNFETCH;
3811
3.65k
      break;
3812
5.85k
    }
3813
3.60k
    else {
3814
3.60k
      if (PEND)
3815
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3816
3.60k
    }
3817
9.46k
  }
3818
3819
3.65k
  r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3820
3.65k
  if (r != 0) return r;
3821
3822
3.65k
  *src = p;
3823
3.65k
  return ONIG_NORMAL;
3824
3.65k
}
3825
3826
3827
/* Pass SIZE_CODE_POINT bytes taken from the address of `code` to BB_WRITE
   for bbuf at offset pos.  `code` must be an lvalue because its address is
   taken. */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3828
15.9M
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3829
3830
/* data format:
3831
     [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3832
     (all data size is OnigCodePoint)
3833
 */
3834
static int
3835
new_code_range(BBuf** pbuf)
3836
217k
{
3837
217k
#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
3838
217k
  int r;
3839
217k
  OnigCodePoint n;
3840
217k
  BBuf* bbuf;
3841
3842
217k
  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3843
217k
  CHECK_NULL_RETURN_MEMERR(bbuf);
3844
217k
  r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3845
217k
  if (r != 0) {
3846
0
    xfree(bbuf);
3847
0
    *pbuf = 0;
3848
0
    return r;
3849
0
  }
3850
3851
217k
  n = 0;
3852
217k
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3853
217k
  return 0;
3854
217k
}
3855
3856
static int
3857
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3858
5.25M
{
3859
5.25M
  int r, inc_n, pos;
3860
5.25M
  int low, high, bound, x;
3861
5.25M
  OnigCodePoint n, *data;
3862
5.25M
  BBuf* bbuf;
3863
3864
5.25M
  if (from > to) {
3865
0
    n = from; from = to; to = n;
3866
0
  }
3867
3868
5.25M
  if (IS_NULL(*pbuf)) {
3869
217k
    r = new_code_range(pbuf);
3870
217k
    if (r != 0) return r;
3871
217k
    bbuf = *pbuf;
3872
217k
    n = 0;
3873
217k
  }
3874
5.03M
  else {
3875
5.03M
    bbuf = *pbuf;
3876
5.03M
    GET_CODE_POINT(n, bbuf->p);
3877
5.03M
  }
3878
5.25M
  data = (OnigCodePoint* )(bbuf->p);
3879
5.25M
  data++;
3880
3881
30.6M
  for (low = 0, bound = n; low < bound; ) {
3882
25.4M
    x = (low + bound) >> 1;
3883
25.4M
    if (from > data[x*2 + 1])
3884
13.6M
      low = x + 1;
3885
11.7M
    else
3886
11.7M
      bound = x;
3887
25.4M
  }
3888
3889
5.25M
  high = (to == ~((OnigCodePoint )0)) ? n : low;
3890
22.0M
  for (bound = n; high < bound; ) {
3891
16.7M
    x = (high + bound) >> 1;
3892
16.7M
    if (to + 1 >= data[x*2])
3893
4.03M
      high = x + 1;
3894
12.7M
    else
3895
12.7M
      bound = x;
3896
16.7M
  }
3897
3898
5.25M
  inc_n = low + 1 - high;
3899
5.25M
  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3900
0
    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3901
3902
5.25M
  if (inc_n != 1) {
3903
4.03M
    if (from > data[low*2])
3904
3.69M
      from = data[low*2];
3905
4.03M
    if (to < data[(high - 1)*2 + 1])
3906
3.70M
      to = data[(high - 1)*2 + 1];
3907
4.03M
  }
3908
3909
5.25M
  if (inc_n != 0 && (OnigCodePoint )high < n) {
3910
184k
    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3911
184k
    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3912
184k
    int size = (n - high) * 2 * SIZE_CODE_POINT;
3913
3914
184k
    if (inc_n > 0) {
3915
179k
      BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3916
179k
    }
3917
5.33k
    else {
3918
5.33k
      BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3919
5.33k
    }
3920
184k
  }
3921
3922
5.25M
  pos = SIZE_CODE_POINT * (1 + low * 2);
3923
5.25M
  BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3924
5.25M
  BB_WRITE_CODE_POINT(bbuf, pos, from);
3925
5.25M
  BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3926
5.25M
  n += inc_n;
3927
5.25M
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3928
3929
5.25M
  return 0;
3930
5.25M
}
3931
3932
static int
3933
add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3934
8.53k
{
3935
8.53k
  if (from > to) {
3936
150
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3937
130
      return 0;
3938
20
    else
3939
20
      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3940
150
  }
3941
3942
8.38k
  return add_code_range_to_buf(pbuf, from, to);
3943
8.53k
}
3944
3945
static int
3946
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3947
900
{
3948
900
  int r, i, n;
3949
900
  OnigCodePoint pre, from, *data, to = 0;
3950
3951
900
  *pbuf = (BBuf* )NULL;
3952
900
  if (IS_NULL(bbuf)) {
3953
0
  set_all:
3954
0
    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3955
0
  }
3956
3957
900
  data = (OnigCodePoint* )(bbuf->p);
3958
900
  GET_CODE_POINT(n, data);
3959
900
  data++;
3960
900
  if (n <= 0) goto set_all;
3961
3962
900
  r = 0;
3963
900
  pre = MBCODE_START_POS(enc);
3964
8.17k
  for (i = 0; i < n; i++) {
3965
7.57k
    from = data[i*2];
3966
7.57k
    to   = data[i*2+1];
3967
7.57k
    if (pre <= from - 1) {
3968
6.95k
      r = add_code_range_to_buf(pbuf, pre, from - 1);
3969
6.95k
      if (r != 0) {
3970
0
        bbuf_free(*pbuf);
3971
0
        return r;
3972
0
      }
3973
6.95k
    }
3974
7.57k
    if (to == ~((OnigCodePoint )0)) break;
3975
7.27k
    pre = to + 1;
3976
7.27k
  }
3977
900
  if (to < ~((OnigCodePoint )0)) {
3978
600
    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3979
600
    if (r != 0) bbuf_free(*pbuf);
3980
600
  }
3981
900
  return r;
3982
900
}
3983
3984
458
/* Exchange two (buffer, not-flag) pairs: bbuf1 <-> bbuf2 and not1 <-> not2.
   All four arguments are assigned to, so they must be lvalues. */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3985
458
  BBuf *tbuf; \
3986
458
  int  tnot; \
3987
458
  tnot = not1;  not1  = not2;  not2  = tnot; \
3988
458
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3989
458
} while (0)
3990
3991
static int
3992
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3993
                  BBuf* bbuf2, int not2, BBuf** pbuf)
3994
2.25k
{
3995
2.25k
  int r;
3996
2.25k
  OnigCodePoint i, n1, *data1;
3997
2.25k
  OnigCodePoint from, to;
3998
3999
2.25k
  *pbuf = (BBuf* )NULL;
4000
2.25k
  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
4001
550
    if (not1 != 0 || not2 != 0)
4002
72
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
4003
478
    return 0;
4004
550
  }
4005
4006
1.70k
  r = 0;
4007
1.70k
  if (IS_NULL(bbuf2))
4008
458
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4009
4010
1.70k
  if (IS_NULL(bbuf1)) {
4011
1.18k
    if (not1 != 0) {
4012
70
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
4013
70
    }
4014
1.11k
    else {
4015
1.11k
      if (not2 == 0) {
4016
546
        return bbuf_clone(pbuf, bbuf2);
4017
546
      }
4018
568
      else {
4019
568
        return not_code_range_buf(enc, bbuf2, pbuf);
4020
568
      }
4021
1.11k
    }
4022
1.18k
  }
4023
4024
520
  if (not1 != 0)
4025
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4026
4027
520
  data1 = (OnigCodePoint* )(bbuf1->p);
4028
520
  GET_CODE_POINT(n1, data1);
4029
520
  data1++;
4030
4031
520
  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4032
188
    r = bbuf_clone(pbuf, bbuf2);
4033
188
  }
4034
332
  else if (not1 == 0) { /* 1 OR (not 2) */
4035
332
    r = not_code_range_buf(enc, bbuf2, pbuf);
4036
332
  }
4037
520
  if (r != 0) return r;
4038
4039
10.0k
  for (i = 0; i < n1; i++) {
4040
9.53k
    from = data1[i*2];
4041
9.53k
    to   = data1[i*2+1];
4042
9.53k
    r = add_code_range_to_buf(pbuf, from, to);
4043
9.53k
    if (r != 0) return r;
4044
9.53k
  }
4045
520
  return 0;
4046
520
}
4047
4048
static int
4049
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
4050
                OnigCodePoint* data, int n)
4051
0
{
4052
0
  int i, r;
4053
0
  OnigCodePoint from2, to2;
4054
4055
0
  for (i = 0; i < n; i++) {
4056
0
    from2 = data[i*2];
4057
0
    to2   = data[i*2+1];
4058
0
    if (from2 < from1) {
4059
0
      if (to2 < from1) continue;
4060
0
      else {
4061
0
        from1 = to2 + 1;
4062
0
      }
4063
0
    }
4064
0
    else if (from2 <= to1) {
4065
0
      if (to2 < to1) {
4066
0
        if (from1 <= from2 - 1) {
4067
0
          r = add_code_range_to_buf(pbuf, from1, from2-1);
4068
0
          if (r != 0) return r;
4069
0
        }
4070
0
        from1 = to2 + 1;
4071
0
      }
4072
0
      else {
4073
0
        to1 = from2 - 1;
4074
0
      }
4075
0
    }
4076
0
    else {
4077
0
      from1 = from2;
4078
0
    }
4079
0
    if (from1 > to1) break;
4080
0
  }
4081
0
  if (from1 <= to1) {
4082
0
    r = add_code_range_to_buf(pbuf, from1, to1);
4083
0
    if (r != 0) return r;
4084
0
  }
4085
0
  return 0;
4086
0
}
4087
4088
static int
4089
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4090
904
{
4091
904
  int r;
4092
904
  OnigCodePoint i, j, n1, n2, *data1, *data2;
4093
904
  OnigCodePoint from, to, from1, to1, from2, to2;
4094
4095
904
  *pbuf = (BBuf* )NULL;
4096
904
  if (IS_NULL(bbuf1)) {
4097
456
    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4098
0
      return bbuf_clone(pbuf, bbuf2);
4099
456
    return 0;
4100
456
  }
4101
448
  else if (IS_NULL(bbuf2)) {
4102
72
    if (not2 != 0)
4103
0
      return bbuf_clone(pbuf, bbuf1);
4104
72
    return 0;
4105
72
  }
4106
4107
376
  if (not1 != 0)
4108
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4109
4110
376
  data1 = (OnigCodePoint* )(bbuf1->p);
4111
376
  data2 = (OnigCodePoint* )(bbuf2->p);
4112
376
  GET_CODE_POINT(n1, data1);
4113
376
  GET_CODE_POINT(n2, data2);
4114
376
  data1++;
4115
376
  data2++;
4116
4117
376
  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4118
15.2k
    for (i = 0; i < n1; i++) {
4119
14.9k
      from1 = data1[i*2];
4120
14.9k
      to1   = data1[i*2+1];
4121
155k
      for (j = 0; j < n2; j++) {
4122
141k
        from2 = data2[j*2];
4123
141k
        to2   = data2[j*2+1];
4124
141k
        if (from2 > to1) break;
4125
140k
        if (to2 < from1) continue;
4126
10.7k
        from = MAX(from1, from2);
4127
10.7k
        to   = MIN(to1, to2);
4128
10.7k
        r = add_code_range_to_buf(pbuf, from, to);
4129
10.7k
        if (r != 0) return r;
4130
10.7k
      }
4131
14.9k
    }
4132
376
  }
4133
0
  else if (not1 == 0) { /* 1 AND (not 2) */
4134
0
    for (i = 0; i < n1; i++) {
4135
0
      from1 = data1[i*2];
4136
0
      to1   = data1[i*2+1];
4137
0
      r = and_code_range1(pbuf, from1, to1, data2, n2);
4138
0
      if (r != 0) return r;
4139
0
    }
4140
0
  }
4141
4142
376
  return 0;
4143
376
}
4144
4145
static int
4146
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4147
1.42k
{
4148
1.42k
  int r, not1, not2;
4149
1.42k
  BBuf *buf1, *buf2, *pbuf;
4150
1.42k
  BitSetRef bsr1, bsr2;
4151
1.42k
  BitSet bs1, bs2;
4152
4153
1.42k
  not1 = IS_NCCLASS_NOT(dest);
4154
1.42k
  bsr1 = dest->bs;
4155
1.42k
  buf1 = dest->mbuf;
4156
1.42k
  not2 = IS_NCCLASS_NOT(cc);
4157
1.42k
  bsr2 = cc->bs;
4158
1.42k
  buf2 = cc->mbuf;
4159
4160
1.42k
  if (not1 != 0) {
4161
0
    bitset_invert_to(bsr1, bs1);
4162
0
    bsr1 = bs1;
4163
0
  }
4164
1.42k
  if (not2 != 0) {
4165
0
    bitset_invert_to(bsr2, bs2);
4166
0
    bsr2 = bs2;
4167
0
  }
4168
1.42k
  bitset_and(bsr1, bsr2);
4169
1.42k
  if (bsr1 != dest->bs) {
4170
0
    bitset_copy(dest->bs, bsr1);
4171
0
  }
4172
1.42k
  if (not1 != 0) {
4173
0
    bitset_invert(dest->bs);
4174
0
  }
4175
4176
1.42k
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4177
904
    if (not1 != 0 && not2 != 0) {
4178
0
      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4179
0
    }
4180
904
    else {
4181
904
      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4182
904
      if (r == 0 && not1 != 0) {
4183
0
        BBuf *tbuf;
4184
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4185
0
        if (r != 0) {
4186
0
          bbuf_free(pbuf);
4187
0
          return r;
4188
0
        }
4189
0
        bbuf_free(pbuf);
4190
0
        pbuf = tbuf;
4191
0
      }
4192
904
    }
4193
904
    if (r != 0) return r;
4194
4195
904
    dest->mbuf = pbuf;
4196
904
    bbuf_free(buf1);
4197
904
    return r;
4198
904
  }
4199
520
  return 0;
4200
1.42k
}
4201
4202
static int
4203
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4204
2.89k
{
4205
2.89k
  int r, not1, not2;
4206
2.89k
  BBuf *buf1, *buf2, *pbuf;
4207
2.89k
  BitSetRef bsr1, bsr2;
4208
2.89k
  BitSet bs1, bs2;
4209
4210
2.89k
  not1 = IS_NCCLASS_NOT(dest);
4211
2.89k
  bsr1 = dest->bs;
4212
2.89k
  buf1 = dest->mbuf;
4213
2.89k
  not2 = IS_NCCLASS_NOT(cc);
4214
2.89k
  bsr2 = cc->bs;
4215
2.89k
  buf2 = cc->mbuf;
4216
4217
2.89k
  if (not1 != 0) {
4218
0
    bitset_invert_to(bsr1, bs1);
4219
0
    bsr1 = bs1;
4220
0
  }
4221
2.89k
  if (not2 != 0) {
4222
1.05k
    bitset_invert_to(bsr2, bs2);
4223
1.05k
    bsr2 = bs2;
4224
1.05k
  }
4225
2.89k
  bitset_or(bsr1, bsr2);
4226
2.89k
  if (bsr1 != dest->bs) {
4227
0
    bitset_copy(dest->bs, bsr1);
4228
0
  }
4229
2.89k
  if (not1 != 0) {
4230
0
    bitset_invert(dest->bs);
4231
0
  }
4232
4233
2.89k
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4234
2.25k
    if (not1 != 0 && not2 != 0) {
4235
0
      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4236
0
    }
4237
2.25k
    else {
4238
2.25k
      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4239
2.25k
      if (r == 0 && not1 != 0) {
4240
0
        BBuf *tbuf;
4241
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4242
0
        if (r != 0) {
4243
0
          bbuf_free(pbuf);
4244
0
          return r;
4245
0
        }
4246
0
        bbuf_free(pbuf);
4247
0
        pbuf = tbuf;
4248
0
      }
4249
2.25k
    }
4250
2.25k
    if (r != 0) return r;
4251
4252
2.25k
    dest->mbuf = pbuf;
4253
2.25k
    bbuf_free(buf1);
4254
2.25k
    return r;
4255
2.25k
  }
4256
642
  else
4257
642
    return 0;
4258
2.89k
}
4259
4260
static OnigCodePoint
4261
conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4262
9.95k
{
4263
9.95k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4264
8.71k
    switch (c) {
4265
228
    case 'n': return '\n';
4266
212
    case 't': return '\t';
4267
462
    case 'r': return '\r';
4268
240
    case 'f': return '\f';
4269
282
    case 'a': return '\007';
4270
208
    case 'b': return '\010';
4271
266
    case 'e': return '\033';
4272
402
    case 'v':
4273
402
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4274
198
        return '\v';
4275
204
      break;
4276
4277
6.41k
    default:
4278
6.41k
      break;
4279
8.71k
    }
4280
8.71k
  }
4281
7.86k
  return c;
4282
9.95k
}
4283
4284
static int
4285
is_invalid_quantifier_target(Node* node)
4286
94.9k
{
4287
94.9k
  switch (ND_TYPE(node)) {
4288
308
  case ND_ANCHOR:
4289
2.74k
  case ND_GIMMICK:
4290
2.74k
    return 1;
4291
0
    break;
4292
4293
15.5k
  case ND_BAG:
4294
    /* allow enclosed elements */
4295
    /* return is_invalid_quantifier_target(ND_BODY(node)); */
4296
15.5k
    break;
4297
4298
6.92k
  case ND_LIST:
4299
8.84k
    do {
4300
8.84k
      if (! is_invalid_quantifier_target(ND_CAR(node))) return 0;
4301
8.84k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4302
810
    return 0;
4303
0
    break;
4304
4305
964
  case ND_ALT:
4306
7.06k
    do {
4307
7.06k
      if (is_invalid_quantifier_target(ND_CAR(node))) return 1;
4308
7.06k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4309
856
    break;
4310
4311
68.7k
  default:
4312
68.7k
    break;
4313
94.9k
  }
4314
85.2k
  return 0;
4315
94.9k
}
4316
4317
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4318
static int
4319
quantifier_type_num(QuantNode* q)
4320
57.9k
{
4321
57.9k
  if (q->greedy) {
4322
47.5k
    if (q->lower == 0) {
4323
20.7k
      if (q->upper == 1) return 0;
4324
12.0k
      else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4325
20.7k
    }
4326
26.8k
    else if (q->lower == 1) {
4327
12.3k
      if (IS_INFINITE_REPEAT(q->upper)) return 2;
4328
12.3k
    }
4329
47.5k
  }
4330
10.3k
  else {
4331
10.3k
    if (q->lower == 0) {
4332
5.56k
      if (q->upper == 1) return 3;
4333
2.89k
      else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4334
5.56k
    }
4335
4.81k
    else if (q->lower == 1) {
4336
3.50k
      if (IS_INFINITE_REPEAT(q->upper)) return 5;
4337
3.50k
    }
4338
10.3k
  }
4339
20.3k
  return -1;
4340
57.9k
}
4341
4342
4343
/* What to do with a quantifier that is applied directly to a quantifier. */
enum ReduceType {
  RQ_ASIS = 0,  /* leave both quantifiers as they are    */
  RQ_DEL  = 1,  /* delete parent                         */
  RQ_A    = 2,  /* replace the pair with '*'             */
  RQ_P    = 3,  /* replace the pair with '+'             */
  RQ_AQ   = 4,  /* replace the pair with '*?'            */
  RQ_QQ   = 5,  /* replace the pair with '??'            */
  RQ_P_QQ = 6   /* child to '+', parent to '??': '+)??'  */
};
4352
4353
static enum ReduceType ReduceTypeTable[6][6] = {
4354
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
4355
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
4356
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
4357
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
4358
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
4359
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
4360
};
4361
4362
extern int
4363
onig_reduce_nested_quantifier(Node* pnode)
4364
12.2k
{
4365
12.2k
  int pnum, cnum;
4366
12.2k
  QuantNode *p, *c;
4367
12.2k
  Node* cnode;
4368
4369
12.2k
  cnode = ND_BODY(pnode);
4370
4371
12.2k
  p = QUANT_(pnode);
4372
12.2k
  c = QUANT_(cnode);
4373
12.2k
  pnum = quantifier_type_num(p);
4374
12.2k
  cnum = quantifier_type_num(c);
4375
12.2k
  if (pnum < 0 || cnum < 0) {
4376
5.85k
    if (p->lower == p->upper && c->lower == c->upper) {
4377
1.11k
      int n = onig_positive_int_multiply(p->lower, c->lower);
4378
1.11k
      if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4379
4380
1.06k
      p->lower = p->upper = n;
4381
1.06k
      ND_BODY(pnode) = ND_BODY(cnode);
4382
1.06k
      goto remove_cnode;
4383
1.11k
    }
4384
4385
4.73k
    return 0;
4386
5.85k
  }
4387
4388
6.42k
  switch(ReduceTypeTable[cnum][pnum]) {
4389
3.65k
  case RQ_DEL:
4390
3.65k
    *pnode = *cnode;
4391
3.65k
    goto remove_cnode;
4392
0
    break;
4393
1.14k
  case RQ_A:
4394
1.14k
    ND_BODY(pnode) = ND_BODY(cnode);
4395
1.14k
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4396
1.14k
    goto remove_cnode;
4397
0
    break;
4398
146
  case RQ_P:
4399
146
    ND_BODY(pnode) = ND_BODY(cnode);
4400
146
    p->lower  = 1;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4401
146
    goto remove_cnode;
4402
0
    break;
4403
266
  case RQ_AQ:
4404
266
    ND_BODY(pnode) = ND_BODY(cnode);
4405
266
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;
4406
266
    goto remove_cnode;
4407
0
    break;
4408
62
  case RQ_QQ:
4409
62
    ND_BODY(pnode) = ND_BODY(cnode);
4410
62
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4411
62
    goto remove_cnode;
4412
0
    break;
4413
342
  case RQ_P_QQ:
4414
342
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4415
342
    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;
4416
342
    break;
4417
812
  case RQ_ASIS:
4418
812
    break;
4419
6.42k
  }
4420
4421
1.15k
  return 0;
4422
4423
6.33k
 remove_cnode:
4424
6.33k
  ND_BODY(cnode) = NULL_NODE;
4425
6.33k
  onig_node_free(cnode);
4426
6.33k
  return 0;
4427
6.42k
}
4428
4429
static int
4430
node_new_general_newline(Node** node, ParseEnv* env)
4431
3.63k
{
4432
3.63k
  int r;
4433
3.63k
  int dlen, alen;
4434
3.63k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4435
3.63k
  Node* crnl;
4436
3.63k
  Node* ncc;
4437
3.63k
  Node* x;
4438
3.63k
  CClassNode* cc;
4439
4440
3.63k
  dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4441
3.63k
  if (dlen < 0) return dlen;
4442
3.63k
  alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4443
3.63k
  if (alen < 0) return alen;
4444
4445
3.63k
  crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4446
3.63k
  CHECK_NULL_RETURN_MEMERR(crnl);
4447
4448
3.63k
  ncc = node_new_cclass();
4449
3.63k
  if (IS_NULL(ncc)) goto err2;
4450
4451
3.63k
  cc = CCLASS_(ncc);
4452
3.63k
  if (dlen == 1) {
4453
3.44k
    bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4454
3.44k
  }
4455
184
  else {
4456
184
    r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4457
184
    if (r != 0) {
4458
0
    err1:
4459
0
      onig_node_free(ncc);
4460
0
    err2:
4461
0
      onig_node_free(crnl);
4462
0
      return ONIGERR_MEMORY;
4463
0
    }
4464
184
  }
4465
4466
3.63k
  if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4467
716
    r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4468
716
    if (r != 0) goto err1;
4469
716
    r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4470
716
    if (r != 0) goto err1;
4471
716
  }
4472
4473
3.63k
  x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4474
3.63k
  if (IS_NULL(x)) goto err1;
4475
4476
3.63k
  *node = x;
4477
3.63k
  return 0;
4478
3.63k
}
4479
4480
/* Token kinds stored in PToken.type. */
enum TokenSyms {
  TK_EOT = 0,               /* end of token */

  /* outside of a character class */
  TK_CRUDE_BYTE,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,       /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_OPEN_CC,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,         /* \p{...}, \P{...} */
  TK_KEEP,                  /* \K */
  TK_GENERAL_NEWLINE,       /* \R */
  TK_NO_NEWLINE,            /* \N */
  TK_TRUE_ANYCHAR,          /* \O */
  TK_TEXT_SEGMENT,          /* \X */

  /* in cc */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_CC_POSIX_BRACKET_OPEN,
  TK_CC_AND,                /* && */
  TK_CC_OPEN_CC             /* [ */
};
4513
4514
typedef struct {
4515
  enum TokenSyms type;
4516
  int code_point_continue;
4517
  int escaped;
4518
  int base_num;   /* is number: 8, 16 (used in [....]) */
4519
  UChar* backp;
4520
  union {
4521
    UChar* s;
4522
    UChar byte;
4523
    OnigCodePoint code;
4524
    int   anchor;
4525
    int   subtype;
4526
    struct {
4527
      int lower;
4528
      int upper;
4529
      int greedy;
4530
      int possessive;
4531
    } repeat;
4532
    struct {
4533
      int  num;
4534
      int  ref1;
4535
      int* refs;
4536
      int  by_name;
4537
#ifdef USE_BACKREF_WITH_LEVEL
4538
      int  exist_level;
4539
      int  level;   /* \k<name+n> */
4540
#endif
4541
    } backref;
4542
    struct {
4543
      UChar* name;
4544
      UChar* name_end;
4545
      int    gnum;
4546
      int    by_number;
4547
    } call;
4548
    struct {
4549
      int ctype;
4550
      int not;
4551
    } prop;
4552
  } u;
4553
} PToken;
4554
4555
static void
4556
ptoken_init(PToken* tok)
4557
51.2k
{
4558
51.2k
  tok->code_point_continue = 0;
4559
51.2k
}
4560
4561
static int
4562
fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
4563
25.3k
{
4564
25.3k
  int low, up, syn_allow, non_low;
4565
25.3k
  int r;
4566
25.3k
  OnigCodePoint c;
4567
25.3k
  OnigEncoding enc;
4568
25.3k
  UChar* p;
4569
25.3k
  PFETCH_READY;
4570
4571
25.3k
  p = *src;
4572
25.3k
  r = 0;
4573
25.3k
  non_low = 0;
4574
25.3k
  enc = env->enc;
4575
25.3k
  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4576
4577
25.3k
  if (PEND) {
4578
380
    if (syn_allow)
4579
362
      return 1;  /* "....{" : OK! */
4580
18
    else
4581
18
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
4582
380
  }
4583
4584
24.9k
  if (! syn_allow) {
4585
1.93k
    c = PPEEK;
4586
1.93k
    if (c == ')' || c == '(' || c == '|') {
4587
8
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4588
8
    }
4589
1.93k
  }
4590
4591
24.9k
  low = scan_number(&p, end, env->enc);
4592
24.9k
  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4593
24.9k
  if (low > ONIG_MAX_REPEAT_NUM)
4594
32
    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4595
4596
24.9k
  if (p == *src) { /* can't read low */
4597
6.93k
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4598
      /* allow {,n} as {0,n} */
4599
3.84k
      low = 0;
4600
3.84k
      non_low = 1;
4601
3.84k
    }
4602
3.08k
    else
4603
3.08k
      goto invalid;
4604
6.93k
  }
4605
4606
21.8k
  if (PEND) goto invalid;
4607
21.7k
  PFETCH(c);
4608
21.7k
  if (c == ',') {
4609
5.01k
    UChar* prev = p;
4610
5.01k
    up = scan_number(&p, end, env->enc);
4611
5.01k
    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4612
5.00k
    if (up > ONIG_MAX_REPEAT_NUM)
4613
24
      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4614
4615
4.98k
    if (p == prev) {
4616
3.04k
      if (non_low != 0)
4617
292
        goto invalid;
4618
2.75k
      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */
4619
2.75k
    }
4620
4.98k
  }
4621
16.7k
  else {
4622
16.7k
    if (non_low != 0)
4623
2.53k
      goto invalid;
4624
4625
14.1k
    PUNFETCH;
4626
14.1k
    up = low;  /* {n} : exact n times */
4627
14.1k
    r = 2;     /* fixed */
4628
14.1k
  }
4629
4630
18.8k
  if (PEND) goto invalid;
4631
18.8k
  PFETCH(c);
4632
18.8k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4633
196
    if (c != MC_ESC(env->syntax) || PEND) goto invalid;
4634
160
    PFETCH(c);
4635
160
  }
4636
18.7k
  if (c != '}') goto invalid;
4637
4638
17.3k
  if (!IS_INFINITE_REPEAT(up) && low > up) {
4639
    /* {n,m}+ supported case */
4640
210
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4641
2
      return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4642
4643
208
    tok->u.repeat.possessive = 1;
4644
208
    {
4645
208
      int tmp;
4646
208
      tmp = low; low = up; up = tmp;
4647
208
    }
4648
208
  }
4649
17.1k
  else
4650
17.1k
    tok->u.repeat.possessive = 0;
4651
4652
17.3k
  tok->type = TK_INTERVAL;
4653
17.3k
  tok->u.repeat.lower = low;
4654
17.3k
  tok->u.repeat.upper = up;
4655
17.3k
  *src = p;
4656
17.3k
  return r; /* 0: normal {n,m}, 2: fixed {n} */
4657
4658
7.58k
 invalid:
4659
7.58k
  if (syn_allow) {
4660
    /* *src = p; */ /* !!! Don't do this line !!! */
4661
7.39k
    return 1;  /* OK */
4662
7.39k
  }
4663
184
  else
4664
184
    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4665
7.58k
}
4666
4667
/* \M-, \C-, \c, or \... */
4668
static int
4669
fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
4670
                        OnigCodePoint* val)
4671
11.2k
{
4672
11.2k
  int v;
4673
11.2k
  OnigCodePoint c;
4674
11.2k
  OnigEncoding enc = env->enc;
4675
11.2k
  UChar* p = *src;
4676
4677
11.2k
  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4678
4679
11.2k
  PFETCH_S(c);
4680
11.2k
  switch (c) {
4681
804
  case 'M':
4682
804
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4683
574
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4684
544
      PFETCH_S(c);
4685
544
      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4686
510
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4687
504
      PFETCH_S(c);
4688
504
      if (c == MC_ESC(env->syntax)) {
4689
288
        v = fetch_escaped_value_raw(&p, end, env, &c);
4690
288
        if (v < 0) return v;
4691
288
      }
4692
438
      c = ((c & 0xff) | 0x80);
4693
438
    }
4694
230
    else
4695
230
      goto backslash;
4696
438
    break;
4697
4698
438
  case 'C':
4699
428
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4700
162
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4701
152
      PFETCH_S(c);
4702
152
      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4703
98
      goto control;
4704
152
    }
4705
266
    else
4706
266
      goto backslash;
4707
4708
842
  case 'c':
4709
842
    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4710
622
    control:
4711
622
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4712
598
      PFETCH_S(c);
4713
598
      if (c == '?') {
4714
68
        c = 0177;
4715
68
      }
4716
530
      else {
4717
530
        if (c == MC_ESC(env->syntax)) {
4718
292
          v = fetch_escaped_value_raw(&p, end, env, &c);
4719
292
          if (v < 0) return v;
4720
292
        }
4721
462
        c &= 0x9f;
4722
462
      }
4723
530
      break;
4724
598
    }
4725
    /* fall through */
4726
4727
9.46k
  default:
4728
9.46k
    {
4729
9.95k
    backslash:
4730
9.95k
      c = conv_backslash_value(c, env);
4731
9.95k
    }
4732
9.95k
    break;
4733
11.2k
  }
4734
4735
10.9k
  *src = p;
4736
10.9k
  *val = c;
4737
10.9k
  return 0;
4738
11.2k
}
4739
4740
static int
4741
fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4742
10.6k
{
4743
10.6k
  int r;
4744
10.6k
  int len;
4745
4746
10.6k
  r = fetch_escaped_value_raw(src, end, env, val);
4747
10.6k
  if (r != 0) return r;
4748
4749
10.4k
  len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4750
10.4k
  if (len < 0) return len;
4751
4752
10.4k
  return 0;
4753
10.4k
}
4754
4755
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4756
4757
static OnigCodePoint
4758
get_name_end_code_point(OnigCodePoint start)
4759
20.5k
{
4760
20.5k
  switch (start) {
4761
5.42k
  case '<':  return (OnigCodePoint )'>';  break;
4762
8.13k
  case '\'': return (OnigCodePoint )'\''; break;
4763
6.95k
  case '(':  return (OnigCodePoint )')';  break;
4764
0
  default:
4765
0
    break;
4766
20.5k
  }
4767
4768
0
  return (OnigCodePoint )0;
4769
20.5k
}
4770
4771
/* How the text between the name delimiters was recognized. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* a name                          */
  IS_ABS_NUM = 1,  /* digits only                     */
  IS_REL_NUM = 2   /* digits with a leading '+' / '-' */
};
4776
4777
#ifdef USE_BACKREF_WITH_LEVEL
/*
   Read a name or number with an optional level suffix:

   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

   start_code selects the closing delimiter.
   On success *rname_end points at the end of the name/number part,
   *num_type tells how it was recognized, *rback_num holds the signed
   number (0 for a name), *rlevel holds the signed level when a level is
   present, and *src is moved past the closing delimiter.

   return: 1 -> success, level present
           0 -> success, no level
         < 0 -> error code; except for the empty name and too-big-number
                cases the offending text is also recorded with
                onig_scan_env_set_error_string().
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ParseEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c;
  OnigEncoding enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p;
  PFETCH_READY;

  enc         = env->enc;
  p           = *src;
  pnum_head   = *src;
  name_end    = end;
  c           = 0;
  r           = 0;
  sign        = 1;
  digit_count = 0;
  exist_level = 0;
  *rback_num  = 0;
  *num_type   = IS_NOT_NUM;

  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first char decides between name, number and relative number */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign      = (c == '-') ? -1 : 1;
    pnum_head = p;
  }
  else if (! ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the delimiter, ')' or the sign that starts the level */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (! ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r == 0 && c != end_code) {
    int level;
    int flag;

    /* only a level suffix may stand between the name and the delimiter */
    if (c != '+' && c != '-') goto bad_tail;

    flag = (c == '-' ? -1 : 1);

    if (PEND) {
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
      goto report;
    }
    PFETCH(c);
    if (! IS_CODE_DIGIT_ASCII(enc, c)) goto bad_tail;
    PUNFETCH;
    level = scan_number(&p, end, enc);
    if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
    *rlevel = (level * flag);
    exist_level = 1;

    /* the delimiter has to follow the level immediately */
    if (PEND) goto bad_tail;
    PFETCH(c);
    if (c != end_code) goto bad_tail;
  }

  if (r != 0) goto report;

  if (*num_type != IS_NOT_NUM) {
    *rback_num = scan_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;

    /* a relative reference of zero is not allowed */
    if (*rback_num == 0 && *num_type == IS_REL_NUM) {
      r = ONIGERR_INVALID_GROUP_NAME;
      goto report;
    }

    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);

 bad_tail:
  name_end = end;
  r = ONIGERR_INVALID_GROUP_NAME;

 report:
  onig_scan_env_set_error_string(env, r, *src, name_end);
  return r;
}
#endif /* USE_BACKREF_WITH_LEVEL */
4916
4917
/*
4918
  ref: 0 -> define name    (don't allow number name)
4919
       1 -> reference name (allow number name)
4920
*/
4921
static int
4922
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4923
           UChar** rname_end, ParseEnv* env, int* rback_num,
4924
           enum REF_NUM* num_type, int is_ref)
4925
15.3k
{
4926
15.3k
  int r, sign;
4927
15.3k
  int digit_count;
4928
15.3k
  OnigCodePoint end_code;
4929
15.3k
  OnigCodePoint c = 0;
4930
15.3k
  OnigEncoding enc = env->enc;
4931
15.3k
  UChar *name_end;
4932
15.3k
  UChar *pnum_head;
4933
15.3k
  UChar *p = *src;
4934
4935
15.3k
  *rback_num = 0;
4936
4937
15.3k
  end_code = get_name_end_code_point(start_code);
4938
4939
15.3k
  digit_count = 0;
4940
15.3k
  name_end = end;
4941
15.3k
  pnum_head = *src;
4942
15.3k
  r = 0;
4943
15.3k
  *num_type = IS_NOT_NUM;
4944
15.3k
  sign = 1;
4945
15.3k
  if (PEND) {
4946
50
    return ONIGERR_EMPTY_GROUP_NAME;
4947
50
  }
4948
15.2k
  else {
4949
15.2k
    PFETCH_S(c);
4950
15.2k
    if (c == end_code)
4951
2
      return ONIGERR_EMPTY_GROUP_NAME;
4952
4953
15.2k
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
4954
5.75k
      if (is_ref == TRUE)
4955
5.73k
        *num_type = IS_ABS_NUM;
4956
20
      else {
4957
20
        r = ONIGERR_INVALID_GROUP_NAME;
4958
20
      }
4959
5.75k
      digit_count++;
4960
5.75k
    }
4961
9.53k
    else if (c == '-') {
4962
1.85k
      if (is_ref == TRUE) {
4963
1.84k
        *num_type = IS_REL_NUM;
4964
1.84k
        sign = -1;
4965
1.84k
        pnum_head = p;
4966
1.84k
      }
4967
10
      else {
4968
10
        r = ONIGERR_INVALID_GROUP_NAME;
4969
10
      }
4970
1.85k
    }
4971
7.68k
    else if (c == '+') {
4972
552
      if (is_ref == TRUE) {
4973
550
        *num_type = IS_REL_NUM;
4974
550
        sign = 1;
4975
550
        pnum_head = p;
4976
550
      }
4977
2
      else {
4978
2
        r = ONIGERR_INVALID_GROUP_NAME;
4979
2
      }
4980
552
    }
4981
7.13k
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4982
232
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4983
232
    }
4984
15.2k
  }
4985
4986
15.2k
  if (r == 0) {
4987
25.1k
    while (!PEND) {
4988
24.7k
      name_end = p;
4989
24.7k
      PFETCH_S(c);
4990
24.7k
      if (c == end_code || c == ')') {
4991
14.6k
        if (*num_type != IS_NOT_NUM && digit_count == 0)
4992
2
          r = ONIGERR_INVALID_GROUP_NAME;
4993
14.6k
        break;
4994
14.6k
      }
4995
4996
10.1k
      if (*num_type != IS_NOT_NUM) {
4997
5.34k
        if (IS_CODE_DIGIT_ASCII(enc, c)) {
4998
5.04k
          digit_count++;
4999
5.04k
        }
5000
298
        else {
5001
298
          if (!ONIGENC_IS_CODE_WORD(enc, c))
5002
234
            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
5003
64
          else
5004
64
            r = ONIGERR_INVALID_GROUP_NAME;
5005
5006
298
          *num_type = IS_NOT_NUM;
5007
298
        }
5008
5.34k
      }
5009
4.77k
      else {
5010
4.77k
        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
5011
2.44k
          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
5012
2.44k
        }
5013
4.77k
      }
5014
10.1k
    }
5015
5016
15.0k
    if (c != end_code) {
5017
384
      r = ONIGERR_INVALID_GROUP_NAME;
5018
384
      goto err;
5019
384
    }
5020
5021
14.6k
    if (*num_type != IS_NOT_NUM) {
5022
7.75k
      *rback_num = scan_number(&pnum_head, name_end, enc);
5023
7.75k
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
5024
7.74k
      else if (*rback_num == 0) {
5025
1.97k
        if (*num_type == IS_REL_NUM) {
5026
8
          r = ONIGERR_INVALID_GROUP_NAME;
5027
8
          goto err;
5028
8
        }
5029
1.97k
      }
5030
5031
7.74k
      *rback_num *= sign;
5032
7.74k
    }
5033
5034
14.6k
    *rname_end = name_end;
5035
14.6k
    *src = p;
5036
14.6k
    return 0;
5037
14.6k
  }
5038
264
  else {
5039
672
    while (!PEND) {
5040
420
      name_end = p;
5041
420
      PFETCH_S(c);
5042
420
      if (c == end_code || c == ')')
5043
12
        break;
5044
420
    }
5045
264
    if (PEND)
5046
260
      name_end = end;
5047
5048
656
  err:
5049
656
    onig_scan_env_set_error_string(env, r, *src, name_end);
5050
656
    return r;
5051
264
  }
5052
15.2k
}
5053
5054
static void
5055
CC_ESC_WARN(ParseEnv* env, UChar *c)
5056
5.84k
{
5057
5.84k
  if (onig_warn == onig_null_warn) return ;
5058
5059
0
  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
5060
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
5061
0
    UChar buf[WARN_BUFSIZE];
5062
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5063
0
                               env->pattern, env->pattern_end,
5064
0
                               (UChar* )"character class has '%s' without escape",
5065
0
                               c);
5066
0
    (*onig_warn)((char* )buf);
5067
0
  }
5068
0
}
5069
5070
static void
5071
CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
5072
3.20k
{
5073
3.20k
  if (onig_warn == onig_null_warn) return ;
5074
5075
0
  if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
5076
0
    UChar buf[WARN_BUFSIZE];
5077
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
5078
0
                         (env)->pattern, (env)->pattern_end,
5079
0
                         (UChar* )"regular expression has '%s' without escape", c);
5080
0
    (*onig_warn)((char* )buf);
5081
0
  }
5082
0
}
5083
5084
static UChar*
5085
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
5086
                  UChar **next, OnigEncoding enc)
5087
420
{
5088
420
  int i;
5089
420
  OnigCodePoint x;
5090
420
  UChar *q;
5091
420
  UChar *p = from;
5092
5093
2.35k
  while (p < to) {
5094
2.20k
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5095
2.20k
    q = p + enclen(enc, p);
5096
2.20k
    if (x == s[0]) {
5097
1.03k
      for (i = 1; i < n && q < to; i++) {
5098
744
        x = ONIGENC_MBC_TO_CODE(enc, q, to);
5099
744
        if (x != s[i]) break;
5100
270
        q += enclen(enc, q);
5101
270
      }
5102
768
      if (i >= n) {
5103
270
        if (IS_NOT_NULL(next))
5104
270
          *next = q;
5105
270
        return p;
5106
270
      }
5107
768
    }
5108
1.93k
    p = q;
5109
1.93k
  }
5110
150
  return NULL_UCHARP;
5111
420
}
5112
5113
static int
5114
is_head_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5115
1.16k
{
5116
1.16k
  UChar* start;
5117
1.16k
  OnigCodePoint code;
5118
5119
1.16k
  start = env->pattern;
5120
1.16k
  if (p > start) {
5121
1.06k
    p = onigenc_get_prev_char_head(enc, start, p);
5122
1.06k
    if (p > start) {
5123
998
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5124
998
      if (code == '(' ||
5125
998
          (code == '|' &&
5126
686
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT))) {
5127
474
        p = onigenc_get_prev_char_head(enc, start, p);
5128
474
        code = ONIGENC_MBC_TO_CODE(enc, p, end);
5129
474
        if (IS_MC_ESC_CODE(code, env->syntax)) {
5130
134
          int count = 0;
5131
362
          while (p > start) {
5132
348
            p = onigenc_get_prev_char_head(enc, start, p);
5133
348
            code = ONIGENC_MBC_TO_CODE(enc, p, end);
5134
348
            if (! IS_MC_ESC_CODE(code, env->syntax)) break;
5135
228
            count++;
5136
228
          }
5137
134
          return (count % 2 == 0);
5138
134
        }
5139
474
      }
5140
998
    }
5141
934
    return FALSE;
5142
1.06k
  }
5143
100
  else {
5144
100
    return TRUE;
5145
100
  }
5146
1.16k
}
5147
5148
static int
5149
is_end_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5150
896
{
5151
896
  OnigCodePoint code;
5152
5153
896
  if (p == end) return TRUE;
5154
5155
852
  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5156
852
  if (IS_MC_ESC_CODE(code, env->syntax)) {
5157
456
    p += ONIGENC_MBC_ENC_LEN(enc, p);
5158
456
    if (p < end) {
5159
438
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5160
438
      if (code == ')' ||
5161
438
          (code == '|' &&
5162
342
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT)))
5163
162
        return TRUE;
5164
438
    }
5165
456
  }
5166
5167
690
  return FALSE;
5168
852
}
5169
5170
static int
5171
is_posix_bracket_start(UChar* from, UChar* to, OnigEncoding enc)
5172
1.28k
{
5173
1.28k
  int n;
5174
1.28k
  OnigCodePoint x;
5175
1.28k
  UChar *p;
5176
5177
1.28k
  n = 0;
5178
1.28k
  p = from;
5179
4.05k
  while (p < to) {
5180
3.99k
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5181
3.99k
    p += enclen(enc, p);
5182
3.99k
    if (x == ':') {
5183
776
      if (p < to) {
5184
768
        x = ONIGENC_MBC_TO_CODE(enc, p, to);
5185
768
        if (x == ']') {
5186
532
          if (n == 0) return FALSE;
5187
462
          else        return TRUE;
5188
532
        }
5189
768
      }
5190
5191
244
      return FALSE;
5192
776
    }
5193
3.22k
    else if (x == '^' && n == 0) {
5194
228
      ;
5195
228
    }
5196
2.99k
    else if (! ONIGENC_IS_CODE_ALPHA(enc, x)) {
5197
448
      break;
5198
448
    }
5199
5200
2.77k
    n += 1;
5201
2.77k
  }
5202
5203
504
  return FALSE;
5204
1.28k
}
5205
5206
static int
5207
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ParseEnv* env, int state)
5208
94.0k
{
5209
94.0k
  int r;
5210
94.0k
  OnigCodePoint code;
5211
94.0k
  OnigCodePoint c, c2;
5212
94.0k
  int mindigits, maxdigits;
5213
94.0k
  OnigSyntaxType* syn;
5214
94.0k
  OnigEncoding enc;
5215
94.0k
  UChar* prev;
5216
94.0k
  UChar* p;
5217
94.0k
  PFETCH_READY;
5218
5219
94.0k
  p = *src;
5220
94.0k
  enc = env->enc;
5221
94.0k
  syn = env->syntax;
5222
94.0k
  if (tok->code_point_continue != 0) {
5223
4.86k
    r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
5224
4.86k
    if (r == 1) {
5225
592
      tok->code_point_continue = 0;
5226
592
    }
5227
4.27k
    else if (r == 2) {
5228
1.39k
      tok->type = TK_CC_RANGE;
5229
1.39k
      goto end;
5230
1.39k
    }
5231
2.88k
    else if (r == 0) {
5232
2.87k
      tok->type   = TK_CODE_POINT;
5233
2.87k
      tok->u.code = code;
5234
2.87k
      goto end;
5235
2.87k
    }
5236
2
    else
5237
2
      return r; /* error */
5238
4.86k
  }
5239
5240
89.8k
  if (PEND) {
5241
2.88k
    tok->type = TK_EOT;
5242
2.88k
    return tok->type;
5243
2.88k
  }
5244
5245
86.9k
  PFETCH(c);
5246
86.9k
  tok->type = TK_CHAR;
5247
86.9k
  tok->base_num = 0;
5248
86.9k
  tok->u.code   = c;
5249
86.9k
  tok->escaped  = 0;
5250
5251
86.9k
  if (c == ']') {
5252
15.1k
    tok->type = TK_CC_CLOSE;
5253
15.1k
  }
5254
71.7k
  else if (c == '-') {
5255
3.51k
    tok->type = TK_CC_RANGE;
5256
3.51k
  }
5257
68.2k
  else if (c == MC_ESC(syn)) {
5258
28.1k
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
5259
586
      goto end;
5260
5261
27.6k
    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5262
5263
27.5k
    PFETCH(c);
5264
27.5k
    tok->escaped = 1;
5265
27.5k
    tok->u.code = c;
5266
27.5k
    switch (c) {
5267
3.62k
    case 'w':
5268
3.62k
      tok->type = TK_CHAR_TYPE;
5269
3.62k
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5270
3.62k
      tok->u.prop.not   = 0;
5271
3.62k
      break;
5272
3.66k
    case 'W':
5273
3.66k
      tok->type = TK_CHAR_TYPE;
5274
3.66k
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5275
3.66k
      tok->u.prop.not   = 1;
5276
3.66k
      break;
5277
304
    case 'd':
5278
304
      tok->type = TK_CHAR_TYPE;
5279
304
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5280
304
      tok->u.prop.not   = 0;
5281
304
      break;
5282
1.11k
    case 'D':
5283
1.11k
      tok->type = TK_CHAR_TYPE;
5284
1.11k
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5285
1.11k
      tok->u.prop.not   = 1;
5286
1.11k
      break;
5287
288
    case 's':
5288
288
      tok->type = TK_CHAR_TYPE;
5289
288
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5290
288
      tok->u.prop.not   = 0;
5291
288
      break;
5292
2.97k
    case 'S':
5293
2.97k
      tok->type = TK_CHAR_TYPE;
5294
2.97k
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5295
2.97k
      tok->u.prop.not   = 1;
5296
2.97k
      break;
5297
482
    case 'h':
5298
482
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5299
244
      tok->type = TK_CHAR_TYPE;
5300
244
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5301
244
      tok->u.prop.not   = 0;
5302
244
      break;
5303
952
    case 'H':
5304
952
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5305
754
      tok->type = TK_CHAR_TYPE;
5306
754
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5307
754
      tok->u.prop.not   = 1;
5308
754
      break;
5309
5310
668
    case 'p':
5311
1.05k
    case 'P':
5312
1.05k
      if (PEND) break;
5313
5314
1.04k
      c2 = PPEEK;
5315
1.04k
      if (c2 == '{' &&
5316
1.04k
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5317
570
        PINC;
5318
570
        tok->type = TK_CHAR_PROPERTY;
5319
570
        tok->u.prop.not = c == 'P';
5320
5321
570
        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5322
320
          PFETCH(c2);
5323
320
          if (c2 == '^') {
5324
62
            tok->u.prop.not = tok->u.prop.not == 0;
5325
62
          }
5326
258
          else
5327
258
            PUNFETCH;
5328
320
        }
5329
570
      }
5330
1.04k
      break;
5331
5332
1.05k
    case 'o':
5333
1.05k
      if (PEND) break;
5334
5335
1.05k
      prev = p;
5336
1.05k
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5337
594
        PINC;
5338
594
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
5339
594
        if (r < 0) return r;
5340
592
        if (!PEND) {
5341
576
          c2 = PPEEK;
5342
576
          if (IS_CODE_DIGIT_ASCII(enc, c2))
5343
4
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5344
576
        }
5345
5346
588
        tok->base_num = 8;
5347
588
        goto brace_code_point_entry;
5348
592
      }
5349
458
      break;
5350
5351
4.07k
    case 'x':
5352
4.07k
      if (PEND) break;
5353
5354
4.06k
      prev = p;
5355
4.06k
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5356
1.20k
        PINC;
5357
1.20k
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5358
1.20k
        if (r < 0) return r;
5359
1.20k
        if (!PEND) {
5360
1.18k
          c2 = PPEEK;
5361
1.18k
          if (IS_CODE_XDIGIT_ASCII(enc, c2))
5362
2
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5363
1.18k
        }
5364
5365
1.20k
        tok->base_num = 16;
5366
1.79k
      brace_code_point_entry:
5367
1.79k
        if ((p > prev + enclen(enc, prev))) {
5368
1.13k
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5369
1.12k
          if (PPEEK_IS('}')) {
5370
86
            PINC;
5371
86
          }
5372
1.03k
          else {
5373
1.03k
            int curr_state;
5374
5375
1.03k
            curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
5376
1.03k
            r = check_code_point_sequence_cc(p, end, tok->base_num, enc,
5377
1.03k
                                             curr_state);
5378
1.03k
            if (r < 0) return r;
5379
660
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5380
652
            tok->code_point_continue = TRUE;
5381
652
          }
5382
738
          tok->type   = TK_CODE_POINT;
5383
738
          tok->u.code = code;
5384
738
        }
5385
662
        else {
5386
          /* can't read nothing or invalid format */
5387
662
          p = prev;
5388
662
        }
5389
1.79k
      }
5390
2.85k
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5391
2.65k
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5392
2.65k
        if (r < 0) return r;
5393
2.65k
        if (p == prev) {  /* can't read nothing. */
5394
1.43k
          code = 0; /* but, it's not error */
5395
1.43k
        }
5396
2.65k
        tok->type = TK_CRUDE_BYTE;
5397
2.65k
        tok->base_num = 16;
5398
2.65k
        tok->u.byte   = (UChar )code;
5399
2.65k
      }
5400
4.25k
      break;
5401
5402
4.25k
    case 'u':
5403
350
      if (PEND) break;
5404
342
      prev = p;
5405
342
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5406
126
        mindigits = maxdigits = 4;
5407
206
      u_hex_digits:
5408
206
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5409
206
        if (r < 0) return r;
5410
184
        if (p == prev) {  /* can't read nothing. */
5411
0
          code = 0; /* but, it's not error */
5412
0
        }
5413
184
        tok->type = TK_CODE_POINT;
5414
184
        tok->base_num = 16;
5415
184
        tok->u.code   = code;
5416
184
      }
5417
400
      break;
5418
5419
400
    case 'U':
5420
304
      if (PEND) break;
5421
296
      prev = p;
5422
296
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5423
80
        mindigits = maxdigits = 8;
5424
80
        goto u_hex_digits;
5425
80
      }
5426
216
      break;
5427
5428
912
    case '0':
5429
5.31k
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
5430
5.31k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5431
5.11k
        PUNFETCH;
5432
5.11k
        prev = p;
5433
5.11k
        r = scan_octal_number(&p, end, 0, 3, enc, &code);
5434
5.11k
        if (r < 0) return r;
5435
5.11k
        if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;
5436
5.10k
        if (p == prev) {  /* can't read nothing. */
5437
0
          code = 0; /* but, it's not error */
5438
0
        }
5439
5.10k
        tok->type = TK_CRUDE_BYTE;
5440
5.10k
        tok->base_num = 8;
5441
5.10k
        tok->u.byte   = (UChar )code;
5442
5.10k
      }
5443
5.30k
      break;
5444
5445
5.30k
    default:
5446
1.95k
      PUNFETCH;
5447
1.95k
      r = fetch_escaped_value(&p, end, env, &c2);
5448
1.95k
      if (r < 0) return r;
5449
1.92k
      if (tok->u.code != c2) {
5450
328
        tok->u.code = c2;
5451
328
        tok->type   = TK_CODE_POINT;
5452
328
      }
5453
1.92k
      break;
5454
27.5k
    }
5455
27.5k
  }
5456
40.0k
  else if (c == '[') {
5457
5.98k
    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
5458
1.28k
      tok->backp = p; /* point at '[' is read */
5459
1.28k
      PINC;
5460
1.28k
      if (is_posix_bracket_start(p, end, enc)) {
5461
462
        tok->type = TK_CC_POSIX_BRACKET_OPEN;
5462
462
      }
5463
818
      else {
5464
818
        PUNFETCH;
5465
818
        goto cc_in_cc;
5466
818
      }
5467
1.28k
    }
5468
4.70k
    else {
5469
5.51k
    cc_in_cc:
5470
5.51k
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
5471
3.40k
        tok->type = TK_CC_OPEN_CC;
5472
3.40k
      }
5473
2.11k
      else {
5474
2.11k
        CC_ESC_WARN(env, (UChar* )"[");
5475
2.11k
      }
5476
5.51k
    }
5477
5.98k
  }
5478
34.0k
  else if (c == '&') {
5479
2.29k
    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
5480
2.29k
        !PEND && (PPEEK_IS('&'))) {
5481
1.56k
      PINC;
5482
1.56k
      tok->type = TK_CC_AND;
5483
1.56k
    }
5484
2.29k
  }
5485
5486
90.6k
 end:
5487
90.6k
  *src = p;
5488
90.6k
  return tok->type;
5489
86.9k
}
5490
5491
static int
5492
fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
5493
667k
{
5494
667k
  int r;
5495
667k
  OnigCodePoint code;
5496
667k
  OnigCodePoint c;
5497
667k
  int mindigits, maxdigits;
5498
667k
  UChar* prev;
5499
667k
  int allow_num;
5500
667k
  OnigEncoding enc;
5501
667k
  OnigSyntaxType* syn;
5502
667k
  UChar* p;
5503
667k
  PFETCH_READY;
5504
5505
667k
  enc = env->enc;
5506
667k
  syn = env->syntax;
5507
667k
  p = *src;
5508
5509
667k
  if (tok->code_point_continue != 0) {
5510
994
    r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
5511
994
    if (r == 1) {
5512
220
      tok->code_point_continue = 0;
5513
220
    }
5514
774
    else if (r == 0) {
5515
774
      tok->type   = TK_CODE_POINT;
5516
774
      tok->u.code = code;
5517
774
      goto out;
5518
774
    }
5519
0
    else
5520
0
      return r; /* error */
5521
994
  }
5522
5523
668k
 start:
5524
668k
  if (PEND) {
5525
39.8k
    tok->type = TK_EOT;
5526
39.8k
    return tok->type;
5527
39.8k
  }
5528
5529
628k
  tok->type = TK_STRING;
5530
628k
  tok->base_num = 0;
5531
628k
  tok->backp    = p;
5532
5533
628k
  PFETCH(c);
5534
628k
  if (IS_MC_ESC_CODE(c, syn)) {
5535
80.4k
    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5536
5537
80.1k
    tok->backp = p;
5538
80.1k
    PFETCH(c);
5539
5540
80.1k
    tok->u.code = c;
5541
80.1k
    tok->escaped = 1;
5542
80.1k
    switch (c) {
5543
236
    case '*':
5544
236
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
5545
0
      tok->type = TK_REPEAT;
5546
0
      tok->u.repeat.lower = 0;
5547
0
      tok->u.repeat.upper = INFINITE_REPEAT;
5548
0
      goto greedy_check;
5549
0
      break;
5550
5551
592
    case '+':
5552
592
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
5553
200
      tok->type = TK_REPEAT;
5554
200
      tok->u.repeat.lower = 1;
5555
200
      tok->u.repeat.upper = INFINITE_REPEAT;
5556
200
      goto greedy_check;
5557
0
      break;
5558
5559
542
    case '?':
5560
542
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
5561
194
      tok->type = TK_REPEAT;
5562
194
      tok->u.repeat.lower = 0;
5563
194
      tok->u.repeat.upper = 1;
5564
62.3k
    greedy_check:
5565
62.3k
      tok->u.repeat.possessive = 0;
5566
76.2k
    greedy_check2:
5567
76.2k
      if (!PEND && PPEEK_IS('?') &&
5568
76.2k
          IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
5569
76.2k
          tok->u.repeat.possessive == 0) {
5570
5.64k
        PFETCH(c);
5571
5.64k
        tok->u.repeat.greedy = 0;
5572
5.64k
        tok->u.repeat.possessive = 0;
5573
5.64k
      }
5574
70.6k
      else {
5575
73.9k
      possessive_check:
5576
73.9k
        tok->u.repeat.greedy = 1;
5577
73.9k
        if (!PEND && PPEEK_IS('+') &&
5578
73.9k
            ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
5579
11.2k
              tok->type != TK_INTERVAL)  ||
5580
11.2k
             (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
5581
5.36k
              tok->type == TK_INTERVAL)) &&
5582
73.9k
          tok->u.repeat.possessive == 0) {
5583
7.44k
          PFETCH(c);
5584
7.44k
          tok->u.repeat.possessive = 1;
5585
7.44k
        }
5586
73.9k
      }
5587
79.6k
      break;
5588
5589
79.6k
    case '{':
5590
372
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
5591
200
      r = fetch_interval(&p, end, tok, env);
5592
200
      if (r < 0) return r;  /* error */
5593
148
      if (r == 0) goto greedy_check2;
5594
74
      else if (r == 2) { /* {n} */
5595
74
        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5596
0
          goto possessive_check;
5597
5598
74
        goto greedy_check2;
5599
74
      }
5600
      /* r == 1 : normal char */
5601
0
      break;
5602
5603
644
    case '|':
5604
644
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
5605
362
      tok->type = TK_ALT;
5606
362
      break;
5607
5608
882
    case '(':
5609
882
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5610
254
      tok->type = TK_SUBEXP_OPEN;
5611
254
      break;
5612
5613
480
    case ')':
5614
480
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5615
148
      tok->type = TK_SUBEXP_CLOSE;
5616
148
      break;
5617
5618
3.46k
    case 'w':
5619
3.46k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5620
3.16k
      tok->type = TK_CHAR_TYPE;
5621
3.16k
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5622
3.16k
      tok->u.prop.not   = 0;
5623
3.16k
      break;
5624
5625
4.36k
    case 'W':
5626
4.36k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5627
4.13k
      tok->type = TK_CHAR_TYPE;
5628
4.13k
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5629
4.13k
      tok->u.prop.not   = 1;
5630
4.13k
      break;
5631
5632
1.24k
    case 'b':
5633
1.24k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5634
1.04k
      tok->type = TK_ANCHOR;
5635
1.04k
      tok->u.anchor = ANCR_WORD_BOUNDARY;
5636
1.04k
      break;
5637
5638
960
    case 'B':
5639
960
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5640
760
      tok->type = TK_ANCHOR;
5641
760
      tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
5642
760
      break;
5643
5644
1.27k
    case 'y':
5645
1.27k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5646
1.08k
      tok->type = TK_ANCHOR;
5647
1.08k
      tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
5648
1.08k
      break;
5649
5650
1.37k
    case 'Y':
5651
1.37k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5652
1.16k
      tok->type = TK_ANCHOR;
5653
1.16k
      tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
5654
1.16k
      break;
5655
5656
0
#ifdef USE_WORD_BEGIN_END
5657
780
    case '<':
5658
780
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5659
512
      tok->type = TK_ANCHOR;
5660
512
      tok->u.anchor = ANCR_WORD_BEGIN;
5661
512
      break;
5662
5663
770
    case '>':
5664
770
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5665
516
      tok->type = TK_ANCHOR;
5666
516
      tok->u.anchor = ANCR_WORD_END;
5667
516
      break;
5668
0
#endif
5669
5670
784
    case 's':
5671
784
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5672
580
      tok->type = TK_CHAR_TYPE;
5673
580
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5674
580
      tok->u.prop.not   = 0;
5675
580
      break;
5676
5677
3.46k
    case 'S':
5678
3.46k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5679
3.25k
      tok->type = TK_CHAR_TYPE;
5680
3.25k
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5681
3.25k
      tok->u.prop.not   = 1;
5682
3.25k
      break;
5683
5684
672
    case 'd':
5685
672
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5686
428
      tok->type = TK_CHAR_TYPE;
5687
428
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5688
428
      tok->u.prop.not   = 0;
5689
428
      break;
5690
5691
1.27k
    case 'D':
5692
1.27k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5693
1.05k
      tok->type = TK_CHAR_TYPE;
5694
1.05k
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5695
1.05k
      tok->u.prop.not   = 1;
5696
1.05k
      break;
5697
5698
464
    case 'h':
5699
464
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5700
252
      tok->type = TK_CHAR_TYPE;
5701
252
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5702
252
      tok->u.prop.not   = 0;
5703
252
      break;
5704
5705
1.04k
    case 'H':
5706
1.04k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5707
836
      tok->type = TK_CHAR_TYPE;
5708
836
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5709
836
      tok->u.prop.not   = 1;
5710
836
      break;
5711
5712
3.49k
    case 'K':
5713
3.49k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5714
3.22k
      tok->type = TK_KEEP;
5715
3.22k
      break;
5716
5717
3.84k
    case 'R':
5718
3.84k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5719
3.63k
      tok->type = TK_GENERAL_NEWLINE;
5720
3.63k
      break;
5721
5722
588
    case 'N':
5723
588
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5724
370
      tok->type = TK_NO_NEWLINE;
5725
370
      break;
5726
5727
976
    case 'O':
5728
976
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5729
752
      tok->type = TK_TRUE_ANYCHAR;
5730
752
      break;
5731
5732
6.51k
    case 'X':
5733
6.51k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5734
5.99k
      tok->type = TK_TEXT_SEGMENT;
5735
5.99k
      break;
5736
5737
540
    case 'A':
5738
540
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5739
538
    begin_buf:
5740
538
      tok->type = TK_ANCHOR;
5741
538
      tok->u.subtype = ANCR_BEGIN_BUF;
5742
538
      break;
5743
5744
692
    case 'Z':
5745
692
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5746
266
        goto end_buf;
5747
266
      }
5748
426
      else {
5749
426
        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5750
228
        tok->type = TK_ANCHOR;
5751
228
        tok->u.subtype = ANCR_SEMI_END_BUF;
5752
228
      }
5753
228
      break;
5754
5755
576
    case 'z':
5756
576
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON))
5757
4
        return ONIGERR_UNDEFINED_OPERATOR;
5758
5759
572
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5760
840
    end_buf:
5761
840
      tok->type = TK_ANCHOR;
5762
840
      tok->u.subtype = ANCR_END_BUF;
5763
840
      break;
5764
5765
812
    case 'G':
5766
812
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5767
544
      tok->type = TK_ANCHOR;
5768
544
      tok->u.subtype = ANCR_BEGIN_POSITION;
5769
544
      break;
5770
5771
416
    case '`':
5772
416
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5773
190
      goto begin_buf;
5774
190
      break;
5775
5776
478
    case '\'':
5777
478
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5778
196
      goto end_buf;
5779
196
      break;
5780
5781
1.32k
    case 'o':
5782
1.32k
      if (PEND) break;
5783
5784
1.32k
      prev = p;
5785
1.32k
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5786
778
        PINC;
5787
778
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
5788
778
        if (r < 0) return r;
5789
776
        if (!PEND) {
5790
750
          if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5791
2
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5792
750
        }
5793
5794
774
        tok->base_num = 8;
5795
774
        goto brace_code_point_entry;
5796
776
      }
5797
542
      break;
5798
5799
1.97k
    case 'x':
5800
1.97k
      if (PEND) break;
5801
5802
1.97k
      prev = p;
5803
1.97k
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5804
920
        PINC;
5805
920
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5806
920
        if (r < 0) return r;
5807
920
        if (!PEND) {
5808
892
          if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5809
2
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5810
892
        }
5811
5812
918
        tok->base_num = 16;
5813
1.69k
      brace_code_point_entry:
5814
1.69k
        if ((p > prev + enclen(enc, prev))) {
5815
956
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5816
940
          if (PPEEK_IS('}')) {
5817
260
            PINC;
5818
260
          }
5819
680
          else {
5820
680
            r = check_code_point_sequence(p, end, tok->base_num, enc);
5821
680
            if (r < 0) return r;
5822
234
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5823
230
            tok->code_point_continue = TRUE;
5824
230
          }
5825
490
          tok->type   = TK_CODE_POINT;
5826
490
          tok->u.code = code;
5827
490
        }
5828
736
        else {
5829
          /* can't read nothing or invalid format */
5830
736
          p = prev;
5831
736
        }
5832
1.69k
      }
5833
1.05k
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5834
818
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5835
818
        if (r < 0) return r;
5836
818
        if (p == prev) {  /* can't read nothing. */
5837
568
          code = 0; /* but, it's not error */
5838
568
        }
5839
818
        tok->type = TK_CRUDE_BYTE;
5840
818
        tok->base_num = 16;
5841
818
        tok->u.byte   = (UChar )code;
5842
818
      }
5843
2.27k
      break;
5844
5845
2.27k
    case 'u':
5846
620
      if (PEND) break;
5847
616
      prev = p;
5848
616
      mindigits = maxdigits = 4;
5849
616
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5850
1.02k
    u_hex_digits:
5851
1.02k
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5852
1.02k
        if (r < 0) return r;
5853
972
        if (p == prev) {  /* can't read nothing. */
5854
0
          code = 0; /* but, it's not error */
5855
0
        }
5856
972
        tok->type = TK_CODE_POINT;
5857
972
        tok->base_num = 16;
5858
972
        tok->u.code   = code;
5859
972
      }
5860
1.18k
      break;
5861
5862
1.18k
    case 'U':
5863
844
      if (PEND) break;
5864
838
      prev = p;
5865
838
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5866
618
        mindigits = maxdigits = 8;
5867
618
        goto u_hex_digits;
5868
618
      }
5869
220
      break;
5870
5871
8.57k
    case '1': case '2': case '3': case '4':
5872
9.92k
    case '5': case '6': case '7': case '8': case '9':
5873
9.92k
      PUNFETCH;
5874
9.92k
      prev = p;
5875
9.92k
      r = scan_number(&p, end, enc);
5876
9.92k
      if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {
5877
242
        goto skip_backref;
5878
242
      }
5879
5880
9.68k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5881
9.68k
          (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */
5882
8.72k
        if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5883
0
          if (r > env->num_mem || IS_NULL(PARSEENV_MEMENV(env)[r].mem_node))
5884
0
            return ONIGERR_INVALID_BACKREF;
5885
0
        }
5886
5887
8.72k
        tok->type = TK_BACKREF;
5888
8.72k
        tok->u.backref.num     = 1;
5889
8.72k
        tok->u.backref.ref1    = r;
5890
8.72k
        tok->u.backref.by_name = 0;
5891
8.72k
#ifdef USE_BACKREF_WITH_LEVEL
5892
8.72k
        tok->u.backref.exist_level = 0;
5893
8.72k
#endif
5894
8.72k
        break;
5895
8.72k
      }
5896
5897
1.20k
    skip_backref:
5898
1.20k
      if (c == '8' || c == '9') {
5899
        /* normal char */
5900
344
        p = prev; PINC;
5901
344
        break;
5902
344
      }
5903
5904
856
      p = prev;
5905
      /* fall through */
5906
1.82k
    case '0':
5907
1.82k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5908
1.30k
        prev = p;
5909
1.30k
        r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code);
5910
1.30k
        if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;
5911
1.30k
        if (p == prev) {  /* can't read nothing. */
5912
678
          code = 0; /* but, it's not error */
5913
678
        }
5914
1.30k
        tok->type = TK_CRUDE_BYTE;
5915
1.30k
        tok->base_num = 8;
5916
1.30k
        tok->u.byte   = (UChar )code;
5917
1.30k
      }
5918
514
      else if (c != '0') {
5919
286
        PINC;
5920
286
      }
5921
1.82k
      break;
5922
5923
3.94k
    case 'k':
5924
3.94k
      if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5925
3.69k
        PFETCH(c);
5926
3.69k
        if (c == '<' || c == '\'') {
5927
3.14k
          UChar* name_end;
5928
3.14k
          int* backs;
5929
3.14k
          int back_num;
5930
3.14k
          enum REF_NUM num_type;
5931
5932
3.14k
          allow_num = 1;
5933
5934
3.15k
        backref_start:
5935
3.15k
          prev = p;
5936
5937
3.15k
#ifdef USE_BACKREF_WITH_LEVEL
5938
3.15k
          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5939
3.15k
          r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5940
3.15k
                                 env, &back_num, &tok->u.backref.level, &num_type);
5941
3.15k
          if (r == 1) tok->u.backref.exist_level = 1;
5942
2.58k
          else        tok->u.backref.exist_level = 0;
5943
#else
5944
          r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
5945
#endif
5946
3.15k
          if (r < 0) return r;
5947
5948
2.73k
          if (num_type != IS_NOT_NUM) {
5949
1.09k
            if (allow_num == 0) return ONIGERR_INVALID_BACKREF;
5950
5951
1.09k
            if (num_type == IS_REL_NUM) {
5952
380
              back_num = backref_rel_to_abs(back_num, env);
5953
380
            }
5954
1.09k
            if (back_num <= 0)
5955
68
              return ONIGERR_INVALID_BACKREF;
5956
5957
1.02k
            if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5958
0
              if (back_num > env->num_mem ||
5959
0
                  IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
5960
0
                return ONIGERR_INVALID_BACKREF;
5961
0
            }
5962
1.02k
            tok->type = TK_BACKREF;
5963
1.02k
            tok->u.backref.by_name = 0;
5964
1.02k
            tok->u.backref.num  = 1;
5965
1.02k
            tok->u.backref.ref1 = back_num;
5966
1.02k
          }
5967
1.64k
          else {
5968
1.64k
            int num = name_to_group_numbers(env, prev, name_end, &backs);
5969
1.64k
            if (num <= 0) {
5970
6
              return ONIGERR_UNDEFINED_NAME_REFERENCE;
5971
6
            }
5972
1.63k
            if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5973
0
              int i;
5974
0
              for (i = 0; i < num; i++) {
5975
0
                if (backs[i] > env->num_mem ||
5976
0
                    IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
5977
0
                  return ONIGERR_INVALID_BACKREF;
5978
0
              }
5979
0
            }
5980
5981
1.63k
            tok->type = TK_BACKREF;
5982
1.63k
            tok->u.backref.by_name = 1;
5983
1.63k
            if (num == 1) {
5984
322
              tok->u.backref.num  = 1;
5985
322
              tok->u.backref.ref1 = backs[0];
5986
322
            }
5987
1.31k
            else {
5988
1.31k
              tok->u.backref.num  = num;
5989
1.31k
              tok->u.backref.refs = backs;
5990
1.31k
            }
5991
1.63k
          }
5992
2.73k
        }
5993
546
        else
5994
546
          PUNFETCH;
5995
3.69k
      }
5996
3.45k
      break;
5997
5998
3.45k
#ifdef USE_CALL
5999
4.02k
    case 'g':
6000
4.02k
      if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
6001
3.78k
        PFETCH(c);
6002
3.78k
        if (c == '<' || c == '\'') {
6003
3.31k
          int gnum;
6004
3.31k
          UChar* name_end;
6005
3.31k
          enum REF_NUM num_type;
6006
6007
3.31k
          allow_num = 1;
6008
6009
3.34k
        call_start:
6010
3.34k
          prev = p;
6011
3.34k
          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
6012
3.34k
                         &gnum, &num_type, TRUE);
6013
3.34k
          if (r < 0) return r;
6014
6015
3.24k
          if (num_type != IS_NOT_NUM) {
6016
2.97k
            if (allow_num == 0) return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6017
6018
2.96k
            if (num_type == IS_REL_NUM) {
6019
1.70k
              gnum = backref_rel_to_abs(gnum, env);
6020
1.70k
              if (gnum < 0) {
6021
64
                onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
6022
64
                                               prev, name_end);
6023
64
                return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6024
64
              }
6025
1.70k
            }
6026
2.90k
            tok->u.call.by_number = 1;
6027
2.90k
            tok->u.call.gnum      = gnum;
6028
2.90k
          }
6029
278
          else {
6030
278
            tok->u.call.by_number = 0;
6031
278
            tok->u.call.gnum      = 0;
6032
278
          }
6033
6034
3.18k
          tok->type = TK_CALL;
6035
3.18k
          tok->u.call.name     = prev;
6036
3.18k
          tok->u.call.name_end = name_end;
6037
3.18k
        }
6038
476
        else
6039
476
          PUNFETCH;
6040
3.78k
      }
6041
3.89k
      break;
6042
3.89k
#endif
6043
6044
3.89k
    case 'Q':
6045
502
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
6046
420
        tok->type = TK_QUOTE_OPEN;
6047
420
      }
6048
502
      break;
6049
6050
2.05k
    case 'p':
6051
2.74k
    case 'P':
6052
2.74k
      if (!PEND && PPEEK_IS('{') &&
6053
2.74k
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
6054
1.87k
        PINC;
6055
1.87k
        tok->type = TK_CHAR_PROPERTY;
6056
1.87k
        tok->u.prop.not = c == 'P';
6057
6058
1.87k
        if (!PEND &&
6059
1.87k
            IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
6060
1.41k
          PFETCH(c);
6061
1.41k
          if (c == '^') {
6062
66
            tok->u.prop.not = tok->u.prop.not == 0;
6063
66
          }
6064
1.34k
          else
6065
1.34k
            PUNFETCH;
6066
1.41k
        }
6067
1.87k
      }
6068
2.74k
      break;
6069
6070
8.70k
    default:
6071
8.70k
      {
6072
8.70k
        OnigCodePoint c2;
6073
6074
8.70k
        PUNFETCH;
6075
8.70k
        r = fetch_escaped_value(&p, end, env, &c2);
6076
8.70k
        if (r < 0) return r;
6077
8.53k
        if (tok->u.code != c2) {
6078
2.31k
          tok->type = TK_CODE_POINT;
6079
2.31k
          tok->u.code = c2;
6080
2.31k
        }
6081
6.21k
        else { /* string */
6082
6.21k
          p = tok->backp + enclen(enc, tok->backp);
6083
6.21k
        }
6084
8.53k
      }
6085
0
      break;
6086
80.1k
    }
6087
80.1k
  }
6088
548k
  else {
6089
548k
    tok->u.code = c;
6090
548k
    tok->escaped = 0;
6091
6092
548k
#ifdef USE_VARIABLE_META_CHARS
6093
548k
    if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
6094
548k
        IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
6095
0
      if (c == MC_ANYCHAR(syn))
6096
0
        goto any_char;
6097
0
      else if (c == MC_ANYTIME(syn))
6098
0
        goto any_time;
6099
0
      else if (c == MC_ZERO_OR_ONE_TIME(syn))
6100
0
        goto zero_or_one_time;
6101
0
      else if (c == MC_ONE_OR_MORE_TIME(syn))
6102
0
        goto one_or_more_time;
6103
0
      else if (c == MC_ANYCHAR_ANYTIME(syn)) {
6104
0
        tok->type = TK_ANYCHAR_ANYTIME;
6105
0
        goto out;
6106
0
      }
6107
0
    }
6108
548k
#endif
6109
6110
548k
    switch (c) {
6111
17.7k
    case '.':
6112
17.7k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
6113
17.7k
#ifdef USE_VARIABLE_META_CHARS
6114
17.7k
    any_char:
6115
17.7k
#endif
6116
17.7k
      tok->type = TK_ANYCHAR;
6117
17.7k
      break;
6118
6119
18.4k
    case '*':
6120
18.4k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
6121
18.4k
#ifdef USE_VARIABLE_META_CHARS
6122
18.4k
    any_time:
6123
18.4k
#endif
6124
18.4k
      tok->type = TK_REPEAT;
6125
18.4k
      tok->u.repeat.lower = 0;
6126
18.4k
      tok->u.repeat.upper = INFINITE_REPEAT;
6127
18.4k
      goto greedy_check;
6128
0
      break;
6129
6130
30.2k
    case '+':
6131
30.2k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
6132
30.0k
#ifdef USE_VARIABLE_META_CHARS
6133
30.0k
    one_or_more_time:
6134
30.0k
#endif
6135
30.0k
      tok->type = TK_REPEAT;
6136
30.0k
      tok->u.repeat.lower = 1;
6137
30.0k
      tok->u.repeat.upper = INFINITE_REPEAT;
6138
30.0k
      goto greedy_check;
6139
0
      break;
6140
6141
14.0k
    case '?':
6142
14.0k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
6143
13.5k
#ifdef USE_VARIABLE_META_CHARS
6144
13.5k
    zero_or_one_time:
6145
13.5k
#endif
6146
13.5k
      tok->type = TK_REPEAT;
6147
13.5k
      tok->u.repeat.lower = 0;
6148
13.5k
      tok->u.repeat.upper = 1;
6149
13.5k
      goto greedy_check;
6150
0
      break;
6151
6152
25.5k
    case '{':
6153
25.5k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
6154
25.1k
      r = fetch_interval(&p, end, tok, env);
6155
25.1k
      if (r < 0) return r;  /* error */
6156
24.9k
      if (r == 0) goto greedy_check2;
6157
20.5k
      else if (r == 2) { /* {n} */
6158
12.8k
        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
6159
3.34k
          goto possessive_check;
6160
6161
9.47k
        goto greedy_check2;
6162
12.8k
      }
6163
      /* r == 1 : normal char */
6164
7.75k
      break;
6165
6166
23.1k
    case '|':
6167
23.1k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
6168
22.6k
      tok->type = TK_ALT;
6169
22.6k
      break;
6170
6171
66.2k
    case '(':
6172
66.2k
      if (!PEND && PPEEK_IS('?') &&
6173
66.2k
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
6174
31.8k
        prev = p;
6175
31.8k
        PINC;
6176
31.8k
        if (! PEND) {
6177
31.8k
          c = PPEEK;
6178
31.8k
          if (c == '#') {
6179
280
            PFETCH(c);
6180
1.78k
            while (1) {
6181
1.78k
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6182
1.60k
              PFETCH(c);
6183
1.60k
              if (c == MC_ESC(syn)) {
6184
234
                if (! PEND) PFETCH(c);
6185
234
              }
6186
1.37k
              else {
6187
1.37k
                if (c == ')') break;
6188
1.37k
              }
6189
1.60k
            }
6190
100
            goto start;
6191
280
          }
6192
31.5k
          else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
6193
16.8k
            int gnum;
6194
16.8k
            UChar* name;
6195
16.8k
            UChar* name_end;
6196
16.8k
            enum REF_NUM num_type;
6197
6198
16.8k
            switch (c) {
6199
134
            case '&':
6200
134
              {
6201
134
                PINC;
6202
134
                name = p;
6203
134
                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6204
134
                               &gnum, &num_type, FALSE);
6205
134
                if (r < 0) return r;
6206
6207
70
                tok->type = TK_CALL;
6208
70
                tok->u.call.by_number = 0;
6209
70
                tok->u.call.gnum      = 0;
6210
70
                tok->u.call.name      = name;
6211
70
                tok->u.call.name_end  = name_end;
6212
70
              }
6213
0
              break;
6214
6215
916
            case 'R':
6216
916
              tok->type = TK_CALL;
6217
916
              tok->u.call.by_number = 1;
6218
916
              tok->u.call.gnum      = 0;
6219
916
              tok->u.call.name      = p;
6220
916
              PINC;
6221
916
              if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
6222
828
              tok->u.call.name_end  = p;
6223
828
              break;
6224
6225
430
            case '-':
6226
818
            case '+':
6227
818
              if (! PEND) {
6228
818
                PINC;
6229
818
                if (! PEND) {
6230
800
                  c = PPEEK;
6231
800
                  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
6232
660
                    PUNFETCH;
6233
660
                    goto lparen_qmark_num;
6234
660
                  }
6235
800
                }
6236
818
              }
6237
158
              p = prev;
6238
158
              goto lparen_qmark_end2;
6239
0
              break;
6240
6241
14.9k
            default:
6242
14.9k
              if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
6243
6244
5.00k
            lparen_qmark_num:
6245
5.00k
              {
6246
5.00k
                name = p;
6247
5.00k
                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6248
5.00k
                               &gnum, &num_type, TRUE);
6249
5.00k
                if (r < 0) return r;
6250
6251
4.77k
                if (num_type == IS_NOT_NUM) {
6252
4
                  return ONIGERR_INVALID_GROUP_NAME;
6253
4
                }
6254
4.77k
                else {
6255
4.77k
                  if (num_type == IS_REL_NUM) {
6256
636
                    gnum = backref_rel_to_abs(gnum, env);
6257
636
                    if (gnum < 0) {
6258
84
                      onig_scan_env_set_error_string(env,
6259
84
                             ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
6260
84
                      return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6261
84
                    }
6262
636
                  }
6263
4.68k
                  tok->u.call.by_number = 1;
6264
4.68k
                  tok->u.call.gnum      = gnum;
6265
4.68k
                }
6266
6267
4.68k
                tok->type = TK_CALL;
6268
4.68k
                tok->u.call.name     = name;
6269
4.68k
                tok->u.call.name_end = name_end;
6270
4.68k
              }
6271
0
              break;
6272
16.8k
            }
6273
5.58k
            break;
6274
16.8k
          }
6275
14.7k
          else if (c == 'P' &&
6276
14.7k
                   IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
6277
168
            PINC; /* skip 'P' */
6278
168
            if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6279
152
            PFETCH(c);
6280
152
            allow_num = 0;
6281
152
            if (c == '=') {
6282
6
              c = '(';
6283
6
              goto backref_start;
6284
6
            }
6285
146
            else if (c == '>') {
6286
36
#ifdef USE_CALL
6287
36
              c = '(';
6288
36
              goto call_start;
6289
#else
6290
              return ONIGERR_UNDEFINED_OPERATOR;
6291
#endif
6292
36
            }
6293
110
            else {
6294
110
              p = prev;
6295
110
              goto lparen_qmark_end2;
6296
110
            }
6297
152
          }
6298
31.8k
        }
6299
25.2k
      lparen_qmark_end:
6300
25.2k
        PUNFETCH;
6301
25.2k
      }
6302
6303
59.8k
    lparen_qmark_end2:
6304
59.8k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6305
58.8k
      tok->type = TK_SUBEXP_OPEN;
6306
58.8k
      break;
6307
6308
53.0k
    case ')':
6309
53.0k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6310
52.5k
      tok->type = TK_SUBEXP_CLOSE;
6311
52.5k
      break;
6312
6313
6.00k
    case '^':
6314
6.00k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6315
6.00k
      if (IS_SYNTAX_BV(syn, ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP)) {
6316
1.16k
        if (! is_head_of_bre_subexp(PPREV, end, enc, env)) break;
6317
1.16k
      }
6318
5.06k
      tok->type = TK_ANCHOR;
6319
5.06k
      tok->u.subtype = (OPTON_SINGLELINE(env->options)
6320
5.06k
                        ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
6321
5.06k
      break;
6322
6323
6.24k
    case '$':
6324
6.24k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6325
6.24k
      if (IS_SYNTAX_BV(syn, ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP)) {
6326
896
        if (! is_end_of_bre_subexp(p, end, enc, env)) break;
6327
896
      }
6328
5.55k
      tok->type = TK_ANCHOR;
6329
5.55k
      tok->u.subtype = (OPTON_SINGLELINE(env->options)
6330
5.55k
                        ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
6331
5.55k
      break;
6332
6333
13.6k
    case '[':
6334
13.6k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
6335
13.6k
      tok->type = TK_OPEN_CC;
6336
13.6k
      break;
6337
6338
3.45k
    case ']':
6339
3.45k
      if (*src > env->pattern)   /* /].../ is allowed. */
6340
3.20k
        CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
6341
3.45k
      break;
6342
6343
1.64k
    case '#':
6344
1.64k
      if (OPTON_EXTEND(env->options)) {
6345
3.64k
        while (!PEND) {
6346
3.43k
          PFETCH(c);
6347
3.43k
          if (ONIGENC_IS_CODE_NEWLINE(enc, c))
6348
64
            break;
6349
3.43k
        }
6350
276
        goto start;
6351
0
        break;
6352
276
      }
6353
1.36k
      break;
6354
6355
6.52k
    case ' ': case '\t': case '\n': case '\r': case '\f':
6356
6.52k
      if (OPTON_EXTEND(env->options))
6357
1.33k
        goto start;
6358
5.19k
      break;
6359
6360
262k
    default:
6361
      /* string */
6362
262k
      break;
6363
548k
    }
6364
548k
  }
6365
6366
625k
 out:
6367
625k
  *src = p;
6368
625k
  return tok->type;
6369
628k
}
6370
6371
static int
6372
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
6373
                         OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
6374
                         const OnigCodePoint mbr[])
6375
3.76k
{
6376
3.76k
  int i, r;
6377
3.76k
  OnigCodePoint j;
6378
6379
3.76k
  int n = ONIGENC_CODE_RANGE_NUM(mbr);
6380
6381
3.76k
  if (not == 0) {
6382
6.82k
    for (i = 0; i < n; i++) {
6383
6.58k
      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6384
65.6k
           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6385
61.0k
        if (j >= sb_out) {
6386
1.99k
          if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6387
264
            r = add_code_range_to_buf(&(cc->mbuf), j,
6388
264
                                      ONIGENC_CODE_RANGE_TO(mbr, i));
6389
264
            if (r != 0) return r;
6390
264
            i++;
6391
264
          }
6392
6393
1.99k
          goto sb_end;
6394
1.99k
        }
6395
61.0k
        BITSET_SET_BIT(cc->bs, j);
6396
59.0k
      }
6397
6.58k
    }
6398
6399
2.23k
  sb_end:
6400
600k
    for ( ; i < n; i++) {
6401
598k
      r = add_code_range_to_buf(&(cc->mbuf),
6402
598k
                                ONIGENC_CODE_RANGE_FROM(mbr, i),
6403
598k
                                ONIGENC_CODE_RANGE_TO(mbr, i));
6404
598k
      if (r != 0) return r;
6405
598k
    }
6406
2.23k
  }
6407
1.52k
  else {
6408
1.52k
    OnigCodePoint prev = 0;
6409
6410
5.12k
    for (i = 0; i < n; i++) {
6411
152k
      for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
6412
148k
        if (j >= sb_out) {
6413
1.19k
          goto sb_end2;
6414
1.19k
        }
6415
148k
        BITSET_SET_BIT(cc->bs, j);
6416
147k
      }
6417
3.60k
      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6418
3.60k
    }
6419
7.08k
    for (j = prev; j < sb_out; j++) {
6420
6.75k
      BITSET_SET_BIT(cc->bs, j);
6421
6.75k
    }
6422
6423
1.52k
  sb_end2:
6424
1.52k
    prev = sb_out;
6425
6426
321k
    for (i = 0; i < n; i++) {
6427
320k
      if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6428
318k
        r = add_code_range_to_buf(&(cc->mbuf), prev,
6429
318k
                                  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
6430
318k
        if (r != 0) return r;
6431
318k
      }
6432
320k
      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6433
320k
      if (prev == 0) goto end;
6434
320k
    }
6435
6436
1.52k
    r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6437
1.52k
    if (r != 0) return r;
6438
1.52k
  }
6439
6440
3.76k
 end:
6441
3.76k
  return 0;
6442
3.76k
}
6443
6444
static int
6445
add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
6446
                               OnigEncoding enc ARG_UNUSED,
6447
                               OnigCodePoint sb_out,
6448
                               const OnigCodePoint mbr[], OnigCodePoint limit)
6449
2.73k
{
6450
2.73k
  int i, r;
6451
2.73k
  OnigCodePoint j;
6452
2.73k
  OnigCodePoint from;
6453
2.73k
  OnigCodePoint to;
6454
6455
2.73k
  int n = ONIGENC_CODE_RANGE_NUM(mbr);
6456
6457
2.73k
  if (not == 0) {
6458
4.59k
    for (i = 0; i < n; i++) {
6459
4.12k
      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6460
29.9k
           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6461
26.8k
        if (j > limit) goto end;
6462
26.0k
        if (j >= sb_out) {
6463
240
          if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6464
0
            to = ONIGENC_CODE_RANGE_TO(mbr, i);
6465
0
            if (to > limit) to = limit;
6466
0
            r = add_code_range_to_buf(&(cc->mbuf), j, to);
6467
0
            if (r != 0) return r;
6468
0
            i++;
6469
0
          }
6470
6471
240
          goto sb_end;
6472
240
        }
6473
26.0k
        BITSET_SET_BIT(cc->bs, j);
6474
25.8k
      }
6475
4.12k
    }
6476
6477
718
  sb_end:
6478
1.35k
    for ( ; i < n; i++) {
6479
716
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6480
716
      to   = ONIGENC_CODE_RANGE_TO(mbr, i);
6481
716
      if (from > limit) break;
6482
640
      if (to   > limit) to = limit;
6483
640
      r = add_code_range_to_buf(&(cc->mbuf), from, to);
6484
640
      if (r != 0) return r;
6485
640
    }
6486
718
  }
6487
1.20k
  else {
6488
1.20k
    OnigCodePoint prev = 0;
6489
6490
4.61k
    for (i = 0; i < n; i++) {
6491
4.32k
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6492
4.32k
      if (from > limit) {
6493
31.9k
        for (j = prev; j < sb_out; j++) {
6494
31.1k
          BITSET_SET_BIT(cc->bs, j);
6495
31.1k
        }
6496
824
        goto sb_end2;
6497
824
      }
6498
68.4k
      for (j = prev; j < from; j++) {
6499
65.0k
        if (j >= sb_out) goto sb_end2;
6500
65.0k
        BITSET_SET_BIT(cc->bs, j);
6501
64.9k
      }
6502
3.41k
      prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6503
3.41k
      if (prev > limit) prev = limit;
6504
3.41k
      prev++;
6505
3.41k
      if (prev == 0) goto end;
6506
3.41k
    }
6507
6.09k
    for (j = prev; j < sb_out; j++) {
6508
5.80k
      BITSET_SET_BIT(cc->bs, j);
6509
5.80k
    }
6510
6511
1.20k
  sb_end2:
6512
1.20k
    prev = sb_out;
6513
6514
4.87k
    for (i = 0; i < n; i++) {
6515
4.49k
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6516
4.49k
      if (from > limit) goto last;
6517
6518
3.66k
      if (prev < from) {
6519
2.55k
        r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
6520
2.55k
        if (r != 0) return r;
6521
2.55k
      }
6522
3.66k
      prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6523
3.66k
      if (prev > limit) prev = limit;
6524
3.66k
      prev++;
6525
3.66k
      if (prev == 0) goto end;
6526
3.66k
    }
6527
6528
1.20k
  last:
6529
1.20k
    r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6530
1.20k
    if (r != 0) return r;
6531
1.20k
  }
6532
6533
2.73k
 end:
6534
2.73k
  return 0;
6535
2.73k
}
6536
6537
static int
6538
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ParseEnv* env)
6539
21.1k
{
6540
21.1k
  int c, r;
6541
21.1k
  int ascii_mode;
6542
21.1k
  int is_single;
6543
21.1k
  const OnigCodePoint *ranges;
6544
21.1k
  OnigCodePoint limit;
6545
21.1k
  OnigCodePoint sb_out;
6546
21.1k
  OnigEncoding enc = env->enc;
6547
6548
21.1k
  ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
6549
6550
21.1k
  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
6551
21.1k
  if (r == 0) {
6552
6.49k
    if (ascii_mode == 0)
6553
3.76k
      r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
6554
2.73k
    else
6555
2.73k
      r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
6556
2.73k
                                         ranges, ASCII_LIMIT);
6557
6.49k
    return r;
6558
6.49k
  }
6559
14.6k
  else if (r != ONIG_NO_SUPPORT_CONFIG) {
6560
0
    return r;
6561
0
  }
6562
6563
14.6k
  r = 0;
6564
14.6k
  is_single = ONIGENC_IS_SINGLEBYTE(enc);
6565
14.6k
  limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
6566
6567
14.6k
  switch (ctype) {
6568
62
  case ONIGENC_CTYPE_ALPHA:
6569
92
  case ONIGENC_CTYPE_BLANK:
6570
158
  case ONIGENC_CTYPE_CNTRL:
6571
2.25k
  case ONIGENC_CTYPE_DIGIT:
6572
2.28k
  case ONIGENC_CTYPE_LOWER:
6573
2.34k
  case ONIGENC_CTYPE_PUNCT:
6574
7.43k
  case ONIGENC_CTYPE_SPACE:
6575
7.49k
  case ONIGENC_CTYPE_UPPER:
6576
8.13k
  case ONIGENC_CTYPE_XDIGIT:
6577
8.29k
  case ONIGENC_CTYPE_ASCII:
6578
8.33k
  case ONIGENC_CTYPE_ALNUM:
6579
8.33k
    if (not != 0) {
6580
565k
      for (c = 0; c < (int )limit; c++) {
6581
561k
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6582
495k
          if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6583
495k
            BITSET_SET_BIT(cc->bs, c);
6584
495k
        }
6585
561k
      }
6586
268k
      for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6587
265k
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6588
265k
          BITSET_SET_BIT(cc->bs, c);
6589
265k
      }
6590
6591
3.23k
      if (is_single == 0)
6592
1.33k
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6593
3.23k
    }
6594
5.10k
    else {
6595
972k
      for (c = 0; c < (int )limit; c++) {
6596
967k
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6597
843k
          if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6598
843k
            BITSET_SET_BIT(cc->bs, c);
6599
843k
        }
6600
967k
      }
6601
5.10k
    }
6602
8.33k
    break;
6603
6604
8.33k
  case ONIGENC_CTYPE_GRAPH:
6605
296
  case ONIGENC_CTYPE_PRINT:
6606
6.28k
  case ONIGENC_CTYPE_WORD:
6607
6.28k
    if (not != 0) {
6608
481k
      for (c = 0; c < (int )limit; c++) {
6609
        /* check invalid code point */
6610
478k
        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6611
478k
            && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6612
478k
          BITSET_SET_BIT(cc->bs, c);
6613
478k
      }
6614
268k
      for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6615
265k
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6616
265k
          BITSET_SET_BIT(cc->bs, c);
6617
265k
      }
6618
2.90k
      if (ascii_mode != 0 && is_single == 0)
6619
502
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6620
2.90k
    }
6621
3.37k
    else {
6622
615k
      for (c = 0; c < (int )limit; c++) {
6623
611k
        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6624
611k
            && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6625
611k
          BITSET_SET_BIT(cc->bs, c);
6626
611k
      }
6627
3.37k
      if (ascii_mode == 0 && is_single == 0)
6628
1.02k
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6629
3.37k
    }
6630
6.28k
    break;
6631
6632
6.28k
  default:
6633
0
    return ONIGERR_PARSER_BUG;
6634
0
    break;
6635
14.6k
  }
6636
6637
14.6k
  return r;
6638
14.6k
}
6639
6640
static int
6641
prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ParseEnv* env)
6642
448
{
6643
448
  static PosixBracketEntryType PBS[] = {
6644
448
    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
6645
448
    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
6646
448
    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
6647
448
    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
6648
448
    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
6649
448
    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
6650
448
    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
6651
448
    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
6652
448
    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
6653
448
    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
6654
448
    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
6655
448
    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
6656
448
    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
6657
448
    { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
6658
448
    { (UChar* )NULL,     -1, 0 }
6659
448
  };
6660
6661
448
  PosixBracketEntryType *pb;
6662
448
  int not, r;
6663
448
  OnigEncoding enc = env->enc;
6664
448
  UChar *p = *src;
6665
6666
448
  if (PPEEK_IS('^')) {
6667
140
    PINC_S;
6668
140
    not = 1;
6669
140
  }
6670
308
  else
6671
308
    not = 0;
6672
6673
2.89k
  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
6674
2.85k
    if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
6675
408
      p = (UChar* )onigenc_step(enc, p, end, pb->len);
6676
408
      if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
6677
4
        break;
6678
6679
404
      r = add_ctype_to_cc(cc, pb->ctype, not, env);
6680
404
      if (r != 0) return r;
6681
6682
404
      PINC_S; PINC_S;
6683
404
      *src = p;
6684
404
      return 0;
6685
404
    }
6686
2.85k
  }
6687
6688
44
  return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6689
448
}
6690
6691
static int
6692
fetch_char_property_to_ctype(UChar** src, UChar* end, ParseEnv* env)
6693
2.43k
{
6694
2.43k
  int r;
6695
2.43k
  OnigCodePoint c;
6696
2.43k
  OnigEncoding enc;
6697
2.43k
  UChar *prev, *start, *p;
6698
6699
2.43k
  p = *src;
6700
2.43k
  enc = env->enc;
6701
2.43k
  r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6702
2.43k
  start = prev = p;
6703
6704
15.4k
  while (!PEND) {
6705
15.2k
    prev = p;
6706
15.2k
    PFETCH_S(c);
6707
15.2k
    if (c == '}') {
6708
2.16k
      r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6709
2.16k
      if (r >= 0) {
6710
1.66k
        *src = p;
6711
1.66k
      }
6712
506
      else {
6713
506
        onig_scan_env_set_error_string(env, r, *src, prev);
6714
506
      }
6715
6716
2.16k
      return r;
6717
2.16k
    }
6718
13.0k
    else if (c == '(' || c == ')' || c == '{' || c == '|') {
6719
24
      break;
6720
24
    }
6721
15.2k
  }
6722
6723
268
  return r;
6724
2.43k
}
6725
6726
static int
6727
prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end,
6728
                  ParseEnv* env)
6729
1.87k
{
6730
1.87k
  int r, ctype;
6731
1.87k
  CClassNode* cc;
6732
6733
1.87k
  ctype = fetch_char_property_to_ctype(src, end, env);
6734
1.87k
  if (ctype < 0) return ctype;
6735
6736
1.23k
  if (ctype == ONIGENC_CTYPE_WORD) {
6737
304
    *np = node_new_ctype(ctype, tok->u.prop.not, env->options);
6738
304
    CHECK_NULL_RETURN_MEMERR(*np);
6739
304
    return 0;
6740
304
  }
6741
6742
932
  *np = node_new_cclass();
6743
932
  CHECK_NULL_RETURN_MEMERR(*np);
6744
932
  cc = CCLASS_(*np);
6745
932
  r = add_ctype_to_cc(cc, ctype, FALSE, env);
6746
932
  if (r != 0) return r;
6747
932
  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6748
6749
932
  return 0;
6750
932
}
6751
6752
6753
static int
6754
cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state,
6755
              ParseEnv* env)
6756
13.7k
{
6757
13.7k
  int r;
6758
6759
13.7k
  if (*state == CS_RANGE)
6760
2
    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6761
6762
13.7k
  if (*state == CS_VALUE) {
6763
11.3k
    if (*val == CV_SB)
6764
11.3k
      BITSET_SET_BIT(cc->bs, (int )(*pcode));
6765
9.22k
    else if (*val == CV_MB) {
6766
206
      r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);
6767
206
      if (r < 0) return r;
6768
206
    }
6769
11.3k
  }
6770
6771
13.7k
  *state = CS_VALUE;
6772
13.7k
  *val   = CV_CPROP;
6773
13.7k
  return 0;
6774
13.7k
}
6775
6776
static int
6777
cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6778
             int* from_raw, int to_raw, CVAL intype, CVAL* type,
6779
             CSTATE* state, ParseEnv* env)
6780
61.7k
{
6781
61.7k
  int r;
6782
6783
61.7k
  switch (*state) {
6784
42.5k
  case CS_VALUE:
6785
42.5k
    if (*type == CV_SB) {
6786
33.3k
      if (*from > 0xff)
6787
0
          return ONIGERR_INVALID_CODE_POINT_VALUE;
6788
6789
33.3k
      BITSET_SET_BIT(cc->bs, (int )(*from));
6790
33.3k
    }
6791
9.16k
    else if (*type == CV_MB) {
6792
4.86k
      r = add_code_range(&(cc->mbuf), env, *from, *from);
6793
4.86k
      if (r < 0) return r;
6794
4.86k
    }
6795
42.5k
    break;
6796
6797
42.5k
  case CS_RANGE:
6798
2.99k
    if (intype == *type) {
6799
1.42k
      if (intype == CV_SB) {
6800
1.11k
        if (*from > 0xff || to > 0xff)
6801
0
          return ONIGERR_INVALID_CODE_POINT_VALUE;
6802
6803
1.11k
        if (*from > to) {
6804
142
          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6805
138
            goto ccs_range_end;
6806
4
          else
6807
4
            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6808
142
        }
6809
968
        bitset_set_range(cc->bs, (int )*from, (int )to);
6810
968
      }
6811
314
      else {
6812
314
        r = add_code_range(&(cc->mbuf), env, *from, to);
6813
314
        if (r < 0) return r;
6814
314
      }
6815
1.42k
    }
6816
1.56k
    else {
6817
1.56k
      if (*from > to) {
6818
100
        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6819
88
          goto ccs_range_end;
6820
12
        else
6821
12
          return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6822
100
      }
6823
1.46k
      bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6824
1.46k
      r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6825
1.46k
      if (r < 0) return r;
6826
1.46k
    }
6827
2.95k
  ccs_range_end:
6828
2.95k
    *state = CS_COMPLETE;
6829
2.95k
    break;
6830
6831
2.70k
  case CS_COMPLETE:
6832
16.2k
  case CS_START:
6833
16.2k
    *state = CS_VALUE;
6834
16.2k
    break;
6835
6836
0
  default:
6837
0
    break;
6838
61.7k
  }
6839
6840
61.6k
  *from_raw = to_raw;
6841
61.6k
  *from     = to;
6842
61.6k
  *type     = intype;
6843
61.6k
  return 0;
6844
61.7k
}
6845
6846
static int
6847
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6848
                 ParseEnv* env)
6849
2.37k
{
6850
2.37k
  int in_esc;
6851
2.37k
  OnigCodePoint code;
6852
2.37k
  OnigEncoding enc = env->enc;
6853
2.37k
  UChar* p = from;
6854
6855
2.37k
  in_esc = 0;
6856
4.83k
  while (! PEND) {
6857
4.69k
    if (ignore_escaped && in_esc) {
6858
452
      in_esc = 0;
6859
452
    }
6860
4.24k
    else {
6861
4.24k
      PFETCH_S(code);
6862
4.24k
      if (code == c) return 1;
6863
2.00k
      if (code == MC_ESC(env->syntax)) in_esc = 1;
6864
2.00k
    }
6865
4.69k
  }
6866
140
  return 0;
6867
2.37k
}
6868
6869
static int
6870
prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
6871
17.0k
{
6872
17.0k
  int r, neg, len, fetched, and_start;
6873
17.0k
  OnigCodePoint in_code, curr_code;
6874
17.0k
  UChar *p;
6875
17.0k
  Node* node;
6876
17.0k
  CClassNode *cc, *prev_cc;
6877
17.0k
  CClassNode work_cc;
6878
17.0k
  int curr_raw, in_raw;
6879
17.0k
  CSTATE state;
6880
17.0k
  CVAL in_type;
6881
17.0k
  CVAL curr_type;
6882
6883
17.0k
  *np = NULL_NODE;
6884
17.0k
  INC_PARSE_DEPTH(env->parse_depth);
6885
6886
17.0k
  state = CS_START;
6887
17.0k
  prev_cc = (CClassNode* )NULL;
6888
17.0k
  r = fetch_token_cc(tok, src, end, env, state);
6889
17.0k
  if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
6890
1.97k
    neg = 1;
6891
1.97k
    r = fetch_token_cc(tok, src, end, env, state);
6892
1.97k
  }
6893
15.0k
  else {
6894
15.0k
    neg = 0;
6895
15.0k
  }
6896
6897
17.0k
  if (r < 0) return r;
6898
16.6k
  if (r == TK_CC_CLOSE) {
6899
2.37k
    if (! code_exist_check((OnigCodePoint )']',
6900
2.37k
                           *src, env->pattern_end, 1, env))
6901
140
      return ONIGERR_EMPTY_CHAR_CLASS;
6902
6903
2.23k
    CC_ESC_WARN(env, (UChar* )"]");
6904
2.23k
    r = tok->type = TK_CHAR;  /* allow []...] */
6905
2.23k
  }
6906
6907
16.5k
  *np = node = node_new_cclass();
6908
16.5k
  CHECK_NULL_RETURN_MEMERR(node);
6909
16.5k
  cc = CCLASS_(node);
6910
6911
16.5k
  and_start = 0;
6912
16.5k
  curr_type = CV_UNDEF;
6913
6914
16.5k
  p = *src;
6915
86.1k
  while (r != TK_CC_CLOSE) {
6916
73.3k
    fetched = 0;
6917
73.3k
    switch (r) {
6918
39.6k
    case TK_CHAR:
6919
40.1k
    any_char_in:
6920
40.1k
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
6921
40.1k
      if (len < 0) {
6922
0
        r = len;
6923
0
        goto err;
6924
0
      }
6925
40.1k
      in_type = (len == 1) ? CV_SB : CV_MB;
6926
40.1k
      in_code = tok->u.code;
6927
40.1k
      in_raw = 0;
6928
40.1k
      goto val_entry2;
6929
0
      break;
6930
6931
3.10k
    case TK_CRUDE_BYTE:
6932
      /* tok->base_num != 0 : octal or hexadec. */
6933
3.10k
      if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
6934
2.84k
        int i, j;
6935
2.84k
        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6936
2.84k
        UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6937
2.84k
        UChar* psave = p;
6938
2.84k
        int base_num = tok->base_num;
6939
6940
2.84k
        buf[0] = tok->u.byte;
6941
7.25k
        for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6942
6.06k
          r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6943
6.06k
          if (r < 0) goto err;
6944
6.05k
          if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
6945
1.64k
            fetched = 1;
6946
1.64k
            break;
6947
1.64k
          }
6948
4.40k
          buf[i] = tok->u.byte;
6949
4.40k
        }
6950
6951
2.83k
        if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6952
8
          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6953
8
          goto err;
6954
8
        }
6955
6956
        /* clear buf tail */
6957
15.3k
        for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
6958
6959
2.82k
        len = enclen(env->enc, buf);
6960
2.82k
        if (i < len) {
6961
2
          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6962
2
          goto err;
6963
2
        }
6964
2.82k
        else if (i > len) { /* fetch back */
6965
1.86k
          p = psave;
6966
2.05k
          for (i = 1; i < len; i++) {
6967
190
            r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6968
190
            if (r < 0) goto err;
6969
190
          }
6970
1.86k
          fetched = 0;
6971
1.86k
        }
6972
6973
2.82k
        if (i == 1) {
6974
2.65k
          in_code = (OnigCodePoint )buf[0];
6975
2.65k
          goto crude_single;
6976
2.65k
        }
6977
166
        else {
6978
166
          if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, buf, buf + len)) {
6979
24
            r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
6980
24
            goto err;
6981
24
          }
6982
6983
142
          in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6984
142
          in_type = CV_MB;
6985
142
        }
6986
2.82k
      }
6987
258
      else {
6988
258
        in_code = (OnigCodePoint )tok->u.byte;
6989
2.91k
      crude_single:
6990
2.91k
        in_type = CV_SB;
6991
2.91k
      }
6992
3.05k
      in_raw = 1;
6993
3.05k
      goto val_entry2;
6994
0
      break;
6995
6996
4.02k
    case TK_CODE_POINT:
6997
4.02k
      in_code = tok->u.code;
6998
4.02k
      in_raw  = 1;
6999
5.28k
    val_entry:
7000
5.28k
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);
7001
5.28k
      if (len < 0) {
7002
1.32k
        if (state != CS_RANGE ||
7003
1.32k
            ! IS_SYNTAX_BV(env->syntax,
7004
1.32k
                           ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
7005
1.32k
            in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
7006
62
          r = len;
7007
62
          goto err;
7008
62
        }
7009
1.32k
      }
7010
5.21k
      in_type = (len == 1 ? CV_SB : CV_MB);
7011
48.4k
    val_entry2:
7012
48.4k
      r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type,
7013
48.4k
                       &curr_type, &state, env);
7014
48.4k
      if (r != 0) goto err;
7015
48.4k
      break;
7016
7017
48.4k
    case TK_CC_POSIX_BRACKET_OPEN:
7018
448
      r = prs_posix_bracket(cc, &p, end, env);
7019
448
      if (r < 0) goto err;
7020
404
      if (r == 1) {  /* is not POSIX bracket */
7021
0
        CC_ESC_WARN(env, (UChar* )"[");
7022
0
        p = tok->backp;
7023
0
        in_code = tok->u.code;
7024
0
        in_raw = 0;
7025
0
        goto val_entry;
7026
0
      }
7027
404
      goto next_cprop;
7028
404
      break;
7029
7030
12.9k
    case TK_CHAR_TYPE:
7031
12.9k
      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
7032
12.9k
      if (r != 0) goto err;
7033
7034
13.7k
    next_cprop:
7035
13.7k
      r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);
7036
13.7k
      if (r != 0) goto err;
7037
13.7k
      break;
7038
7039
13.7k
    case TK_CHAR_PROPERTY:
7040
564
      {
7041
564
        int ctype = fetch_char_property_to_ctype(&p, end, env);
7042
564
        if (ctype < 0) {
7043
140
          r = ctype;
7044
140
          goto err;
7045
140
        }
7046
424
        r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
7047
424
        if (r != 0) goto err;
7048
424
        goto next_cprop;
7049
424
      }
7050
424
      break;
7051
7052
4.90k
    case TK_CC_RANGE:
7053
4.90k
      if (state == CS_VALUE) {
7054
3.23k
        r = fetch_token_cc(tok, &p, end, env, CS_RANGE);
7055
3.23k
        if (r < 0) goto err;
7056
7057
3.22k
        fetched = 1;
7058
3.22k
        if (r == TK_CC_CLOSE) { /* allow [x-] */
7059
822
        range_end_val:
7060
822
          in_code = (OnigCodePoint )'-';
7061
822
          in_raw = 0;
7062
822
          goto val_entry;
7063
88
        }
7064
3.13k
        else if (r == TK_CC_AND) {
7065
74
          CC_ESC_WARN(env, (UChar* )"-");
7066
74
          goto range_end_val;
7067
74
        }
7068
7069
3.06k
        if (curr_type == CV_CPROP) {
7070
8
          r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7071
8
          goto err;
7072
8
        }
7073
7074
3.05k
        state = CS_RANGE;
7075
3.05k
      }
7076
1.67k
      else if (state == CS_START) {
7077
        /* [-xa] is allowed */
7078
446
        in_code = tok->u.code;
7079
446
        in_raw = 0;
7080
7081
446
        r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7082
446
        if (r < 0) goto err;
7083
7084
436
        fetched = 1;
7085
        /* [--x] or [a&&-x] is warned. */
7086
436
        if (r == TK_CC_RANGE || and_start != 0)
7087
270
          CC_ESC_WARN(env, (UChar* )"-");
7088
7089
436
        goto val_entry;
7090
446
      }
7091
1.22k
      else if (state == CS_RANGE) {
7092
560
        CC_ESC_WARN(env, (UChar* )"-");
7093
560
        goto any_char_in;  /* [!--] is allowed */
7094
560
      }
7095
666
      else { /* CS_COMPLETE */
7096
666
        r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7097
666
        if (r < 0) goto err;
7098
7099
664
        fetched = 1;
7100
664
        if (r == TK_CC_CLOSE)
7101
70
          goto range_end_val; /* allow [a-b-] */
7102
594
        else if (r == TK_CC_AND) {
7103
66
          CC_ESC_WARN(env, (UChar* )"-");
7104
66
          goto range_end_val;
7105
66
        }
7106
7107
528
        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
7108
524
          CC_ESC_WARN(env, (UChar* )"-");
7109
524
          goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */
7110
524
        }
7111
4
        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7112
4
        goto err;
7113
528
      }
7114
3.05k
      break;
7115
7116
3.39k
    case TK_CC_OPEN_CC: /* [ */
7117
3.39k
      {
7118
3.39k
        Node *anode;
7119
3.39k
        CClassNode* acc;
7120
7121
3.39k
        if (state == CS_VALUE) {
7122
970
          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7123
970
                           &state, env);
7124
970
          if (r != 0) goto err;
7125
970
        }
7126
3.39k
        state = CS_COMPLETE;
7127
7128
3.39k
        r = prs_cc(&anode, tok, &p, end, env);
7129
3.39k
        if (r != 0) {
7130
498
          onig_node_free(anode);
7131
498
          goto cc_open_err;
7132
498
        }
7133
2.89k
        acc = CCLASS_(anode);
7134
2.89k
        r = or_cclass(cc, acc, env->enc);
7135
2.89k
        onig_node_free(anode);
7136
7137
3.39k
      cc_open_err:
7138
3.39k
        if (r != 0) goto err;
7139
3.39k
      }
7140
2.89k
      break;
7141
7142
2.89k
    case TK_CC_AND: /* && */
7143
1.56k
      {
7144
1.56k
        if (state == CS_VALUE) {
7145
782
          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7146
782
                           &state, env);
7147
782
          if (r != 0) goto err;
7148
782
        }
7149
        /* initialize local variables */
7150
1.56k
        and_start = 1;
7151
1.56k
        state = CS_START;
7152
7153
1.56k
        if (IS_NOT_NULL(prev_cc)) {
7154
1.05k
          r = and_cclass(prev_cc, cc, env->enc);
7155
1.05k
          if (r != 0) goto err;
7156
1.05k
          bbuf_free(cc->mbuf);
7157
1.05k
        }
7158
510
        else {
7159
510
          prev_cc = cc;
7160
510
          cc = &work_cc;
7161
510
        }
7162
1.56k
        initialize_cclass(cc);
7163
1.56k
      }
7164
0
      break;
7165
7166
2.76k
    case TK_EOT:
7167
2.76k
      r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
7168
2.76k
      goto err;
7169
0
      break;
7170
0
    default:
7171
0
      r = ONIGERR_PARSER_BUG;
7172
0
      goto err;
7173
0
      break;
7174
73.3k
    }
7175
7176
69.7k
    if (fetched)
7177
5.25k
      r = tok->type;
7178
64.4k
    else {
7179
64.4k
      r = fetch_token_cc(tok, &p, end, env, state);
7180
64.4k
      if (r < 0) goto err;
7181
64.4k
    }
7182
69.7k
  }
7183
7184
12.7k
  if (state == CS_VALUE) {
7185
11.5k
    r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7186
11.5k
                     &state, env);
7187
11.5k
    if (r != 0) goto err;
7188
11.5k
  }
7189
7190
12.7k
  if (IS_NOT_NULL(prev_cc)) {
7191
366
    r = and_cclass(prev_cc, cc, env->enc);
7192
366
    if (r != 0) goto err;
7193
366
    bbuf_free(cc->mbuf);
7194
366
    cc = prev_cc;
7195
366
  }
7196
7197
12.7k
  if (neg != 0)
7198
12.7k
    NCCLASS_SET_NOT(cc);
7199
10.9k
  else
7200
12.7k
    NCCLASS_CLEAR_NOT(cc);
7201
12.7k
  if (IS_NCCLASS_NOT(cc) &&
7202
12.7k
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
7203
426
    int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
7204
426
    if (is_empty != 0)
7205
286
      BITSET_IS_EMPTY(cc->bs, is_empty);
7206
7207
426
    if (is_empty == 0) {
7208
364
      if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
7209
364
        if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
7210
364
          BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
7211
62
        else
7212
62
          add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
7213
364
      }
7214
364
    }
7215
426
  }
7216
12.7k
  *src = p;
7217
12.7k
  DEC_PARSE_DEPTH(env->parse_depth);
7218
12.7k
  return 0;
7219
7220
3.73k
 err:
7221
3.73k
  if (cc != CCLASS_(*np))
7222
144
    bbuf_free(cc->mbuf);
7223
3.73k
  return r;
7224
12.7k
}
7225
7226
/* Forward declaration of prs_alts(), needed because functions below
   (e.g. prs_bag) call it. */
static int prs_alts(Node** top, PToken* tok, int term,
7227
                    UChar** src, UChar* end, ParseEnv* env, int group_head);
7228
7229
#ifdef USE_CALLOUT
7230
7231
/* (?{...}[tag][X<>]) (?{{...}}[tag][X<>]) */
7232
static int
7233
prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end,
7234
                        ParseEnv* env)
7235
1.40k
{
7236
1.40k
  int r;
7237
1.40k
  int i;
7238
1.40k
  int in;
7239
1.40k
  int num;
7240
1.40k
  OnigCodePoint c;
7241
1.40k
  UChar* code_start;
7242
1.40k
  UChar* code_end;
7243
1.40k
  UChar* contents;
7244
1.40k
  UChar* tag_start;
7245
1.40k
  UChar* tag_end;
7246
1.40k
  int brace_nest;
7247
1.40k
  CalloutListEntry* e;
7248
1.40k
  RegexExt* ext;
7249
1.40k
  OnigEncoding enc = env->enc;
7250
1.40k
  UChar* p = *src;
7251
7252
1.40k
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7253
7254
1.37k
  brace_nest = 0;
7255
2.01k
  while (PPEEK_IS('{')) {
7256
648
    brace_nest++;
7257
648
    PINC_S;
7258
648
    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7259
648
  }
7260
7261
1.36k
  in = ONIG_CALLOUT_IN_PROGRESS;
7262
1.36k
  code_start = p;
7263
3.87k
  while (1) {
7264
3.87k
    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7265
7266
3.65k
    code_end = p;
7267
3.65k
    PFETCH_S(c);
7268
3.65k
    if (c == '}') {
7269
1.44k
      i = brace_nest;
7270
1.88k
      while (i > 0) {
7271
742
        if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7272
734
        PFETCH_S(c);
7273
734
        if (c == '}') i--;
7274
298
        else break;
7275
734
      }
7276
1.43k
      if (i == 0) break;
7277
1.43k
    }
7278
3.65k
  }
7279
7280
1.13k
  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7281
7282
1.13k
  PFETCH_S(c);
7283
1.13k
  if (c == '[') {
7284
450
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7285
448
    tag_end = tag_start = p;
7286
2.15k
    while (! PEND) {
7287
2.00k
      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7288
2.00k
      tag_end = p;
7289
2.00k
      PFETCH_S(c);
7290
2.00k
      if (c == ']') break;
7291
2.00k
    }
7292
448
    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7293
126
      return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7294
7295
322
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7296
280
    PFETCH_S(c);
7297
280
  }
7298
680
  else {
7299
680
    tag_start = tag_end = 0;
7300
680
  }
7301
7302
960
  if (c == 'X') {
7303
112
    in |= ONIG_CALLOUT_IN_RETRACTION;
7304
112
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7305
108
    PFETCH_S(c);
7306
108
  }
7307
848
  else if (c == '<') {
7308
148
    in = ONIG_CALLOUT_IN_RETRACTION;
7309
148
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7310
146
    PFETCH_S(c);
7311
146
  }
7312
700
  else if (c == '>') { /* no needs (default) */
7313
210
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7314
198
    PFETCH_S(c);
7315
198
  }
7316
7317
942
  if (c != cterm)
7318
134
    return ONIGERR_INVALID_CALLOUT_PATTERN;
7319
7320
808
  r = reg_callout_list_entry(env, &num);
7321
808
  if (r != 0) return r;
7322
7323
808
  ext = onig_get_regex_ext(env->reg);
7324
808
  CHECK_NULL_RETURN_MEMERR(ext);
7325
808
  if (IS_NULL(ext->pattern)) {
7326
374
    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7327
374
    if (r != ONIG_NORMAL) return r;
7328
374
  }
7329
7330
808
  if (tag_start != tag_end) {
7331
272
    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7332
272
    if (r != ONIG_NORMAL) return r;
7333
272
  }
7334
7335
798
  contents = onigenc_strdup(enc, code_start, code_end);
7336
798
  CHECK_NULL_RETURN_MEMERR(contents);
7337
7338
798
  e = onig_reg_callout_list_at(env->reg, num);
7339
798
  if (IS_NULL(e)) {
7340
0
    xfree(contents);
7341
0
    return ONIGERR_MEMORY;
7342
0
  }
7343
7344
798
  r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
7345
798
  if (r != 0) {
7346
0
    xfree(contents);
7347
0
    return r;
7348
0
  }
7349
7350
798
  e->of      = ONIG_CALLOUT_OF_CONTENTS;
7351
798
  e->in      = in;
7352
798
  e->name_id = ONIG_NON_NAME_ID;
7353
798
  e->u.content.start = contents;
7354
798
  e->u.content.end   = contents + (code_end - code_start);
7355
7356
798
  *src = p;
7357
798
  return 0;
7358
798
}
7359
7360
static long
7361
prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
7362
3.49k
{
7363
3.49k
  long v;
7364
3.49k
  long d;
7365
3.49k
  int flag;
7366
3.49k
  UChar* p;
7367
3.49k
  OnigCodePoint c;
7368
7369
3.49k
  if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
7370
7371
3.49k
  flag = 1;
7372
3.49k
  v = 0;
7373
3.49k
  p = s;
7374
21.2k
  while (p < end) {
7375
18.2k
    c = ONIGENC_MBC_TO_CODE(enc, p, end);
7376
18.2k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
7377
18.2k
    if (c >= '0' && c <= '9') {
7378
16.2k
      d = (long )(c - '0');
7379
16.2k
      if (v > (max - d) / 10)
7380
110
        return ONIGERR_INVALID_CALLOUT_ARG;
7381
7382
16.1k
      v = v * 10 + d;
7383
16.1k
    }
7384
2.03k
    else if (sign_on != 0 && (c == '-' || c == '+')) {
7385
1.65k
      if (c == '-') flag = -1;
7386
1.65k
    }
7387
384
    else
7388
384
      return ONIGERR_INVALID_CALLOUT_ARG;
7389
7390
17.7k
    sign_on = 0;
7391
17.7k
  }
7392
7393
2.99k
  *rl = flag * v;
7394
2.99k
  return ONIG_NORMAL;
7395
3.49k
}
7396
7397
static void
7398
clear_callout_args(int n, unsigned int types[], OnigValue vals[])
7399
430
{
7400
430
  int i;
7401
7402
552
  for (i = 0; i < n; i++) {
7403
122
    switch (types[i]) {
7404
24
    case ONIG_TYPE_STRING:
7405
24
      if (IS_NOT_NULL(vals[i].s.start))
7406
24
        xfree(vals[i].s.start);
7407
24
      break;
7408
98
    default:
7409
98
      break;
7410
122
    }
7411
122
  }
7412
430
}
7413
7414
static int
7415
prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
7416
                 int max_arg_num, unsigned int types[], OnigValue vals[],
7417
                 ParseEnv* env)
7418
5.13k
{
7419
22.6k
#define MAX_CALLOUT_ARG_BYTE_LENGTH   128
7420
7421
5.13k
  int r;
7422
5.13k
  int n;
7423
5.13k
  int esc;
7424
5.13k
  int cn;
7425
5.13k
  UChar* s;
7426
5.13k
  UChar* e;
7427
5.13k
  UChar* eesc;
7428
5.13k
  OnigCodePoint c;
7429
5.13k
  UChar* bufend;
7430
5.13k
  UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
7431
5.13k
  OnigEncoding enc = env->enc;
7432
5.13k
  UChar* p = *src;
7433
7434
5.13k
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7435
7436
5.13k
  c = 0;
7437
5.13k
  n = 0;
7438
11.5k
  while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
7439
11.5k
    cn  = 0;
7440
11.5k
    esc = 0;
7441
11.5k
    eesc = 0;
7442
11.5k
    bufend = buf;
7443
11.5k
    s = e = p;
7444
60.4k
    while (1) {
7445
60.4k
      if (PEND) {
7446
146
        r = ONIGERR_INVALID_CALLOUT_PATTERN;
7447
146
        goto err_clear;
7448
146
      }
7449
7450
60.3k
      e = p;
7451
60.3k
      PFETCH_S(c);
7452
60.3k
      if (esc != 0) {
7453
2.10k
        esc = 0;
7454
2.10k
        if (c == '\\' || c == cterm || c == ',') {
7455
          /* */
7456
1.14k
        }
7457
960
        else {
7458
960
          e = eesc;
7459
960
          cn++;
7460
960
        }
7461
2.10k
        goto add_char;
7462
2.10k
      }
7463
58.2k
      else {
7464
58.2k
        if (c == '\\') {
7465
2.10k
          esc = 1;
7466
2.10k
          eesc = e;
7467
2.10k
        }
7468
56.1k
        else if (c == cterm || c == ',')
7469
11.4k
          break;
7470
44.6k
        else {
7471
44.6k
          size_t clen;
7472
7473
46.7k
        add_char:
7474
46.7k
          if (skip_mode == FALSE) {
7475
22.6k
            clen = p - e;
7476
22.6k
            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) {
7477
4
              r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
7478
4
              goto err_clear;
7479
4
            }
7480
7481
22.6k
            xmemcpy(bufend, e, clen);
7482
22.6k
            bufend += clen;
7483
22.6k
          }
7484
46.7k
          cn++;
7485
46.7k
        }
7486
58.2k
      }
7487
60.3k
    }
7488
7489
11.4k
    if (cn != 0) {
7490
10.4k
      if (max_arg_num >= 0 && n >= max_arg_num) {
7491
10
        r = ONIGERR_INVALID_CALLOUT_ARG;
7492
10
        goto err_clear;
7493
10
      }
7494
7495
10.4k
      if (skip_mode == FALSE) {
7496
5.15k
        if ((types[n] & ONIG_TYPE_LONG) != 0) {
7497
3.49k
          int fixed = 0;
7498
3.49k
          if (cn > 0) {
7499
3.49k
            long rl;
7500
3.49k
            r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);
7501
3.49k
            if (r == ONIG_NORMAL) {
7502
2.99k
              vals[n].l = rl;
7503
2.99k
              fixed = 1;
7504
2.99k
              types[n] = ONIG_TYPE_LONG;
7505
2.99k
            }
7506
3.49k
          }
7507
7508
3.49k
          if (fixed == 0) {
7509
494
            types[n] = (types[n] & ~ONIG_TYPE_LONG);
7510
494
            if (types[n] == ONIG_TYPE_VOID) {
7511
0
              r = ONIGERR_INVALID_CALLOUT_ARG;
7512
0
              goto err_clear;
7513
0
            }
7514
494
          }
7515
3.49k
        }
7516
7517
5.15k
        switch (types[n]) {
7518
2.99k
        case ONIG_TYPE_LONG:
7519
2.99k
          break;
7520
7521
436
        case ONIG_TYPE_CHAR:
7522
436
          if (cn != 1) {
7523
6
            r = ONIGERR_INVALID_CALLOUT_ARG;
7524
6
            goto err_clear;
7525
6
          }
7526
430
          vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
7527
430
          break;
7528
7529
1.22k
        case ONIG_TYPE_STRING:
7530
1.22k
          {
7531
1.22k
            UChar* rs = onigenc_strdup(enc, buf, bufend);
7532
1.22k
            if (IS_NULL(rs)) {
7533
0
              r = ONIGERR_MEMORY; goto err_clear;
7534
0
            }
7535
1.22k
            vals[n].s.start = rs;
7536
1.22k
            vals[n].s.end   = rs + (e - s);
7537
1.22k
          }
7538
0
          break;
7539
7540
494
        case ONIG_TYPE_TAG:
7541
494
          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) {
7542
276
            r = ONIGERR_INVALID_CALLOUT_TAG_NAME;
7543
276
            goto err_clear;
7544
276
          }
7545
7546
218
          vals[n].s.start = s;
7547
218
          vals[n].s.end   = e;
7548
218
          break;
7549
7550
0
        case ONIG_TYPE_VOID:
7551
0
        case ONIG_TYPE_POINTER:
7552
0
          r = ONIGERR_PARSER_BUG;
7553
0
          goto err_clear;
7554
0
          break;
7555
5.15k
        }
7556
5.15k
      }
7557
7558
10.1k
      n++;
7559
10.1k
    }
7560
7561
11.1k
    if (c == cterm) break;
7562
11.1k
  }
7563
7564
4.69k
  if (c != cterm) {
7565
2
    r = ONIGERR_INVALID_CALLOUT_PATTERN;
7566
2
    goto err_clear;
7567
2
  }
7568
7569
4.69k
  *src = p;
7570
4.69k
  return n;
7571
7572
444
 err_clear:
7573
444
  if (skip_mode == FALSE)
7574
296
    clear_callout_args(n, types, vals);
7575
444
  return r;
7576
4.69k
}
7577
7578
/* (*name[TAG]) (*name[TAG]{a,b,..}) */
7579
static int
7580
prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end,
7581
                    ParseEnv* env)
7582
3.97k
{
7583
3.97k
  int r;
7584
3.97k
  int i;
7585
3.97k
  int in;
7586
3.97k
  int num;
7587
3.97k
  int name_id;
7588
3.97k
  int arg_num;
7589
3.97k
  int max_arg_num;
7590
3.97k
  int opt_arg_num;
7591
3.97k
  int is_not_single;
7592
3.97k
  OnigCodePoint c;
7593
3.97k
  UChar* name_start;
7594
3.97k
  UChar* name_end;
7595
3.97k
  UChar* tag_start;
7596
3.97k
  UChar* tag_end;
7597
3.97k
  Node*  node;
7598
3.97k
  CalloutListEntry* e;
7599
3.97k
  RegexExt* ext;
7600
3.97k
  unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
7601
3.97k
  OnigValue    vals[ONIG_CALLOUT_MAX_ARGS_NUM];
7602
3.97k
  OnigEncoding enc = env->enc;
7603
3.97k
  UChar* p = *src;
7604
7605
  /* PFETCH_READY; */
7606
3.97k
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7607
7608
3.95k
  node = 0;
7609
3.95k
  name_start = p;
7610
17.7k
  while (1) {
7611
17.7k
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7612
17.6k
    name_end = p;
7613
17.6k
    PFETCH_S(c);
7614
17.6k
    if (c == cterm || c == '[' || c == '{') break;
7615
17.6k
  }
7616
7617
3.89k
  if (! is_allowed_callout_name(enc, name_start, name_end))
7618
132
    return ONIGERR_INVALID_CALLOUT_NAME;
7619
7620
3.76k
  if (c == '[') {
7621
488
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7622
476
    tag_end = tag_start = p;
7623
2.11k
    while (! PEND) {
7624
1.91k
      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7625
1.91k
      tag_end = p;
7626
1.91k
      PFETCH_S(c);
7627
1.91k
      if (c == ']') break;
7628
1.91k
    }
7629
476
    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7630
156
      return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7631
7632
320
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7633
274
    PFETCH_S(c);
7634
274
  }
7635
3.27k
  else {
7636
3.27k
    tag_start = tag_end = 0;
7637
3.27k
  }
7638
7639
3.54k
  if (c == '{') {
7640
2.69k
    UChar* save;
7641
7642
2.69k
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7643
7644
    /* read for single check only */
7645
2.68k
    save = p;
7646
2.68k
    arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
7647
2.68k
    if (arg_num < 0) return arg_num;
7648
7649
2.53k
    is_not_single = PPEEK_IS(cterm) ?  0 : 1;
7650
2.53k
    p = save;
7651
2.53k
    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7652
2.53k
                                    &name_id);
7653
2.53k
    if (r != ONIG_NORMAL) return r;
7654
7655
2.45k
    max_arg_num = get_callout_arg_num_by_name_id(name_id);
7656
8.58k
    for (i = 0; i < max_arg_num; i++) {
7657
6.12k
      types[i] = get_callout_arg_type_by_name_id(name_id, i);
7658
6.12k
    }
7659
7660
2.45k
    arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
7661
2.45k
    if (arg_num < 0) return arg_num;
7662
7663
2.15k
    if (PEND) {
7664
0
      r = ONIGERR_END_PATTERN_IN_GROUP;
7665
0
      goto err_clear;
7666
0
    }
7667
2.15k
    PFETCH_S(c);
7668
2.15k
  }
7669
852
  else {
7670
852
    arg_num = 0;
7671
7672
852
    is_not_single = 0;
7673
852
    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7674
852
                                      &name_id);
7675
852
    if (r != ONIG_NORMAL) return r;
7676
7677
700
    max_arg_num = get_callout_arg_num_by_name_id(name_id);
7678
948
    for (i = 0; i < max_arg_num; i++) {
7679
248
      types[i] = get_callout_arg_type_by_name_id(name_id, i);
7680
248
    }
7681
700
  }
7682
7683
2.85k
  in = onig_get_callout_in_by_name_id(name_id);
7684
2.85k
  opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
7685
2.85k
  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
7686
92
    r = ONIGERR_INVALID_CALLOUT_ARG;
7687
92
    goto err_clear;
7688
92
  }
7689
7690
2.76k
  if (c != cterm) {
7691
38
    r = ONIGERR_INVALID_CALLOUT_PATTERN;
7692
38
    goto err_clear;
7693
38
  }
7694
7695
2.72k
  r = reg_callout_list_entry(env, &num);
7696
2.72k
  if (r != 0) goto err_clear;
7697
7698
2.72k
  ext = onig_get_regex_ext(env->reg);
7699
2.72k
  if (IS_NULL(ext)) {
7700
0
    r = ONIGERR_MEMORY; goto err_clear;
7701
0
  }
7702
2.72k
  if (IS_NULL(ext->pattern)) {
7703
1.97k
    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7704
1.97k
    if (r != ONIG_NORMAL) goto err_clear;
7705
1.97k
  }
7706
7707
2.72k
  if (tag_start != tag_end) {
7708
204
    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7709
204
    if (r != ONIG_NORMAL) goto err_clear;
7710
204
  }
7711
7712
2.72k
  e = onig_reg_callout_list_at(env->reg, num);
7713
2.72k
  if (IS_NULL(e)) {
7714
0
    r = ONIGERR_MEMORY; goto err_clear;
7715
0
  }
7716
7717
2.72k
  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
7718
2.72k
  if (r != ONIG_NORMAL) goto err_clear;
7719
7720
2.72k
  e->of         = ONIG_CALLOUT_OF_NAME;
7721
2.72k
  e->in         = in;
7722
2.72k
  e->name_id    = name_id;
7723
2.72k
  e->type       = onig_get_callout_type_by_name_id(name_id);
7724
2.72k
  e->start_func = onig_get_callout_start_func_by_name_id(name_id);
7725
2.72k
  e->end_func   = onig_get_callout_end_func_by_name_id(name_id);
7726
2.72k
  e->u.arg.num        = max_arg_num;
7727
2.72k
  e->u.arg.passed_num = arg_num;
7728
8.08k
  for (i = 0; i < max_arg_num; i++) {
7729
5.36k
    e->u.arg.types[i] = types[i];
7730
5.36k
    if (i < arg_num)
7731
4.74k
      e->u.arg.vals[i] = vals[i];
7732
616
    else
7733
616
      e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
7734
5.36k
  }
7735
7736
2.72k
  *np = node;
7737
2.72k
  *src = p;
7738
2.72k
  return 0;
7739
7740
134
 err_clear:
7741
134
  clear_callout_args(arg_num, types, vals);
7742
134
  return r;
7743
2.72k
}
7744
#endif
7745
7746
#ifdef USE_WHOLE_OPTIONS
7747
static int
7748
set_whole_options(OnigOptionType option, ParseEnv* env)
7749
46
{
7750
46
  if ((env->flags & PE_FLAG_HAS_WHOLE_OPTIONS) != 0)
7751
4
    return ONIGERR_INVALID_GROUP_OPTION;
7752
7753
42
  env->flags |= PE_FLAG_HAS_WHOLE_OPTIONS;
7754
7755
42
  if (OPTON_DONT_CAPTURE_GROUP(option)) {
7756
18
    env->reg->options |= ONIG_OPTION_DONT_CAPTURE_GROUP;
7757
18
    if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
7758
4
      return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
7759
18
  }
7760
7761
38
  if ((option & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) {
7762
14
    env->reg->case_fold_flag &=
7763
14
      ~(INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR |
7764
14
        ONIGENC_CASE_FOLD_TURKISH_AZERI);
7765
14
    env->reg->case_fold_flag |= ONIGENC_CASE_FOLD_ASCII_ONLY;
7766
14
    env->reg->options |= ONIG_OPTION_IGNORECASE_IS_ASCII;
7767
14
  }
7768
7769
38
  if (OPTON_FIND_LONGEST(option)) {
7770
22
    env->reg->options |= ONIG_OPTION_FIND_LONGEST;
7771
22
  }
7772
7773
38
  return 0;
7774
42
}
7775
#endif
7776
7777
static int
7778
prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7779
        ParseEnv* env)
7780
59.0k
{
7781
59.0k
  int r, num;
7782
59.0k
  Node *target;
7783
59.0k
  OnigOptionType option;
7784
59.0k
  OnigCodePoint c;
7785
59.0k
  int list_capture;
7786
59.0k
  OnigEncoding enc;
7787
59.0k
  UChar* p;
7788
59.0k
  PFETCH_READY;
7789
7790
59.0k
  p = *src;
7791
59.0k
  enc = env->enc;
7792
59.0k
  *np = NULL;
7793
59.0k
  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7794
7795
59.0k
  option = env->options;
7796
59.0k
  c = PPEEK;
7797
59.0k
  if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7798
25.4k
    PINC;
7799
25.4k
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7800
7801
25.4k
    PFETCH(c);
7802
25.4k
    switch (c) {
7803
100
    case ':':   /* (?:...) grouping only */
7804
1.19k
    group:
7805
1.19k
      r = fetch_token(tok, &p, end, env);
7806
1.19k
      if (r < 0) return r;
7807
1.18k
      r = prs_alts(np, tok, term, &p, end, env, FALSE);
7808
1.18k
      if (r < 0) return r;
7809
1.07k
      *src = p;
7810
1.07k
      return 1; /* group */
7811
0
      break;
7812
7813
2.19k
    case '=':
7814
2.19k
      *np = node_new_anchor(ANCR_PREC_READ);
7815
2.19k
      break;
7816
636
    case '!':  /*         preceding read */
7817
636
      *np = node_new_anchor(ANCR_PREC_READ_NOT);
7818
636
      break;
7819
178
    case '>':            /* (?>...) stop backtrack */
7820
178
      *np = node_new_bag(BAG_STOP_BACKTRACK);
7821
178
      break;
7822
7823
6.30k
    case '\'':
7824
6.30k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7825
6.30k
        goto named_group1;
7826
6.30k
      }
7827
2
      else
7828
2
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7829
0
      break;
7830
7831
4.68k
    case '<':   /* look behind (?<=...), (?<!...) */
7832
4.68k
      if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7833
4.68k
      PFETCH(c);
7834
4.68k
      if (c == '=')
7835
2.68k
        *np = node_new_anchor(ANCR_LOOK_BEHIND);
7836
2.00k
      else if (c == '!')
7837
1.47k
        *np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
7838
530
      else {
7839
530
        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7840
528
          UChar *name;
7841
528
          UChar *name_end;
7842
528
          enum REF_NUM num_type;
7843
7844
528
          PUNFETCH;
7845
528
          c = '<';
7846
7847
6.85k
        named_group1:
7848
6.85k
          list_capture = 0;
7849
7850
6.85k
#ifdef USE_CAPTURE_HISTORY
7851
6.85k
        named_group2:
7852
6.85k
#endif
7853
6.85k
          name = p;
7854
6.85k
          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7855
6.85k
                         &num_type, FALSE);
7856
6.85k
          if (r < 0) return r;
7857
7858
6.52k
          num = scan_env_add_mem_entry(env);
7859
6.52k
          if (num < 0) return num;
7860
6.52k
          if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7861
0
            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7862
7863
6.52k
          r = name_add(env->reg, name, name_end, num, env);
7864
6.52k
          if (r != 0) return r;
7865
6.52k
          *np = node_new_memory(1);
7866
6.52k
          CHECK_NULL_RETURN_MEMERR(*np);
7867
6.52k
          BAG_(*np)->m.regnum = num;
7868
6.52k
          if (list_capture != 0)
7869
0
            MEM_STATUS_ON_SIMPLE(env->cap_history, num);
7870
6.52k
          env->num_named++;
7871
6.52k
        }
7872
2
        else {
7873
2
          return ONIGERR_UNDEFINED_GROUP_OPTION;
7874
2
        }
7875
530
      }
7876
10.6k
      break;
7877
7878
10.6k
    case '~':
7879
3.07k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7880
3.07k
        Node* absent;
7881
3.07k
        Node* expr;
7882
3.07k
        int head_bar;
7883
3.07k
        int is_range_cutter;
7884
7885
3.07k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7886
7887
3.06k
        if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7888
1.80k
          PINC;
7889
1.80k
          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7890
7891
1.79k
          head_bar = 1;
7892
1.79k
          if (PPEEK_IS(')')) { /* (?~|)  : range clear */
7893
312
            PINC;
7894
312
            r = make_range_clear(np, env);
7895
312
            if (r != 0) return r;
7896
312
            env->flags |= PE_FLAG_HAS_ABSENT_STOPPER;
7897
312
            goto end;
7898
312
          }
7899
1.79k
        }
7900
1.26k
        else
7901
1.26k
          head_bar = 0;
7902
7903
2.74k
        r = fetch_token(tok, &p, end, env);
7904
2.74k
        if (r < 0) return r;
7905
2.72k
        r = prs_alts(&absent, tok, term, &p, end, env, TRUE);
7906
2.72k
        if (r < 0) {
7907
188
          onig_node_free(absent);
7908
188
          return r;
7909
188
        }
7910
7911
2.54k
        expr = NULL_NODE;
7912
2.54k
        is_range_cutter = 0;
7913
2.54k
        if (head_bar != 0) {
7914
1.38k
          Node* top = absent;
7915
1.38k
          if (ND_TYPE(top) != ND_ALT || IS_NULL(ND_CDR(top))) {
7916
240
            expr = NULL_NODE;
7917
240
            is_range_cutter = 1;
7918
240
            env->flags |= PE_FLAG_HAS_ABSENT_STOPPER;
7919
240
          }
7920
1.14k
          else {
7921
1.14k
            absent = ND_CAR(top);
7922
1.14k
            expr   = ND_CDR(top);
7923
1.14k
            ND_CAR(top) = NULL_NODE;
7924
1.14k
            ND_CDR(top) = NULL_NODE;
7925
1.14k
            onig_node_free(top);
7926
1.14k
            if (IS_NULL(ND_CDR(expr))) {
7927
932
              top = expr;
7928
932
              expr = ND_CAR(top);
7929
932
              ND_CAR(top) = NULL_NODE;
7930
932
              onig_node_free(top);
7931
932
            }
7932
1.14k
          }
7933
1.38k
        }
7934
7935
2.54k
        r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7936
2.54k
        if (r != 0) {
7937
0
          return r;
7938
0
        }
7939
2.54k
        goto end;
7940
2.54k
      }
7941
2
      else {
7942
2
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7943
2
      }
7944
0
      break;
7945
7946
0
#ifdef USE_CALLOUT
7947
1.33k
    case '{':
7948
1.33k
      if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7949
2
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7950
7951
1.33k
      r = prs_callout_of_contents(np, ')', &p, end, env);
7952
1.33k
      if (r != 0) return r;
7953
7954
758
      goto end;
7955
758
      break;
7956
758
#endif
7957
7958
5.00k
    case '(':
7959
      /* (?()...) */
7960
5.00k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7961
5.00k
        UChar *prev;
7962
5.00k
        Node* condition;
7963
5.00k
        int condition_is_checker;
7964
7965
5.00k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7966
5.00k
        PFETCH(c);
7967
5.00k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7968
7969
4.98k
        if (IS_CODE_DIGIT_ASCII(enc, c)
7970
4.98k
            || c == '-' || c == '+' || c == '<' || c == '\'') {
7971
2.01k
#ifdef USE_BACKREF_WITH_LEVEL
7972
2.01k
          int exist_level;
7973
2.01k
          int level;
7974
2.01k
#endif
7975
2.01k
          UChar* name_end;
7976
2.01k
          int back_num;
7977
2.01k
          enum REF_NUM num_type;
7978
2.01k
          int is_enclosed;
7979
7980
2.01k
          is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7981
2.01k
          if (! is_enclosed)
7982
1.77k
            PUNFETCH;
7983
2.01k
          prev = p;
7984
2.01k
#ifdef USE_BACKREF_WITH_LEVEL
7985
2.01k
          exist_level = 0;
7986
2.01k
          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7987
2.01k
          r = fetch_name_with_level(
7988
2.01k
                    (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7989
2.01k
                    &p, end, &name_end,
7990
2.01k
                    env, &back_num, &level, &num_type);
7991
2.01k
          if (r == 1) exist_level = 1;
7992
#else
7993
          r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7994
                         &p, end, &name_end, env, &back_num, &num_type, TRUE);
7995
#endif
7996
2.01k
          if (r < 0) {
7997
750
            if (is_enclosed == 0) {
7998
704
              goto any_condition;
7999
704
            }
8000
46
            else
8001
46
              return r;
8002
750
          }
8003
8004
1.26k
          condition_is_checker = 1;
8005
1.26k
          if (num_type != IS_NOT_NUM) {
8006
1.17k
            if (num_type == IS_REL_NUM) {
8007
304
              back_num = backref_rel_to_abs(back_num, env);
8008
304
            }
8009
1.17k
            if (back_num <= 0)
8010
96
              return ONIGERR_INVALID_BACKREF;
8011
8012
1.08k
            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
8013
0
              if (back_num > env->num_mem ||
8014
0
                  IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
8015
0
                return ONIGERR_INVALID_BACKREF;
8016
0
            }
8017
8018
1.08k
            condition = node_new_backref_checker(1, &back_num, FALSE,
8019
1.08k
#ifdef USE_BACKREF_WITH_LEVEL
8020
1.08k
                                                 exist_level, level,
8021
1.08k
#endif
8022
1.08k
                                                 env);
8023
1.08k
          }
8024
86
          else {
8025
86
            int num;
8026
86
            int* backs;
8027
8028
86
            num = name_to_group_numbers(env, prev, name_end, &backs);
8029
86
            if (num <= 0) {
8030
2
              return ONIGERR_UNDEFINED_NAME_REFERENCE;
8031
2
            }
8032
84
            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
8033
0
              int i;
8034
0
              for (i = 0; i < num; i++) {
8035
0
                if (backs[i] > env->num_mem ||
8036
0
                    IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
8037
0
                  return ONIGERR_INVALID_BACKREF;
8038
0
              }
8039
0
            }
8040
8041
84
            condition = node_new_backref_checker(num, backs, TRUE,
8042
84
#ifdef USE_BACKREF_WITH_LEVEL
8043
84
                                                 exist_level, level,
8044
84
#endif
8045
84
                                                 env);
8046
84
          }
8047
8048
1.16k
          if (is_enclosed != 0) {
8049
190
            if (PEND) goto err_if_else;
8050
186
            PFETCH(c);
8051
186
            if (c != ')') goto err_if_else;
8052
186
          }
8053
1.16k
        }
8054
2.97k
#ifdef USE_CALLOUT
8055
2.97k
        else if (c == '?') {
8056
116
          if (IS_SYNTAX_OP2(env->syntax,
8057
116
                            ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
8058
114
            if (! PEND && PPEEK_IS('{')) {
8059
              /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
8060
66
              condition_is_checker = 0;
8061
66
              PFETCH(c);
8062
66
              r = prs_callout_of_contents(&condition, ')', &p, end, env);
8063
66
              if (r != 0) return r;
8064
40
              goto end_condition;
8065
66
            }
8066
114
          }
8067
50
          goto any_condition;
8068
116
        }
8069
2.85k
        else if (c == '*' &&
8070
2.85k
                 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8071
40
          condition_is_checker = 0;
8072
40
          r = prs_callout_of_name(&condition, ')', &p, end, env);
8073
40
          if (r != 0) return r;
8074
36
          goto end_condition;
8075
40
        }
8076
2.81k
#endif
8077
2.81k
        else {
8078
3.57k
        any_condition:
8079
3.57k
          PUNFETCH;
8080
3.57k
          condition_is_checker = 0;
8081
3.57k
          r = fetch_token(tok, &p, end, env);
8082
3.57k
          if (r < 0) return r;
8083
3.56k
          r = prs_alts(&condition, tok, term, &p, end, env, FALSE);
8084
3.56k
          if (r < 0) {
8085
498
            onig_node_free(condition);
8086
498
            return r;
8087
498
          }
8088
3.56k
        }
8089
8090
4.15k
#ifdef USE_CALLOUT
8091
4.22k
      end_condition:
8092
4.22k
#endif
8093
4.22k
        CHECK_NULL_RETURN_MEMERR(condition);
8094
8095
4.22k
        if (PEND) {
8096
228
        err_if_else:
8097
228
          onig_node_free(condition);
8098
228
          return ONIGERR_END_PATTERN_IN_GROUP;
8099
150
        }
8100
8101
4.07k
        if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
8102
490
          if (condition_is_checker == 0) {
8103
10
            onig_node_free(condition);
8104
10
            return ONIGERR_INVALID_IF_ELSE_SYNTAX;
8105
10
          }
8106
480
          PFETCH(c);
8107
480
          *np = condition;
8108
480
        }
8109
3.58k
        else { /* if-else */
8110
3.58k
          int then_is_empty;
8111
3.58k
          Node *Then, *Else;
8112
8113
3.58k
          Then = 0;
8114
3.58k
          if (PPEEK_IS('|')) {
8115
204
            PFETCH(c);
8116
204
            then_is_empty = 1;
8117
204
          }
8118
3.38k
          else
8119
3.38k
            then_is_empty = 0;
8120
8121
3.58k
          r = fetch_token(tok, &p, end, env);
8122
3.58k
          if (r < 0) {
8123
4
            onig_node_free(condition);
8124
4
            return r;
8125
4
          }
8126
3.58k
          r = prs_alts(&target, tok, term, &p, end, env, TRUE);
8127
3.58k
          if (r < 0) {
8128
250
            onig_node_free(condition);
8129
250
            onig_node_free(target);
8130
250
            return r;
8131
250
          }
8132
8133
3.33k
          if (then_is_empty != 0) {
8134
186
            Else = target;
8135
186
          }
8136
3.14k
          else {
8137
3.14k
            if (ND_TYPE(target) == ND_ALT) {
8138
520
              Then = ND_CAR(target);
8139
520
              if (ND_CDR(ND_CDR(target)) == NULL_NODE) {
8140
398
                Else = ND_CAR(ND_CDR(target));
8141
398
                cons_node_free_alone(ND_CDR(target));
8142
398
              }
8143
122
              else {
8144
122
                Else = ND_CDR(target);
8145
122
              }
8146
520
              cons_node_free_alone(target);
8147
520
            }
8148
2.62k
            else {
8149
2.62k
              Then = target;
8150
2.62k
              Else = 0;
8151
2.62k
            }
8152
3.14k
          }
8153
8154
3.33k
          *np = node_new_bag_if_else(condition, Then, Else);
8155
3.33k
          if (IS_NULL(*np)) {
8156
0
            onig_node_free(condition);
8157
0
            onig_node_free(Then);
8158
0
            onig_node_free(Else);
8159
0
            return ONIGERR_MEMORY;
8160
0
          }
8161
3.33k
        }
8162
3.81k
        goto end;
8163
4.07k
      }
8164
2
      else {
8165
2
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8166
2
      }
8167
0
      break;
8168
8169
0
#ifdef USE_CAPTURE_HISTORY
8170
2
    case '@':
8171
2
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
8172
0
        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
8173
0
          PFETCH(c);
8174
0
          if (c == '<' || c == '\'') {
8175
0
            list_capture = 1;
8176
0
            goto named_group2; /* (?@<name>...) */
8177
0
          }
8178
0
          PUNFETCH;
8179
0
        }
8180
8181
0
        *np = node_new_memory(0);
8182
0
        CHECK_NULL_RETURN_MEMERR(*np);
8183
0
        num = scan_env_add_mem_entry(env);
8184
0
        if (num < 0) {
8185
0
          return num;
8186
0
        }
8187
0
        else if (num >= (int )MEM_STATUS_BITS_NUM) {
8188
0
          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
8189
0
        }
8190
0
        BAG_(*np)->m.regnum = num;
8191
0
        MEM_STATUS_ON_SIMPLE(env->cap_history, num);
8192
0
      }
8193
2
      else {
8194
2
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8195
2
      }
8196
0
      break;
8197
0
#endif
8198
8199
0
#ifdef USE_WHOLE_OPTIONS
8200
32
    case 'C':
8201
58
    case 'I':
8202
86
    case 'L':
8203
86
      if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8204
4
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8205
8206
82
      goto options_start;
8207
82
      break;
8208
82
#endif
8209
8210
208
    case 'P':
8211
208
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
8212
110
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8213
110
        PFETCH(c);
8214
110
        if (c == '<') goto named_group1;
8215
8216
88
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8217
110
      }
8218
      /* else fall */
8219
428
    case 'W': case 'D': case 'S':
8220
558
    case 'y':
8221
558
      if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8222
8
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8223
      /* else fall */
8224
8225
#ifdef USE_POSIXLINE_OPTION
8226
    case 'p':
8227
#endif
8228
634
    case 'a':
8229
1.60k
    case '-': case 'i': case 'm': case 's': case 'x':
8230
1.60k
#ifdef USE_WHOLE_OPTIONS
8231
1.68k
      options_start:
8232
1.68k
#endif
8233
1.68k
      {
8234
1.68k
        int neg;
8235
1.68k
#ifdef USE_WHOLE_OPTIONS
8236
1.68k
        int whole_options;
8237
1.68k
        whole_options = FALSE;
8238
1.68k
#endif
8239
1.68k
        neg = FALSE;
8240
7.86k
        while (1) {
8241
7.86k
          switch (c) {
8242
542
          case ':':
8243
1.23k
          case ')':
8244
1.23k
            break;
8245
8246
468
          case '-':  neg = TRUE; break;
8247
600
          case 'x':  OPTION_NEGATE(option, ONIG_OPTION_EXTEND,     neg); break;
8248
726
          case 'i':  OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
8249
472
          case 's':
8250
472
            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8251
470
              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8252
470
            }
8253
2
            else
8254
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8255
470
            break;
8256
8257
974
          case 'm':
8258
974
            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8259
498
              OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == FALSE ? TRUE : FALSE));
8260
498
            }
8261
476
            else if (IS_SYNTAX_OP2(env->syntax,
8262
476
                        ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
8263
476
              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8264
476
            }
8265
0
            else
8266
0
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8267
974
            break;
8268
#ifdef USE_POSIXLINE_OPTION
8269
          case 'p':
8270
            OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
8271
            break;
8272
#endif
8273
974
          case 'W':
8274
536
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8275
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8276
534
            OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg);
8277
534
            break;
8278
464
          case 'D':
8279
464
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8280
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8281
462
            OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg);
8282
462
            break;
8283
504
          case 'S':
8284
504
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8285
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8286
502
            OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg);
8287
502
            break;
8288
474
          case 'P':
8289
474
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8290
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8291
472
            OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8292
472
            break;
8293
8294
236
          case 'y': /* y{g}, y{w} */
8295
236
            {
8296
236
              if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8297
2
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8298
8299
234
              if (neg == TRUE) return ONIGERR_UNDEFINED_GROUP_OPTION;
8300
8301
232
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8302
228
              if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
8303
198
              PFETCH(c);
8304
198
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8305
184
              PFETCH(c);
8306
184
              switch (c) {
8307
44
              case 'g':
8308
44
                if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8309
2
                  return ONIGERR_UNDEFINED_GROUP_OPTION;
8310
8311
42
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
8312
42
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
8313
42
                break;
8314
0
#ifdef USE_UNICODE_WORD_BREAK
8315
120
              case 'w':
8316
120
                if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8317
2
                  return ONIGERR_UNDEFINED_GROUP_OPTION;
8318
8319
118
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
8320
118
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
8321
118
                break;
8322
0
#endif
8323
20
              default:
8324
20
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8325
0
                break;
8326
184
              }
8327
160
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8328
152
              PFETCH(c);
8329
152
              if (c != '}')
8330
6
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8331
152
            } /* case 'y' */
8332
146
            break;
8333
8334
460
          case 'a':
8335
460
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_PYTHON))
8336
4
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8337
8338
456
            OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8339
456
            break;
8340
8341
0
#ifdef USE_WHOLE_OPTIONS
8342
226
          case 'C':
8343
226
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8344
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8345
8346
224
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8347
222
            OPTION_NEGATE(option, ONIG_OPTION_DONT_CAPTURE_GROUP, neg);
8348
222
            whole_options = TRUE;
8349
222
            break;
8350
8351
210
          case 'I':
8352
210
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8353
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8354
8355
208
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8356
206
            OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE_IS_ASCII, neg);
8357
206
            whole_options = TRUE;
8358
206
            break;
8359
8360
214
          case 'L':
8361
214
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8362
2
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8363
8364
212
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8365
210
            OPTION_NEGATE(option, ONIG_OPTION_FIND_LONGEST, neg);
8366
210
            whole_options = TRUE;
8367
210
            break;
8368
0
#endif
8369
8370
68
          default:
8371
68
            return ONIGERR_UNDEFINED_GROUP_OPTION;
8372
7.86k
          }
8373
8374
7.68k
          if (c == ')') {
8375
694
            *np = node_new_option(option);
8376
694
            CHECK_NULL_RETURN_MEMERR(*np);
8377
8378
694
#ifdef USE_WHOLE_OPTIONS
8379
694
            if (whole_options == TRUE) {
8380
30
              r = set_whole_options(option, env);
8381
30
              if (r != 0) return r;
8382
26
              ND_STATUS_ADD(*np, WHOLE_OPTIONS);
8383
26
            }
8384
690
#endif
8385
690
            *src = p;
8386
690
            return 2; /* option only */
8387
694
          }
8388
6.99k
          else if (c == ':') {
8389
542
            OnigOptionType prev = env->options;
8390
8391
542
            env->options = option;
8392
542
#ifdef USE_WHOLE_OPTIONS
8393
542
            if (whole_options == TRUE) {
8394
16
              r = set_whole_options(option, env);
8395
16
              if (r != 0) return r;
8396
16
            }
8397
538
#endif
8398
538
            r = fetch_token(tok, &p, end, env);
8399
538
            if (r < 0) return r;
8400
524
            r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8401
524
            env->options = prev;
8402
524
            if (r < 0) {
8403
82
              onig_node_free(target);
8404
82
              return r;
8405
82
            }
8406
8407
442
            *np = node_new_option(option);
8408
442
            CHECK_NULL_RETURN_MEMERR(*np);
8409
442
            ND_BODY(*np) = target;
8410
442
            ND_STATUS_ADD(*np, WHOLE_OPTIONS);
8411
8412
442
            *src = p;
8413
442
            return 0;
8414
442
          }
8415
8416
6.44k
          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8417
6.18k
          PFETCH(c);
8418
6.18k
        } /* while (1) */
8419
1.68k
      }
8420
0
      break;
8421
8422
126
    default:
8423
126
      return ONIGERR_UNDEFINED_GROUP_OPTION;
8424
25.4k
    }
8425
25.4k
  }
8426
33.5k
#ifdef USE_CALLOUT
8427
33.5k
  else if (c == '*' &&
8428
33.5k
           IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8429
3.93k
    PINC;
8430
3.93k
    r = prs_callout_of_name(np, ')', &p, end, env);
8431
3.93k
    if (r != 0) return r;
8432
8433
2.68k
    goto end;
8434
3.93k
  }
8435
29.5k
#endif
8436
29.5k
  else {
8437
29.5k
    if (OPTON_DONT_CAPTURE_GROUP(env->options))
8438
1.09k
      goto group;
8439
8440
28.4k
    *np = node_new_memory(0);
8441
28.4k
    CHECK_NULL_RETURN_MEMERR(*np);
8442
28.4k
    num = scan_env_add_mem_entry(env);
8443
28.4k
    if (num < 0) return num;
8444
28.4k
    BAG_(*np)->m.regnum = num;
8445
28.4k
  }
8446
8447
42.1k
  CHECK_NULL_RETURN_MEMERR(*np);
8448
42.1k
  r = fetch_token(tok, &p, end, env);
8449
42.1k
  if (r < 0) return r;
8450
42.1k
  r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8451
42.1k
  if (r < 0) {
8452
1.04k
    onig_node_free(target);
8453
1.04k
    return r;
8454
1.04k
  }
8455
8456
41.1k
  ND_BODY(*np) = target;
8457
8458
41.1k
  if (ND_TYPE(*np) == ND_BAG) {
8459
34.2k
    if (BAG_(*np)->type == BAG_MEMORY) {
8460
      /* Don't move this to previous of prs_alts() */
8461
34.0k
      r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
8462
34.0k
      if (r != 0) return r;
8463
34.0k
    }
8464
34.2k
  }
8465
8466
51.2k
 end:
8467
51.2k
  *src = p;
8468
51.2k
  return 0;
8469
41.1k
}
8470
8471
/* Printable forms of the quantifiers, used when building the nested-repeat
 * warning text.  Indexed by the value quantifier_type_num() returns. */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
8474
8475
/* Text describing what a pair of nested quantifiers was replaced with.
 * Indexed by an entry of ReduceTypeTable; the first two slots are empty
 * strings (presumably the "as is" and "delete" kinds, which the warning
 * code handles without consulting this table -- confirm against the enum). */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
8478
8479
static int
8480
assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
8481
79.0k
{
8482
79.0k
  QuantNode* qn;
8483
8484
79.0k
  qn = QUANT_(qnode);
8485
79.0k
  if (qn->lower == 1 && qn->upper == 1)
8486
314
    return 1;
8487
8488
78.7k
  switch (ND_TYPE(target)) {
8489
30.9k
  case ND_STRING:
8490
30.9k
    if (group == 0) {
8491
30.6k
      if (str_node_can_be_split(target, env->enc)) {
8492
11.6k
        Node* n = str_node_split_last_char(target, env->enc);
8493
11.6k
        if (IS_NOT_NULL(n)) {
8494
11.5k
          ND_BODY(qnode) = n;
8495
11.5k
          return 2;
8496
11.5k
        }
8497
11.6k
      }
8498
30.6k
    }
8499
19.3k
    break;
8500
8501
19.3k
  case ND_QUANT:
8502
16.6k
    { /* check redundant double repeat. */
8503
      /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
8504
16.6k
      QuantNode* qnt   = QUANT_(target);
8505
16.6k
      int nestq_num   = quantifier_type_num(qn);
8506
16.6k
      int targetq_num = quantifier_type_num(qnt);
8507
8508
16.6k
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
8509
16.6k
      if (targetq_num >= 0 && nestq_num >= 0 &&
8510
16.6k
          IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
8511
1.87k
        UChar buf[WARN_BUFSIZE];
8512
8513
1.87k
        switch(ReduceTypeTable[targetq_num][nestq_num]) {
8514
122
        case RQ_ASIS:
8515
122
          break;
8516
8517
698
        case RQ_DEL:
8518
698
          if (onig_verb_warn != onig_null_warn) {
8519
0
            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8520
0
                                  env->pattern, env->pattern_end,
8521
0
                                  (UChar* )"redundant nested repeat operator");
8522
0
            (*onig_verb_warn)((char* )buf);
8523
0
          }
8524
698
          goto warn_exit;
8525
0
          break;
8526
8527
1.05k
        default:
8528
1.05k
          if (onig_verb_warn != onig_null_warn) {
8529
0
            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8530
0
                                       env->pattern, env->pattern_end,
8531
0
            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
8532
0
            PopularQStr[targetq_num], PopularQStr[nestq_num],
8533
0
            ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
8534
0
            (*onig_verb_warn)((char* )buf);
8535
0
          }
8536
1.05k
          goto warn_exit;
8537
0
          break;
8538
1.87k
        }
8539
1.87k
      }
8540
8541
16.6k
    warn_exit:
8542
16.6k
#endif
8543
16.6k
      if (targetq_num >= 0 && nestq_num < 0) {
8544
4.50k
        if (targetq_num == 1 || targetq_num == 2) { /* * or + */
8545
          /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
8546
3.57k
          if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
8547
2.73k
            qn->upper = (qn->lower == 0 ? 1 : qn->lower);
8548
2.73k
          }
8549
3.57k
        }
8550
4.50k
      }
8551
12.1k
      else {
8552
12.1k
        int r;
8553
8554
12.1k
        ND_BODY(qnode) = target;
8555
12.1k
        r = onig_reduce_nested_quantifier(qnode);
8556
12.1k
        return r;
8557
12.1k
      }
8558
16.6k
    }
8559
4.50k
    break;
8560
8561
31.0k
  default:
8562
31.0k
    break;
8563
78.7k
  }
8564
8565
54.9k
  ND_BODY(qnode) = target;
8566
54.9k
  return 0;
8567
78.7k
}
8568
8569
8570
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8571
static int
8572
clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
8573
{
8574
  BBuf *tbuf;
8575
  int r;
8576
8577
  if (IS_NCCLASS_NOT(cc)) {
8578
    bitset_invert(cc->bs);
8579
8580
    if (! ONIGENC_IS_SINGLEBYTE(enc)) {
8581
      r = not_code_range_buf(enc, cc->mbuf, &tbuf);
8582
      if (r != 0) return r;
8583
8584
      bbuf_free(cc->mbuf);
8585
      cc->mbuf = tbuf;
8586
    }
8587
8588
    NCCLASS_CLEAR_NOT(cc);
8589
  }
8590
8591
  return 0;
8592
}
8593
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8594
8595
4.59M
/* Add one code point to character class `cc`: set its bit in the bitset when
 * the encoding's minimum character length is not above 1 and the code
 * encodes to exactly one byte, otherwise add it to the range buffer.
 * NOTE: the result of add_code_range_to_buf() is discarded here. */
#define ADD_CODE_INTO_CC(cc, code, enc) do {\
  if (ONIGENC_MBC_MINLEN(enc) <= 1 && ONIGENC_CODE_TO_MBCLEN(enc, code) == 1) {\
    BITSET_SET_BIT((cc)->bs, code);\
  }\
  else {\
    add_code_range_to_buf(&((cc)->mbuf), code, code);\
  }\
} while (0)
8603
8604
extern int
8605
onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
8606
                               int n, OnigCodePoint codes[])
8607
62.4k
{
8608
62.4k
  int i;
8609
62.4k
  Node* node;
8610
62.4k
  CClassNode* cc;
8611
8612
62.4k
  *rnode = NULL_NODE;
8613
8614
62.4k
  node = node_new_cclass();
8615
62.4k
  CHECK_NULL_RETURN_MEMERR(node);
8616
8617
62.4k
  cc = CCLASS_(node);
8618
8619
189k
  for (i = 0; i < n; i++) {
8620
126k
    ADD_CODE_INTO_CC(cc, codes[i], enc);
8621
126k
  }
8622
8623
62.4k
  *rnode = node;
8624
62.4k
  return 0;
8625
62.4k
}
8626
8627
typedef struct {
8628
  ParseEnv*   env;
8629
  CClassNode* cc;
8630
  Node*       alt_root;
8631
  Node**      ptail;
8632
} IApplyCaseFoldArg;
8633
8634
static int
8635
i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
8636
                  void* arg)
8637
9.74M
{
8638
9.74M
  IApplyCaseFoldArg* iarg;
8639
9.74M
  ParseEnv* env;
8640
9.74M
  OnigEncoding enc;
8641
9.74M
  CClassNode* cc;
8642
8643
9.74M
  iarg = (IApplyCaseFoldArg* )arg;
8644
9.74M
  env = iarg->env;
8645
9.74M
  cc  = iarg->cc;
8646
9.74M
  enc = env->enc;
8647
8648
9.74M
  if (to_len == 1) {
8649
9.43M
    int is_in = onig_is_code_in_cc(enc, from, cc);
8650
9.43M
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8651
9.43M
    if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
8652
9.43M
        (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
8653
3.83M
      ADD_CODE_INTO_CC(cc, *to, enc);
8654
3.83M
    }
8655
#else
8656
    if (is_in != 0) {
8657
      if (ONIGENC_MBC_MINLEN(enc) > 1 ||
8658
          ONIGENC_CODE_TO_MBCLEN(enc, *to) != 1) {
8659
        if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
8660
        add_code_range(&(cc->mbuf), env, *to, *to);
8661
      }
8662
      else {
8663
        if (IS_NCCLASS_NOT(cc)) {
8664
          BITSET_CLEAR_BIT(cc->bs, *to);
8665
        }
8666
        else
8667
          BITSET_SET_BIT(cc->bs, *to);
8668
      }
8669
    }
8670
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8671
9.43M
  }
8672
312k
  else {
8673
312k
    int r, i, len;
8674
312k
    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8675
8676
312k
    if (onig_is_code_in_cc(enc, from, cc)
8677
312k
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8678
312k
        && !IS_NCCLASS_NOT(cc)
8679
312k
#endif
8680
312k
        ) {
8681
129k
      int n, j, m, index;
8682
129k
      Node* list_node;
8683
129k
      Node* ns[3];
8684
8685
129k
      n = 0;
8686
408k
      for (i = 0; i < to_len; i++) {
8687
278k
        OnigCodePoint code;
8688
278k
        Node* csnode;
8689
278k
        CClassNode* cs_cc;
8690
8691
278k
        index = 0;
8692
278k
        if (ONIGENC_IS_UNICODE_ENCODING(enc) &&
8693
278k
            (index = onigenc_unicode_fold1_key(&to[i])) >= 0) {
8694
228k
          csnode = node_new_cclass();
8695
228k
          cs_cc = CCLASS_(csnode);
8696
228k
          if (IS_NULL(csnode)) {
8697
0
          err_free_ns:
8698
0
            for (j = 0; j < n; j++) onig_node_free(ns[j]);
8699
0
            return ONIGERR_MEMORY;
8700
0
          }
8701
228k
          m = FOLDS1_UNFOLDS_NUM(index);
8702
639k
          for (j = 0; j < m; j++) {
8703
410k
            code = FOLDS1_UNFOLDS(index)[j];
8704
410k
            ADD_CODE_INTO_CC(cs_cc, code, enc);
8705
410k
          }
8706
228k
          ADD_CODE_INTO_CC(cs_cc, to[i], enc);
8707
228k
          ns[n++] = csnode;
8708
228k
        }
8709
50.4k
        else {
8710
50.4k
          len = ONIGENC_CODE_TO_MBC(enc, to[i], buf);
8711
50.4k
          if (n == 0 || ND_TYPE(ns[n-1]) != ND_STRING) {
8712
36.4k
            csnode = node_new_str(buf, buf + len);
8713
36.4k
            if (IS_NULL(csnode)) goto err_free_ns;
8714
8715
36.4k
            if (index == 0)
8716
300
              ND_STATUS_ADD(csnode, IGNORECASE);
8717
36.1k
            else
8718
36.1k
              ND_STRING_SET_CASE_EXPANDED(csnode);
8719
8720
36.4k
            ns[n++] = csnode;
8721
36.4k
          }
8722
13.9k
          else {
8723
13.9k
            r = onig_node_str_cat(ns[n-1], buf, buf + len);
8724
13.9k
            if (r < 0) goto err_free_ns;
8725
13.9k
          }
8726
50.4k
        }
8727
278k
      }
8728
8729
129k
      if (n == 1)
8730
300
        list_node = ns[0];
8731
129k
      else
8732
129k
        list_node = make_list(n, ns);
8733
8734
129k
      *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE);
8735
129k
      if (IS_NULL(*(iarg->ptail))) {
8736
0
        onig_node_free(list_node);
8737
0
        return ONIGERR_MEMORY;
8738
0
      }
8739
129k
      iarg->ptail = &(ND_CDR((*(iarg->ptail))));
8740
129k
    }
8741
312k
  }
8742
8743
9.74M
  return 0;
8744
9.74M
}
8745
8746
static int
8747
prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
8748
        ParseEnv* env, int group_head)
8749
297k
{
8750
297k
  int r, len, group;
8751
297k
  Node* qn;
8752
297k
  Node** tp;
8753
297k
  unsigned int parse_depth;
8754
8755
298k
 retry:
8756
298k
  group = 0;
8757
298k
  *np = NULL;
8758
298k
  if (tok->type == (enum TokenSyms )term)
8759
21.7k
    goto end_of_token;
8760
8761
276k
  parse_depth = env->parse_depth;
8762
8763
276k
  switch (tok->type) {
8764
8.92k
  case TK_ALT:
8765
9.01k
  case TK_EOT:
8766
30.7k
  end_of_token:
8767
30.7k
    *np = node_new_empty();
8768
30.7k
    CHECK_NULL_RETURN_MEMERR(*np);
8769
30.7k
    return tok->type;
8770
0
  break;
8771
8772
59.0k
  case TK_SUBEXP_OPEN:
8773
59.0k
    r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
8774
59.0k
    if (r < 0) return r;
8775
53.4k
    if (r == 1) { /* group */
8776
1.07k
      if (group_head == 0)
8777
898
        group = 1;
8778
172
      else {
8779
172
        Node* target = *np;
8780
172
        *np = node_new_group(target);
8781
172
        if (IS_NULL(*np)) {
8782
0
          onig_node_free(target);
8783
0
          return ONIGERR_MEMORY;
8784
0
        }
8785
172
        group = 2;
8786
172
      }
8787
1.07k
    }
8788
52.3k
    else if (r == 2) { /* option only */
8789
690
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
8790
448
        env->options = BAG_(*np)->o.options;
8791
448
        r = fetch_token(tok, src, end, env);
8792
448
        if (r < 0) return r;
8793
442
        onig_node_free(*np);
8794
442
        goto retry;
8795
448
      }
8796
242
      else {
8797
242
        Node* target;
8798
242
        OnigOptionType prev = env->options;
8799
8800
242
        env->options = BAG_(*np)->o.options;
8801
242
        r = fetch_token(tok, src, end, env);
8802
242
        if (r < 0) return r;
8803
236
        r = prs_alts(&target, tok, term, src, end, env, FALSE);
8804
236
        env->options = prev;
8805
236
        if (r < 0) {
8806
32
          onig_node_free(target);
8807
32
          return r;
8808
32
        }
8809
204
        ND_BODY(*np) = target;
8810
204
      }
8811
204
      return tok->type;
8812
690
    }
8813
52.7k
    break;
8814
8815
52.7k
  case TK_SUBEXP_CLOSE:
8816
1.16k
    if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
8817
76
      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
8818
8819
1.09k
    if (tok->escaped) goto tk_crude_byte;
8820
1.09k
    else goto tk_byte;
8821
0
    break;
8822
8823
100k
  case TK_STRING:
8824
101k
  tk_byte:
8825
101k
    {
8826
101k
      *np = node_new_str_with_options(tok->backp, *src, env->options);
8827
101k
      CHECK_NULL_RETURN_MEMERR(*np);
8828
8829
305k
      while (1) {
8830
305k
        r = fetch_token(tok, src, end, env);
8831
305k
        if (r < 0) return r;
8832
305k
        if (r != TK_STRING) break;
8833
8834
203k
        r = onig_node_str_cat(*np, tok->backp, *src);
8835
203k
        if (r < 0) return r;
8836
203k
      }
8837
8838
103k
    string_end:
8839
103k
      tp = np;
8840
103k
      goto repeat;
8841
101k
    }
8842
0
    break;
8843
8844
1.98k
  case TK_CRUDE_BYTE:
8845
1.98k
  tk_crude_byte:
8846
1.98k
    {
8847
1.98k
      *np = node_new_str_crude_char(tok->u.byte, env->options);
8848
1.98k
      CHECK_NULL_RETURN_MEMERR(*np);
8849
1.98k
      len = 1;
8850
2.12k
      while (1) {
8851
2.12k
        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
8852
2.01k
          if (len == enclen(env->enc, STR_(*np)->s)) {
8853
1.91k
            r = fetch_token(tok, src, end, env);
8854
1.91k
            goto tk_crude_byte_end;
8855
1.91k
          }
8856
2.01k
        }
8857
8858
214
        r = fetch_token(tok, src, end, env);
8859
214
        if (r < 0) return r;
8860
202
        if (r != TK_CRUDE_BYTE)
8861
60
          return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
8862
8863
142
        r = node_str_cat_char(*np, tok->u.byte);
8864
142
        if (r < 0) return r;
8865
8866
142
        len++;
8867
142
      }
8868
8869
1.91k
    tk_crude_byte_end:
8870
1.91k
      if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
8871
14
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8872
8873
1.89k
      ND_STRING_CLEAR_CRUDE(*np);
8874
1.89k
      goto string_end;
8875
1.91k
    }
8876
0
    break;
8877
8878
4.54k
  case TK_CODE_POINT:
8879
4.54k
    {
8880
4.54k
      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8881
4.54k
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
8882
4.54k
      if (len < 0) return len;
8883
4.07k
      len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
8884
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
8885
      *np = node_new_str_crude(buf, buf + len, env->options);
8886
#else
8887
4.07k
      *np = node_new_str_with_options(buf, buf + len, env->options);
8888
4.07k
#endif
8889
4.07k
      CHECK_NULL_RETURN_MEMERR(*np);
8890
4.07k
    }
8891
4.07k
    break;
8892
8893
4.07k
  case TK_QUOTE_OPEN:
8894
420
    {
8895
420
      OnigCodePoint end_op[2];
8896
420
      UChar *qstart, *qend, *nextp;
8897
8898
420
      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
8899
420
      end_op[1] = (OnigCodePoint )'E';
8900
420
      qstart = *src;
8901
420
      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
8902
420
      if (IS_NULL(qend)) {
8903
150
        nextp = qend = end;
8904
150
      }
8905
420
      *np = node_new_str_with_options(qstart, qend, env->options);
8906
420
      CHECK_NULL_RETURN_MEMERR(*np);
8907
420
      *src = nextp;
8908
420
    }
8909
0
    break;
8910
8911
13.6k
  case TK_CHAR_TYPE:
8912
13.6k
    {
8913
13.6k
      switch (tok->u.prop.ctype) {
8914
7.29k
      case ONIGENC_CTYPE_WORD:
8915
7.29k
        *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
8916
7.29k
        CHECK_NULL_RETURN_MEMERR(*np);
8917
7.29k
        break;
8918
8919
7.29k
      case ONIGENC_CTYPE_SPACE:
8920
5.31k
      case ONIGENC_CTYPE_DIGIT:
8921
6.40k
      case ONIGENC_CTYPE_XDIGIT:
8922
6.40k
        {
8923
6.40k
          CClassNode* cc;
8924
8925
6.40k
          *np = node_new_cclass();
8926
6.40k
          CHECK_NULL_RETURN_MEMERR(*np);
8927
6.40k
          cc = CCLASS_(*np);
8928
6.40k
          r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
8929
6.40k
          if (r != 0) {
8930
0
            onig_node_free(*np);
8931
0
            *np = NULL_NODE;
8932
0
            return r;
8933
0
          }
8934
6.40k
          if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
8935
6.40k
        }
8936
0
        break;
8937
8938
0
      default:
8939
0
        return ONIGERR_PARSER_BUG;
8940
0
        break;
8941
13.6k
      }
8942
13.6k
    }
8943
13.6k
    break;
8944
8945
13.6k
  case TK_CHAR_PROPERTY:
8946
1.87k
    r = prs_char_property(np, tok, src, end, env);
8947
1.87k
    if (r != 0) return r;
8948
1.23k
    break;
8949
8950
13.6k
  case TK_OPEN_CC:
8951
13.6k
    {
8952
13.6k
      CClassNode* cc;
8953
8954
13.6k
      r = prs_cc(np, tok, src, end, env);
8955
13.6k
      if (r != 0) return r;
8956
8957
9.88k
      cc = CCLASS_(*np);
8958
9.88k
      if (OPTON_IGNORECASE(env->options)) {
8959
8.60k
        IApplyCaseFoldArg iarg;
8960
8961
8.60k
        iarg.env      = env;
8962
8.60k
        iarg.cc       = cc;
8963
8.60k
        iarg.alt_root = NULL_NODE;
8964
8.60k
        iarg.ptail    = &(iarg.alt_root);
8965
8966
8.60k
        r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->reg->case_fold_flag,
8967
8.60k
                                        i_apply_case_fold, &iarg);
8968
8.60k
        if (r != 0) {
8969
0
          onig_node_free(iarg.alt_root);
8970
0
          return r;
8971
0
        }
8972
8.60k
        if (IS_NOT_NULL(iarg.alt_root)) {
8973
1.62k
          Node* work = onig_node_new_alt(*np, iarg.alt_root);
8974
1.62k
          if (IS_NULL(work)) {
8975
0
            onig_node_free(iarg.alt_root);
8976
0
            return ONIGERR_MEMORY;
8977
0
          }
8978
1.62k
          *np = work;
8979
1.62k
        }
8980
8.60k
      }
8981
9.88k
    }
8982
9.88k
    break;
8983
8984
17.7k
  case TK_ANYCHAR:
8985
17.7k
    *np = node_new_anychar(env->options);
8986
17.7k
    CHECK_NULL_RETURN_MEMERR(*np);
8987
17.7k
    break;
8988
8989
17.7k
  case TK_ANYCHAR_ANYTIME:
8990
0
    *np = node_new_anychar(env->options);
8991
0
    CHECK_NULL_RETURN_MEMERR(*np);
8992
0
    qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
8993
0
    CHECK_NULL_RETURN_MEMERR(qn);
8994
0
    ND_BODY(qn) = *np;
8995
0
    *np = qn;
8996
0
    break;
8997
8998
11.3k
  case TK_BACKREF:
8999
11.3k
    len = tok->u.backref.num;
9000
11.3k
    *np = node_new_backref(len,
9001
11.3k
                  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
9002
11.3k
                  tok->u.backref.by_name,
9003
11.3k
#ifdef USE_BACKREF_WITH_LEVEL
9004
11.3k
                           tok->u.backref.exist_level,
9005
11.3k
                           tok->u.backref.level,
9006
11.3k
#endif
9007
11.3k
                           env);
9008
11.3k
    CHECK_NULL_RETURN_MEMERR(*np);
9009
11.3k
    break;
9010
9011
11.3k
#ifdef USE_CALL
9012
11.3k
  case TK_CALL:
9013
8.76k
    {
9014
8.76k
      int gnum = tok->u.call.gnum;
9015
9016
8.76k
      *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
9017
8.76k
                          gnum, tok->u.call.by_number);
9018
8.76k
      CHECK_NULL_RETURN_MEMERR(*np);
9019
8.76k
      env->num_call++;
9020
8.76k
      if (tok->u.call.by_number != 0 && gnum == 0) {
9021
3.56k
        env->flags |= PE_FLAG_HAS_CALL_ZERO;
9022
3.56k
      }
9023
8.76k
    }
9024
0
    break;
9025
0
#endif
9026
9027
17.8k
  case TK_ANCHOR:
9028
17.8k
    *np = node_new_anchor_with_options(tok->u.anchor, env->options);
9029
17.8k
    CHECK_NULL_RETURN_MEMERR(*np);
9030
17.8k
    break;
9031
9032
17.8k
  case TK_REPEAT:
9033
542
  case TK_INTERVAL:
9034
542
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
9035
276
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
9036
276
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
9037
0
      else {
9038
0
        *np = node_new_empty();
9039
0
        CHECK_NULL_RETURN_MEMERR(*np);
9040
0
      }
9041
276
    }
9042
266
    else {
9043
266
      goto tk_byte;
9044
266
    }
9045
0
    break;
9046
9047
3.22k
  case TK_KEEP:
9048
3.22k
    r = node_new_keep(np, env);
9049
3.22k
    if (r < 0) return r;
9050
3.22k
    break;
9051
9052
3.63k
  case TK_GENERAL_NEWLINE:
9053
3.63k
    r = node_new_general_newline(np, env);
9054
3.63k
    if (r < 0) return r;
9055
3.63k
    break;
9056
9057
3.63k
  case TK_NO_NEWLINE:
9058
370
    r = node_new_no_newline(np, env);
9059
370
    if (r < 0) return r;
9060
370
    break;
9061
9062
752
  case TK_TRUE_ANYCHAR:
9063
752
    r = node_new_true_anychar(np);
9064
752
    if (r < 0) return r;
9065
752
    break;
9066
9067
5.99k
  case TK_TEXT_SEGMENT:
9068
5.99k
    r = make_text_segment(np, env);
9069
5.99k
    if (r < 0) return r;
9070
5.99k
    break;
9071
9072
5.99k
  default:
9073
0
    return ONIGERR_PARSER_BUG;
9074
0
    break;
9075
276k
  }
9076
9077
151k
  {
9078
151k
    tp = np;
9079
9080
230k
  re_entry:
9081
230k
    r = fetch_token(tok, src, end, env);
9082
230k
    if (r < 0) return r;
9083
9084
334k
  repeat:
9085
334k
    if (r == TK_REPEAT || r == TK_INTERVAL) {
9086
79.0k
      Node* target;
9087
9088
79.0k
      if (is_invalid_quantifier_target(*tp))
9089
8
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
9090
9091
79.0k
      INC_PARSE_DEPTH(parse_depth);
9092
9093
79.0k
      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
9094
79.0k
                               r == TK_INTERVAL);
9095
79.0k
      CHECK_NULL_RETURN_MEMERR(qn);
9096
79.0k
      QUANT_(qn)->greedy = tok->u.repeat.greedy;
9097
79.0k
      if (group == 2) {
9098
42
        target = node_drop_group(*tp);
9099
42
        *tp = NULL_NODE;
9100
42
      }
9101
79.0k
      else {
9102
79.0k
        target = *tp;
9103
79.0k
      }
9104
79.0k
      r = assign_quantifier_body(qn, target, group, env);
9105
79.0k
      if (r < 0) {
9106
30
        onig_node_free(qn);
9107
30
        *tp = NULL_NODE;
9108
30
        return r;
9109
30
      }
9110
9111
79.0k
      if (tok->u.repeat.possessive != 0) {
9112
7.62k
        Node* en;
9113
7.62k
        en = node_new_bag(BAG_STOP_BACKTRACK);
9114
7.62k
        if (IS_NULL(en)) {
9115
0
          onig_node_free(qn);
9116
0
          return ONIGERR_MEMORY;
9117
0
        }
9118
7.62k
        ND_BODY(en) = qn;
9119
7.62k
        qn = en;
9120
7.62k
      }
9121
9122
79.0k
      if (r == 0) {
9123
67.1k
        *tp = qn;
9124
67.1k
      }
9125
11.9k
      else if (r == 1) { /* x{1,1} ==> x */
9126
314
        onig_node_free(qn);
9127
314
        *tp = target;
9128
314
      }
9129
11.5k
      else if (r == 2) { /* split case: /abc+/ */
9130
11.5k
        Node *tmp;
9131
9132
11.5k
        *tp = node_new_list(*tp, NULL);
9133
11.5k
        if (IS_NULL(*tp)) {
9134
0
          onig_node_free(qn);
9135
0
          return ONIGERR_MEMORY;
9136
0
        }
9137
11.5k
        tmp = ND_CDR(*tp) = node_new_list(qn, NULL);
9138
11.5k
        if (IS_NULL(tmp)) {
9139
0
          onig_node_free(qn);
9140
0
          return ONIGERR_MEMORY;
9141
0
        }
9142
11.5k
        tp = &(ND_CAR(tmp));
9143
11.5k
      }
9144
79.0k
      group = 0;
9145
79.0k
      goto re_entry;
9146
79.0k
    }
9147
334k
  }
9148
9149
255k
  return r;
9150
334k
}
9151
9152
static int
9153
prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9154
           ParseEnv* env, int group_head)
9155
126k
{
9156
126k
  int r;
9157
126k
  Node *node, **headp;
9158
9159
126k
  *top = NULL;
9160
126k
  INC_PARSE_DEPTH(env->parse_depth);
9161
9162
126k
  r = prs_exp(&node, tok, term, src, end, env, group_head);
9163
126k
  if (r < 0) {
9164
9.30k
    onig_node_free(node);
9165
9.30k
    return r;
9166
9.30k
  }
9167
9168
116k
  if (r == TK_EOT || r == term || r == TK_ALT) {
9169
73.9k
    *top = node;
9170
73.9k
  }
9171
42.9k
  else {
9172
42.9k
    *top = node_new_list(node, NULL);
9173
42.9k
    if (IS_NULL(*top)) {
9174
0
    mem_err:
9175
0
      onig_node_free(node);
9176
0
      return ONIGERR_MEMORY;
9177
0
    }
9178
9179
42.9k
    headp = &(ND_CDR(*top));
9180
212k
    while (r != TK_EOT && r != term && r != TK_ALT) {
9181
171k
      r = prs_exp(&node, tok, term, src, end, env, FALSE);
9182
171k
      if (r < 0) {
9183
2.24k
        onig_node_free(node);
9184
2.24k
        return r;
9185
2.24k
      }
9186
9187
169k
      if (ND_TYPE(node) == ND_LIST) {
9188
10.0k
        *headp = node;
9189
25.0k
        while (IS_NOT_NULL(ND_CDR(node))) node = ND_CDR(node);
9190
10.0k
        headp = &(ND_CDR(node));
9191
10.0k
      }
9192
159k
      else {
9193
159k
        *headp = node_new_list(node, NULL);
9194
159k
        if (IS_NULL(*headp)) goto mem_err;
9195
159k
        headp = &(ND_CDR(*headp));
9196
159k
      }
9197
169k
    }
9198
42.9k
  }
9199
9200
114k
  DEC_PARSE_DEPTH(env->parse_depth);
9201
114k
  return r;
9202
116k
}
9203
9204
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
9205
static int
9206
prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9207
         ParseEnv* env, int group_head)
9208
103k
{
9209
103k
  int r;
9210
103k
  Node *node, **headp;
9211
103k
  OnigOptionType save_options;
9212
9213
103k
  *top = NULL;
9214
103k
  INC_PARSE_DEPTH(env->parse_depth);
9215
103k
  save_options = env->options;
9216
9217
103k
  r = prs_branch(&node, tok, term, src, end, env, group_head);
9218
103k
  if (r < 0) {
9219
11.3k
    onig_node_free(node);
9220
11.3k
    return r;
9221
11.3k
  }
9222
9223
91.7k
  if (r == term) {
9224
79.8k
    *top = node;
9225
79.8k
  }
9226
11.9k
  else if (r == TK_ALT) {
9227
11.0k
    *top  = onig_node_new_alt(node, NULL);
9228
11.0k
    if (IS_NULL(*top)) {
9229
0
      onig_node_free(node);
9230
0
      return ONIGERR_MEMORY;
9231
0
    }
9232
9233
11.0k
    headp = &(ND_CDR(*top));
9234
33.9k
    while (r == TK_ALT) {
9235
23.0k
      r = fetch_token(tok, src, end, env);
9236
23.0k
      if (r < 0) return r;
9237
23.0k
      r = prs_branch(&node, tok, term, src, end, env, FALSE);
9238
23.0k
      if (r < 0) {
9239
152
        onig_node_free(node);
9240
152
        return r;
9241
152
      }
9242
22.8k
      *headp = onig_node_new_alt(node, NULL);
9243
22.8k
      if (IS_NULL(*headp)) {
9244
0
        onig_node_free(node);
9245
0
        onig_node_free(*top);
9246
0
        *top = NULL_NODE;
9247
0
        return ONIGERR_MEMORY;
9248
0
      }
9249
9250
22.8k
      headp = &(ND_CDR(*headp));
9251
22.8k
    }
9252
9253
10.9k
    if (tok->type != (enum TokenSyms )term)
9254
38
      goto err;
9255
10.9k
  }
9256
856
  else {
9257
856
    onig_node_free(node);
9258
894
  err:
9259
894
    if (term == TK_SUBEXP_CLOSE)
9260
894
      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
9261
0
    else
9262
0
      return ONIGERR_PARSER_BUG;
9263
894
  }
9264
9265
90.6k
  env->options = save_options;
9266
90.6k
  DEC_PARSE_DEPTH(env->parse_depth);
9267
90.6k
  return r;
9268
91.7k
}
9269
9270
static int
9271
prs_regexp(Node** top, UChar** src, UChar* end, ParseEnv* env)
9272
51.2k
{
9273
51.2k
  int r;
9274
51.2k
  PToken tok;
9275
9276
51.2k
  ptoken_init(&tok);
9277
51.2k
  r = fetch_token(&tok, src, end, env);
9278
51.2k
  if (r < 0) return r;
9279
49.2k
  r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);
9280
49.2k
  if (r < 0) return r;
9281
9282
38.9k
  return 0;
9283
49.2k
}
9284
9285
#ifdef USE_CALL
/*
 * Wrap `node` in a memory (capture) node with regnum 0 and register it
 * in the parse environment as memory node 0.
 *
 *   node   tree to wrap
 *   env    parse environment
 *   rnode  receives the wrapper on success
 *
 * Returns 0 on success, ONIGERR_MEMORY when the wrapper cannot be
 * allocated, or the error from scan_env_set_mem_node().
 * On every failure path `node` is left untouched and still belongs to
 * the caller.
 */
static int
make_call_zero_body(Node* node, ParseEnv* env, Node** rnode)
{
  int r;

  Node* x = node_new_memory(0 /* 0: is not named */);
  CHECK_NULL_RETURN_MEMERR(x);

  ND_BODY(x) = node;
  BAG_(x)->m.regnum = 0;
  r = scan_env_set_mem_node(env, 0, x);
  if (r != 0) {
    /* Detach the caller's tree before releasing the wrapper:
       onig_parse_tree() keeps *root pointing at `node`, so releasing it
       here together with x (onig_node_free() presumably frees the body
       too) would leave the caller with a dangling pointer. */
    ND_BODY(x) = NULL_NODE;
    onig_node_free(x);
    return r;
  }

  *rnode = x;
  return 0;
}
#endif
9306
9307
extern int
9308
onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
9309
                regex_t* reg, ParseEnv* env)
9310
51.3k
{
9311
51.3k
  int r;
9312
51.3k
  UChar* p;
9313
51.3k
#ifdef USE_CALLOUT
9314
51.3k
  RegexExt* ext;
9315
51.3k
#endif
9316
9317
51.3k
  reg->string_pool        = 0;
9318
51.3k
  reg->string_pool_end    = 0;
9319
51.3k
  reg->num_mem            = 0;
9320
51.3k
  reg->num_repeat         = 0;
9321
51.3k
  reg->num_empty_check    = 0;
9322
51.3k
  reg->repeat_range_alloc = 0;
9323
51.3k
  reg->repeat_range       = (RepeatRange* )NULL;
9324
9325
51.3k
  names_clear(reg);
9326
9327
51.3k
  scan_env_clear(env);
9328
51.3k
  env->options        = reg->options;
9329
51.3k
  env->case_fold_flag = reg->case_fold_flag;
9330
51.3k
  env->enc            = reg->enc;
9331
51.3k
  env->syntax         = reg->syntax;
9332
51.3k
  env->pattern        = (UChar* )pattern;
9333
51.3k
  env->pattern_end    = (UChar* )end;
9334
51.3k
  env->reg            = reg;
9335
9336
51.3k
  *root = NULL;
9337
9338
51.3k
  if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
9339
110
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
9340
9341
51.2k
  p = (UChar* )pattern;
9342
51.2k
  r = prs_regexp(root, &p, (UChar* )end, env);
9343
51.2k
  if (r != 0) return r;
9344
9345
38.9k
#ifdef USE_CALL
9346
38.9k
  if ((env->flags & PE_FLAG_HAS_CALL_ZERO) != 0) {
9347
2.15k
    Node* zero_node;
9348
2.15k
    r = make_call_zero_body(*root, env, &zero_node);
9349
2.15k
    if (r != 0) return r;
9350
9351
2.15k
    *root = zero_node;
9352
2.15k
  }
9353
38.9k
#endif
9354
9355
38.9k
  reg->num_mem = env->num_mem;
9356
9357
38.9k
#ifdef USE_CALLOUT
9358
38.9k
  ext = reg->extp;
9359
38.9k
  if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
9360
2.24k
    r = setup_ext_callout_list_values(reg);
9361
2.24k
  }
9362
38.9k
#endif
9363
9364
38.9k
  return r;
9365
38.9k
}
9366
9367
extern void
9368
onig_scan_env_set_error_string(ParseEnv* env, int ecode ARG_UNUSED,
9369
                               UChar* arg, UChar* arg_end)
9370
2.78k
{
9371
2.78k
  env->error     = arg;
9372
2.78k
  env->error_end = arg_end;
9373
2.78k
}