Coverage Report

Created: 2024-05-20 06:21

/src/jq/modules/oniguruma/src/regparse.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regparse.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#ifdef DEBUG_ND_FREE
31
#ifndef NEED_TO_INCLUDE_STDIO
32
#define NEED_TO_INCLUDE_STDIO
33
#endif
34
#endif
35
36
#include "regparse.h"
37
#include "st.h"
38
39
0
/* Initial allocation count for tag names.
   NOTE(review): the consumer is outside this view -- confirm the unit. */
#define INIT_TAG_NAMES_ALLOC_NUM   5
40
41
0
/* Size of a warning message buffer.
   NOTE(review): presumably bytes; the user is outside this view. */
#define WARN_BUFSIZE    256
42
43
/* Compile-time feature switch; the name says case folding is applied inside
   negated character classes. Tested elsewhere with #ifdef. */
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
44
45
/*
 * Character predicates for callout identifiers: true for ASCII letters,
 * ASCII digits and '_'.
 *
 * The argument is parenthesized at every use so that expression arguments
 * (for example a conditional expression) expand correctly.  It is still
 * evaluated several times, so it must not have side effects.
 */
#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_' /* || (c) == '!' */)
#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \
  (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') || \
   ((c) >= '0' && (c) <= '9') || (c) == '_')
49
50
255k
/*
 * Option-bit tests.  Each macro yields the masked bits (non-zero when the
 * option is set), not a normalized 0/1 value.
 */
#define OPTON_SINGLELINE(option)         (ONIG_OPTION_SINGLELINE & (option))
#define OPTON_MULTILINE(option)          (ONIG_OPTION_MULTILINE & (option))
#define OPTON_IGNORECASE(option)         (ONIG_OPTION_IGNORECASE & (option))
#define OPTON_EXTEND(option)             (ONIG_OPTION_EXTEND & (option))
#define OPTON_WORD_ASCII(option) \
  ((ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII) & (option))
#define OPTON_DIGIT_ASCII(option) \
  ((ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII) & (option))
#define OPTON_SPACE_ASCII(option) \
  ((ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII) & (option))
#define OPTON_POSIX_ASCII(option)        (ONIG_OPTION_POSIX_IS_ASCII & (option))
#define OPTON_TEXT_SEGMENT_WORD(option)  (ONIG_OPTION_TEXT_SEGMENT_WORD & (option))

/*
 * True when the non-negative character type `ctype` must be matched in
 * ASCII-only mode under `options`: any ctype below ONIGENC_CTYPE_ASCII with
 * the POSIX-is-ASCII option, or WORD / DIGIT / SPACE with their own option.
 * `ctype` is evaluated several times.
 */
#define OPTON_IS_ASCII_MODE_CTYPE(ctype, options) \
  (0 <= (ctype) && \
   ((ONIGENC_CTYPE_ASCII >  (ctype) && OPTON_POSIX_ASCII(options)) || \
    (ONIGENC_CTYPE_WORD  == (ctype) && OPTON_WORD_ASCII(options))  || \
    (ONIGENC_CTYPE_DIGIT == (ctype) && OPTON_DIGIT_ASCII(options)) || \
    (ONIGENC_CTYPE_SPACE == (ctype) && OPTON_SPACE_ASCII(options))))
69
70
71
OnigSyntaxType OnigSyntaxOniguruma = {
72
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
73
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
74
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
75
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
76
     ONIG_SYN_OP_ESC_C_CONTROL )
77
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
78
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
79
      ONIG_SYN_OP2_OPTION_ONIGURUMA |
80
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
81
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
82
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
83
      ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
84
      ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME    |
85
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
86
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
87
      ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
88
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
89
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
90
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
91
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
92
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
93
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
94
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
95
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
96
  , ( SYN_GNU_REGEX_BV |
97
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
98
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
99
      ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND |
100
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
101
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
102
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
103
      ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
104
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
105
#ifdef USE_WHOLE_OPTIONS
106
      ONIG_SYN_WHOLE_OPTIONS |
107
#endif
108
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
109
    )
110
  , ONIG_OPTION_NONE
111
  ,
112
  {
113
      (OnigCodePoint )'\\'                       /* esc */
114
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
115
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
116
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
117
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
118
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
119
  }
120
};
121
122
OnigSyntaxType OnigSyntaxRuby = {
123
  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
124
     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
125
     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
126
     ONIG_SYN_OP_ESC_CONTROL_CHARS |
127
     ONIG_SYN_OP_ESC_C_CONTROL )
128
   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
129
  , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
130
      ONIG_SYN_OP2_OPTION_RUBY |
131
      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
132
      ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
133
      ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
134
      ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
135
      ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
136
      ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
137
      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
138
      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
139
      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
140
      ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
141
      ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
142
      ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
143
      ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 )
144
  , ( SYN_GNU_REGEX_BV |
145
      ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
146
      ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
147
      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
148
      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
149
      ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
150
      ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
151
      ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
152
  , ONIG_OPTION_NONE
153
  ,
154
  {
155
      (OnigCodePoint )'\\'                       /* esc */
156
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
157
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
158
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
159
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
160
    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
161
  }
162
};
163
164
/* Process-wide default syntax; starts out as ONIG_SYNTAX_ONIGURUMA. */
OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_ONIGURUMA;
165
166
167
1.21M
/* Initialize a byte buffer with `size` bytes of storage (cast wrapper around
   bbuf_init); evaluates to bbuf_init's return code. */
#define BB_INIT(buf,size)    bbuf_init((BBuf* )(buf), (size))

/*
 * Grow (buf) by doubling, at least once, until its capacity is >= low.
 * On allocation failure the ENCLOSING FUNCTION returns ONIGERR_MEMORY.
 *
 *  - `low` is evaluated once and cast as a whole expression.
 *  - A zero capacity is treated as 1 so the doubling loop terminates.
 *  - Doubling stops with ONIGERR_MEMORY instead of wrapping around.
 *  - The realloc result goes through a temporary, so on failure (buf)->p and
 *    (buf)->alloc still describe the old, valid block and the owner can
 *    free it.
 */
#define BB_EXPAND(buf,low) do{\
  unsigned int bb_low_   = (unsigned int )(low);\
  unsigned int bb_alloc_ = (buf)->alloc;\
  UChar* bb_p_;\
  if (bb_alloc_ == 0) bb_alloc_ = 1;\
  do {\
    if (bb_alloc_ > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
    bb_alloc_ *= 2;\
  } while (bb_alloc_ < bb_low_);\
  bb_p_ = (UChar* )xrealloc((buf)->p, bb_alloc_);\
  if (IS_NULL(bb_p_)) return(ONIGERR_MEMORY);\
  (buf)->p     = bb_p_;\
  (buf)->alloc = bb_alloc_;\
} while (0)

/*
 * Make sure (buf) can hold `size` bytes; capacity grows by doubling and the
 * buffer is left untouched when it is already large enough.
 * On allocation failure the ENCLOSING FUNCTION returns ONIGERR_MEMORY and
 * the buffer still owns its previous block.  Zero capacity and wrap-around
 * are handled as in BB_EXPAND.
 */
#define BB_ENSURE_SIZE(buf,size) do{\
  unsigned int bb_size_  = (unsigned int )(size);\
  unsigned int new_alloc = (buf)->alloc;\
  if (new_alloc < bb_size_) {\
    UChar* bb_p_;\
    if (new_alloc == 0) new_alloc = 1;\
    while (new_alloc < bb_size_) {\
      if (new_alloc > (~(unsigned int )0) / 2) return(ONIGERR_MEMORY);\
      new_alloc *= 2;\
    }\
    bb_p_ = (UChar* )xrealloc((buf)->p, new_alloc);\
    if (IS_NULL(bb_p_)) return(ONIGERR_MEMORY);\
    (buf)->p     = bb_p_;\
    (buf)->alloc = new_alloc;\
  }\
} while (0)
184
185
71.0M
/*
 * Copy n bytes from `bytes` into (buf) at byte offset `pos`, expanding the
 * buffer first when needed and extending `used` when the write goes past it.
 * May make the enclosing function return ONIGERR_MEMORY (via BB_EXPAND).
 */
#define BB_WRITE(buf,pos,bytes,n) do{\
  int bb_end_ = (pos) + (n);\
  if ((unsigned int )bb_end_ > (buf)->alloc) BB_EXPAND((buf),bb_end_);\
  xmemcpy((buf)->p + (pos), (bytes), (n));\
  if ((unsigned int )bb_end_ > (buf)->used) (buf)->used = bb_end_;\
} while (0)

/* Single-byte form of BB_WRITE. */
#define BB_WRITE1(buf,pos,byte) do{\
  int bb_end_ = (pos) + 1;\
  if ((unsigned int )bb_end_ > (buf)->alloc) BB_EXPAND((buf),bb_end_);\
  (buf)->p[(pos)] = (byte);\
  if ((unsigned int )bb_end_ > (buf)->used) (buf)->used = bb_end_;\
} while (0)

/* Append at the current end of the used region. */
#define BB_ADD(buf,bytes,n)       BB_WRITE((buf),(buf)->used,(bytes),(n))
#define BB_ADD1(buf,byte)         BB_WRITE1((buf),(buf)->used,(byte))
/* Address just past the used region, and that region's length in bytes. */
#define BB_GET_ADD_ADDRESS(buf)   ((buf)->p + (buf)->used)
#define BB_GET_OFFSET_POS(buf)    ((buf)->used)
203
204
/*
 * Shift n bytes from offset `from` up to offset `to` (from < to), expanding
 * the buffer and extending `used` as required.  May make the enclosing
 * function return ONIGERR_MEMORY (via BB_EXPAND).
 */
#define BB_MOVE_RIGHT(buf,from,to,n) do {\
  if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
  if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
} while (0)

/* Shift n bytes from offset `from` down to offset `to` (from > to);
   `used` is not changed. */
#define BB_MOVE_LEFT(buf,from,to,n) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
} while (0)

/* Shift everything from offset `from` to the end of the used region down to
   offset `to` (from > to) and shrink `used` by the distance moved.
   `from` and `to` are parenthesized so expression arguments subtract
   correctly. */
#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\
  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
  (buf)->used -= ((from) - (to));\
} while (0)

/* Insert n bytes at offset `pos`: a plain write when pos is at or beyond the
   used region, otherwise the tail is shifted right first.  All arguments are
   parenthesized before being compared or forwarded. */
#define BB_INSERT(buf,pos,bytes,n) do {\
  if ((pos) >= (buf)->used) {\
    BB_WRITE((buf),(pos),(bytes),(n));\
  }\
  else {\
    BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
    xmemcpy((buf)->p + (pos), (bytes), (n));\
  }\
} while (0)

/* Byte stored at offset `pos` (usable as an lvalue). */
#define BB_GET_BYTE(buf, pos) ((buf)->p[(pos)])
233
234
235
/* State tag with four values.
   NOTE(review): the names suggest character-class parsing states (value seen,
   range in progress, item complete, start); the consumer is outside this
   view -- confirm. */
typedef enum {
236
  CS_VALUE,
237
  CS_RANGE,
238
  CS_COMPLETE,
239
  CS_START
240
} CSTATE;
241
242
/* Value-kind tag with four values.
   NOTE(review): the names suggest undefined / single-byte / multi-byte /
   character-property; the consumer is outside this view -- confirm. */
typedef enum {
243
  CV_UNDEF,
244
  CV_SB,
245
  CV_MB,
246
  CV_CPROP
247
} CVAL;
248
249
0
/* Warning sink that discards its message; it is the default handler and also
   the sentinel compared against to detect "no handler installed". */
extern void
onig_null_warn(const char* s ARG_UNUSED)
{
  /* intentionally empty */
}
250
251
/* Current handler for regular warnings.  Starts as DEFAULT_WARN_FUNCTION when
   the build defines it, otherwise as the no-op onig_null_warn. */
#ifdef DEFAULT_WARN_FUNCTION
252
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
253
#else
254
static OnigWarnFunc onig_warn = onig_null_warn;
255
#endif
256
257
/* Current handler for verbose warnings; same defaulting rule with
   DEFAULT_VERB_WARN_FUNCTION. */
#ifdef DEFAULT_VERB_WARN_FUNCTION
258
static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
259
#else
260
static OnigWarnFunc onig_verb_warn = onig_null_warn;
261
#endif
262
263
/*
 * Install the handler that receives regular warnings.
 *
 * f: callback taking the warning text.  A NULL pointer is stored as
 *    onig_null_warn, so "no handler" always has a single representation and
 *    code that compares against onig_null_warn before calling the handler
 *    never calls through a null function pointer.
 */
extern void onig_set_warn_func(OnigWarnFunc f)
{
  onig_warn = IS_NULL(f) ? onig_null_warn : f;
}
267
268
/*
 * Install the handler that receives verbose warnings.
 *
 * f: callback taking the warning text.  A NULL pointer is stored as
 *    onig_null_warn, so "no handler" always has a single representation and
 *    the stored pointer is always safe to call.
 */
extern void onig_set_verb_warn_func(OnigWarnFunc f)
{
  onig_verb_warn = IS_NULL(f) ? onig_null_warn : f;
}
272
273
extern void
274
onig_warning(const char* s)
275
2
{
276
2
  if (onig_warn == onig_null_warn) return ;
277
278
0
  (*onig_warn)(s);
279
0
}
280
281
#define DEFAULT_MAX_CAPTURE_NUM   32767

/* Upper bound on capture groups, adjustable at run time through
   onig_set_capture_num_limit(). */
static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM;

/*
 * Set the capture-group limit.
 * num: new limit; must not be negative.
 * Returns 0 on success, -1 (limit unchanged) when num is negative.
 */
extern int
onig_set_capture_num_limit(int num)
{
  if (num >= 0) {
    MaxCaptureNum = num;
    return 0;
  }

  return -1;
}
293
294
/* Maximum nesting depth enforced by INC_PARSE_DEPTH; adjustable through
   onig_set_parse_depth_limit(). */
static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
295
296
extern unsigned int
297
onig_get_parse_depth_limit(void)
298
0
{
299
0
  return ParseDepthLimit;
300
0
}
301
302
extern int
303
onig_set_parse_depth_limit(unsigned int depth)
304
0
{
305
0
  if (depth == 0)
306
0
    ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
307
0
  else
308
0
    ParseDepthLimit = depth;
309
0
  return 0;
310
0
}
311
312
/*
 * Increment the nesting counter `d`; when it exceeds ParseDepthLimit the
 * ENCLOSING FUNCTION returns ONIGERR_PARSE_DEPTH_LIMIT_OVER.
 * The ONIG_DEBUG_PARSE variant also records the deepest level reached in
 * env->max_parse_depth (an `env` variable must be in scope).
 * `d` is parenthesized at every use, including the debug-only assignment.
 */
#ifdef ONIG_DEBUG_PARSE
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if (env->max_parse_depth < (d)) env->max_parse_depth = (d);\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#else
#define INC_PARSE_DEPTH(d) do {\
  (d)++;\
  if ((d) > ParseDepthLimit) \
    return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
} while (0)
#endif

/* Undo one INC_PARSE_DEPTH. */
#define DEC_PARSE_DEPTH(d)  ((d)--)
328
329
330
static int
331
bbuf_init(BBuf* buf, int size)
332
1.21M
{
333
1.21M
  if (size <= 0) {
334
0
    size   = 0;
335
0
    buf->p = NULL;
336
0
  }
337
1.21M
  else {
338
1.21M
    buf->p = (UChar* )xmalloc(size);
339
1.21M
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
340
1.21M
  }
341
342
1.21M
  buf->alloc = size;
343
1.21M
  buf->used  = 0;
344
1.21M
  return 0;
345
1.21M
}
346
347
static void
348
bbuf_free(BBuf* bbuf)
349
1.21M
{
350
1.21M
  if (IS_NOT_NULL(bbuf)) {
351
1.21M
    if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
352
1.21M
    xfree(bbuf);
353
1.21M
  }
354
1.21M
}
355
356
static int
357
bbuf_clone(BBuf** rto, BBuf* from)
358
0
{
359
0
  int r;
360
0
  BBuf *to;
361
362
0
  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
363
0
  CHECK_NULL_RETURN_MEMERR(to);
364
0
  r = BB_INIT(to, from->alloc);
365
0
  if (r != 0) {
366
0
    bbuf_free(to);
367
0
    *rto = 0;
368
0
    return r;
369
0
  }
370
0
  to->used = from->used;
371
0
  xmemcpy(to->p, from->p, from->used);
372
0
  return 0;
373
0
}
374
375
static int
376
backref_rel_to_abs(int rel_no, ParseEnv* env)
377
392
{
378
392
  if (rel_no > 0) {
379
43
    if (rel_no > ONIG_INT_MAX - env->num_mem)
380
0
      return ONIGERR_INVALID_BACKREF;
381
43
    return env->num_mem + rel_no;
382
43
  }
383
349
  else {
384
349
    return env->num_mem + 1 + rel_no;
385
349
  }
386
392
}
387
388
/* Set / clear the flag bits `f` in the lvalue `v`. */
#define OPTION_ON(v,f)     ((v) |= (f))
#define OPTION_OFF(v,f)    ((v) &= ~(f))

/* Clear `f` in `v` when `negative` is non-zero, otherwise set it.
   The whole expansion is parenthesized so the conditional cannot absorb
   neighbouring operators when the macro is used inside an expression. */
#define OPTION_NEGATE(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
392
393
/* First code point of the multi-byte range: 0 when the encoding's minimum
   character length exceeds one byte, otherwise 0x80. */
#define MBCODE_START_POS(enc) \
  ((OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80))

/* Add the range [MBCODE_START_POS(enc), maximum code point] to *pbuf;
   evaluates to add_code_range_to_buf's return code. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf((pbuf), MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, add the whole multi-byte range to
   `mbuf`.  Needs an int `r` in the caller's scope; on error the ENCLOSING
   FUNCTION returns r. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r != 0) return r;\
  }\
} while (0)
405
406
407
0
/* Store 1 in the lvalue `empty` when every word of bit set `bs` is zero,
   otherwise 0.  Scanning stops at the first non-zero word. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int bs_word_ = 0;\
  empty = 1;\
  while (bs_word_ < (int )BITSET_REAL_SIZE) {\
    if ((bs)[bs_word_] != 0) {\
      empty = 0; break;\
    }\
    bs_word_++;\
  }\
} while (0)
416
417
static void
418
bitset_set_range(BitSetRef bs, int from, int to)
419
40.4k
{
420
40.4k
  int i;
421
3.01M
  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
422
2.97M
    BITSET_SET_BIT(bs, i);
423
2.97M
  }
424
40.4k
}
425
426
static void
427
bitset_invert(BitSetRef bs)
428
0
{
429
0
  int i;
430
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { bs[i] = ~(bs[i]); }
431
0
}
432
433
static void
434
bitset_invert_to(BitSetRef from, BitSetRef to)
435
0
{
436
0
  int i;
437
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { to[i] = ~(from[i]); }
438
0
}
439
440
static void
441
bitset_and(BitSetRef dest, BitSetRef bs)
442
0
{
443
0
  int i;
444
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] &= bs[i]; }
445
0
}
446
447
static void
448
bitset_or(BitSetRef dest, BitSetRef bs)
449
0
{
450
0
  int i;
451
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] |= bs[i]; }
452
0
}
453
454
static void
455
bitset_copy(BitSetRef dest, BitSetRef bs)
456
0
{
457
0
  int i;
458
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) { dest[i] = bs[i]; }
459
0
}
460
461
extern int
462
onig_strncmp(const UChar* s1, const UChar* s2, int n)
463
0
{
464
0
  int x;
465
466
0
  while (n-- > 0) {
467
0
    x = *s2++ - *s1++;
468
0
    if (x) return x;
469
0
  }
470
0
  return 0;
471
0
}
472
473
extern void
474
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
475
80.4M
{
476
80.4M
  int len = (int )(end - src);
477
80.4M
  if (len > 0) {
478
80.4M
    xmemcpy(dest, src, len);
479
80.4M
    dest[len] = (UChar )0;
480
80.4M
  }
481
80.4M
}
482
483
/*
 * Pattern scanning helpers.  They operate on the caller's locals: `p`
 * (cursor), `end` (one past the pattern), `enc` (encoding), and -- once
 * PFETCH_READY has been used -- `pfetch_prev` (cursor before the last
 * fetch).
 */
#define PEND_VALUE   0

/* declares the "previous position" variable used by PUNFETCH / PPREV */
#define PFETCH_READY  UChar* pfetch_prev
/* 1 at end of pattern, else 0 */
#define PEND         (p >= end ? 1 : 0)
/* step back to the position before the last PFETCH / PINC */
#define PUNFETCH     p = pfetch_prev
#define PPREV        pfetch_prev

/* advance one character without reading it (no pfetch_prev update) */
#define PINC_S     do { \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)
/* read one character into c and advance (no pfetch_prev update) */
#define PFETCH_S(c) do { \
  c = ONIGENC_MBC_TO_CODE(enc, p, end); \
  p += ONIGENC_MBC_ENC_LEN(enc, p); \
} while (0)

/* as PINC_S, remembering the old position first */
#define PINC       do { \
  pfetch_prev = p; \
  PINC_S; \
} while (0)
/* as PFETCH_S, remembering the old position first */
#define PFETCH(c)  do { \
  pfetch_prev = p; \
  PFETCH_S(c); \
} while (0)
507
508
8.74M
/* Code point at the cursor without advancing; PEND_VALUE at end of pattern.
   Uses the caller's `p`, `end` and `enc`. */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* True when the next code point equals c.  `c` is parenthesized so the cast
   applies to the whole argument expression. */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
510
511
/*
 * Resize the heap string [dest, dest_end) to capa + 1 bytes (or allocate it
 * when dest is NULL) and append [src, src_end) at the old end position.
 * Returns the possibly relocated buffer, or NULL on allocation failure.
 */
static UChar*
strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
            int capa)
{
  /* taken before the reallocation, which may move the block */
  ptrdiff_t old_len = dest_end - dest;
  UChar* buf;

  buf = dest ? (UChar* )xrealloc(dest, capa + 1)
             : (UChar* )xmalloc(capa + 1);
  CHECK_NULL_RETURN(buf);

  onig_strcpy(buf + old_len, src, src_end);
  return buf;
}
527
528
/* dest on static area */
529
static UChar*
530
strcat_capa_from_static(UChar* dest, UChar* dest_end,
531
                        const UChar* src, const UChar* src_end, int capa)
532
593k
{
533
593k
  UChar* r;
534
535
593k
  r = (UChar* )xmalloc(capa + 1);
536
593k
  CHECK_NULL_RETURN(r);
537
593k
  onig_strcpy(r, dest, dest_end);
538
593k
  onig_strcpy(r + (dest_end - dest), src, src_end);
539
593k
  return r;
540
593k
}
541
542
543
#ifdef USE_ST_LIBRARY
544
545
/* Hash key for a byte string given as the half-open range [s, end). */
typedef struct {
546
  UChar* s;
547
  UChar* end;
548
} st_str_end_key;
549
550
static int
551
str_end_cmp(st_str_end_key* x, st_str_end_key* y)
552
1.93k
{
553
1.93k
  UChar *p, *q;
554
1.93k
  int c;
555
556
1.93k
  if ((x->end - x->s) != (y->end - y->s))
557
0
    return 1;
558
559
1.93k
  p = x->s;
560
1.93k
  q = y->s;
561
1.26M
  while (p < x->end) {
562
1.26M
    c = (int )*p - (int )*q;
563
1.26M
    if (c != 0) return c;
564
565
1.26M
    p++; q++;
566
1.26M
  }
567
568
1.93k
  return 0;
569
1.93k
}
570
571
static int
572
str_end_hash(st_str_end_key* x)
573
3.49k
{
574
3.49k
  UChar *p;
575
3.49k
  unsigned val = 0;
576
577
3.49k
  p = x->s;
578
3.24M
  while (p < x->end) {
579
3.24M
    val = val * 997 + (unsigned )*p++;
580
3.24M
  }
581
582
3.49k
  return (int) (val + (val >> 5));
583
3.49k
}
584
585
extern hash_table_type
586
onig_st_init_strend_table_with_size(int size)
587
145
{
588
145
  static struct st_hash_type hashType = {
589
145
    str_end_cmp,
590
145
    str_end_hash,
591
145
  };
592
593
145
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
594
145
}
595
596
extern int
597
onig_st_lookup_strend(hash_table_type table, const UChar* str_key,
598
                      const UChar* end_key, hash_data_type *value)
599
2.64k
{
600
2.64k
  st_str_end_key key;
601
602
2.64k
  key.s   = (UChar* )str_key;
603
2.64k
  key.end = (UChar* )end_key;
604
605
2.64k
  return onig_st_lookup(table, (st_data_t )(&key), value);
606
2.64k
}
607
608
extern int
609
onig_st_insert_strend(hash_table_type table, const UChar* str_key,
610
                      const UChar* end_key, hash_data_type value)
611
857
{
612
857
  st_str_end_key* key;
613
857
  int result;
614
615
857
  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
616
857
  CHECK_NULL_RETURN_MEMERR(key);
617
618
857
  key->s   = (UChar* )str_key;
619
857
  key->end = (UChar* )end_key;
620
857
  result = onig_st_insert(table, (st_data_t )key, value);
621
857
  if (result) {
622
0
    xfree(key);
623
0
  }
624
857
  return result;
625
857
}
626
627
628
#ifdef USE_CALLOUT
629
630
/* Hash key for the callout name table: a name given as the byte range
   [s, end), qualified by encoding and callout type. */
typedef struct {
631
  OnigEncoding enc;
632
  int    type; /* callout type: single or not */
633
  UChar* s;
634
  UChar* end;
635
} st_callout_name_key;
636
637
static int
638
callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y)
639
0
{
640
0
  UChar *p, *q;
641
0
  int c;
642
643
0
  if (x->enc  != y->enc)  return 1;
644
0
  if (x->type != y->type) return 1;
645
0
  if ((x->end - x->s) != (y->end - y->s))
646
0
    return 1;
647
648
0
  p = x->s;
649
0
  q = y->s;
650
0
  while (p < x->end) {
651
0
    c = (int )*p - (int )*q;
652
0
    if (c != 0) return c;
653
654
0
    p++; q++;
655
0
  }
656
657
0
  return 0;
658
0
}
659
660
static int
661
callout_name_table_hash(st_callout_name_key* x)
662
108
{
663
108
  UChar *p;
664
108
  unsigned int val = 0;
665
666
108
  p = x->s;
667
3.64k
  while (p < x->end) {
668
3.54k
    val = val * 997 + (unsigned int )*p++;
669
3.54k
  }
670
671
  /* use intptr_t for escape warning in Windows */
672
108
  return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
673
108
}
674
675
extern hash_table_type
676
onig_st_init_callout_name_table_with_size(int size)
677
2
{
678
2
  static struct st_hash_type hashType = {
679
2
    callout_name_table_cmp,
680
2
    callout_name_table_hash,
681
2
  };
682
683
2
  return (hash_table_type )onig_st_init_table_with_size(&hashType, size);
684
2
}
685
686
extern int
687
onig_st_lookup_callout_name_table(hash_table_type table,
688
                                  OnigEncoding enc,
689
                                  int type,
690
                                  const UChar* str_key,
691
                                  const UChar* end_key,
692
                                  hash_data_type *value)
693
94
{
694
94
  st_callout_name_key key;
695
696
94
  key.enc  = enc;
697
94
  key.type = type;
698
94
  key.s    = (UChar* )str_key;
699
94
  key.end  = (UChar* )end_key;
700
701
94
  return onig_st_lookup(table, (st_data_t )(&key), value);
702
94
}
703
704
static int
705
st_insert_callout_name_table(hash_table_type table,
706
                             OnigEncoding enc, int type,
707
                             UChar* str_key, UChar* end_key,
708
                             hash_data_type value)
709
14
{
710
14
  st_callout_name_key* key;
711
14
  int result;
712
713
14
  key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key));
714
14
  CHECK_NULL_RETURN_MEMERR(key);
715
716
  /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */
717
14
  key->enc  = enc;
718
14
  key->type = type;
719
14
  key->s    = str_key;
720
14
  key->end  = end_key;
721
14
  result = onig_st_insert(table, (st_data_t )key, value);
722
14
  if (result) {
723
0
    xfree(key);
724
0
  }
725
14
  return result;
726
14
}
727
#endif
728
729
#endif /* USE_ST_LIBRARY */
730
731
732
315
/* Initial allocation count for a name's back reference list.
   NOTE(review): the consumer is outside this view -- confirm. */
#define INIT_NAME_BACKREFS_ALLOC_NUM   8
733
734
/* One named group: the name and the group number(s) registered under it. */
typedef struct {
735
  UChar* name;       /* heap copy of the name; freed with the entry */
736
  int    name_len;   /* byte length */
737
  int    back_num;   /* number of backrefs */
738
  int    back_alloc; /* NOTE(review): presumably the capacity of back_refs -- confirm */
739
  int    back_ref1;  /* the group number when back_num == 1 */
740
  int*   back_refs;  /* the group numbers when back_num > 1; may be NULL */
741
} NameEntry;
742
743
#ifdef USE_ST_LIBRARY
744
745
147
/* Initial size passed when the name hash table is created. */
#define INIT_NAMES_ALLOC_NUM    5
746
747
/* With the st library the name table is an st hash table. */
typedef st_table  NameTable;
748
typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
749
750
/* NOTE(review): name buffer sizes; the users are outside this view. */
#define NAMEBUF_SIZE    24
751
#define NAMEBUF_SIZE_1  25
752
753
#ifdef ONIG_DEBUG
754
static int
755
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
756
{
757
  int i;
758
  FILE* fp = (FILE* )arg;
759
760
  fprintf(fp, "%s: ", e->name);
761
  if (e->back_num == 0)
762
    fputs("-", fp);
763
  else if (e->back_num == 1)
764
    fprintf(fp, "%d", e->back_ref1);
765
  else {
766
    for (i = 0; i < e->back_num; i++) {
767
      if (i > 0) fprintf(fp, ", ");
768
      fprintf(fp, "%d", e->back_refs[i]);
769
    }
770
  }
771
  fputs("\n", fp);
772
  return ST_CONTINUE;
773
}
774
775
extern int
776
onig_print_names(FILE* fp, regex_t* reg)
777
{
778
  NameTable* t = (NameTable* )reg->name_table;
779
780
  if (IS_NOT_NULL(t)) {
781
    fprintf(fp, "name table\n");
782
    onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
783
    fputs("\n", fp);
784
  }
785
  return 0;
786
}
787
#endif /* ONIG_DEBUG */
788
789
static int
790
i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
791
857
{
792
857
  xfree(e->name);
793
857
  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
794
857
  xfree(key);
795
857
  xfree(e);
796
857
  return ST_DELETE;
797
857
}
798
799
static int
800
names_clear(regex_t* reg)
801
2.74M
{
802
2.74M
  NameTable* t = (NameTable* )reg->name_table;
803
804
2.74M
  if (IS_NOT_NULL(t)) {
805
145
    onig_st_foreach(t, i_free_name_entry, 0);
806
145
  }
807
2.74M
  return 0;
808
2.74M
}
809
810
extern int
811
onig_names_free(regex_t* reg)
812
1.37M
{
813
1.37M
  int r;
814
1.37M
  NameTable* t;
815
816
1.37M
  r = names_clear(reg);
817
1.37M
  if (r != 0) return r;
818
819
1.37M
  t = (NameTable* )reg->name_table;
820
1.37M
  if (IS_NOT_NULL(t)) onig_st_free_table(t);
821
1.37M
  reg->name_table = (void* )NULL;
822
1.37M
  return 0;
823
1.37M
}
824
825
static NameEntry*
826
name_find(regex_t* reg, const UChar* name, const UChar* name_end)
827
2.79k
{
828
2.79k
  NameEntry* e;
829
2.79k
  NameTable* t = (NameTable* )reg->name_table;
830
831
2.79k
  e = (NameEntry* )NULL;
832
2.79k
  if (IS_NOT_NULL(t)) {
833
2.64k
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
834
2.64k
  }
835
2.79k
  return e;
836
2.79k
}
837
838
/* Context handed to i_names() while iterating over the name table. */
typedef struct {
839
  /* user callback: (name, name_end, ref count, ref array, regex, user arg) */
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
840
  regex_t* reg;      /* regex passed through to func */
841
  void* arg;         /* user argument passed through to func */
842
  int ret;           /* first non-zero value returned by func, else 0 */
843
  OnigEncoding enc;  /* set by onig_foreach_name; not read by i_names */
844
} INamesArg;
845
846
static int
847
i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
848
8.28k
{
849
8.28k
  int r = (*(arg->func))(e->name,
850
8.28k
                         e->name + e->name_len,
851
8.28k
                         e->back_num,
852
8.28k
                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
853
8.28k
                         arg->reg, arg->arg);
854
8.28k
  if (r != 0) {
855
0
    arg->ret = r;
856
0
    return ST_STOP;
857
0
  }
858
8.28k
  return ST_CONTINUE;
859
8.28k
}
860
861
extern int
862
onig_foreach_name(regex_t* reg,
863
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
864
1.97M
{
865
1.97M
  INamesArg narg;
866
1.97M
  NameTable* t = (NameTable* )reg->name_table;
867
868
1.97M
  narg.ret = 0;
869
1.97M
  if (IS_NOT_NULL(t)) {
870
5.14k
    narg.func = func;
871
5.14k
    narg.reg  = reg;
872
5.14k
    narg.arg  = arg;
873
5.14k
    narg.enc  = reg->enc; /* should be pattern encoding. */
874
5.14k
    onig_st_foreach(t, i_names, (HashDataType )&narg);
875
5.14k
  }
876
1.97M
  return narg.ret;
877
1.97M
}
878
879
static int
880
i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumMap* map)
881
0
{
882
0
  int i;
883
884
0
  if (e->back_num > 1) {
885
0
    for (i = 0; i < e->back_num; i++) {
886
0
      e->back_refs[i] = map[e->back_refs[i]].new_val;
887
0
    }
888
0
  }
889
0
  else if (e->back_num == 1) {
890
0
    e->back_ref1 = map[e->back_ref1].new_val;
891
0
  }
892
893
0
  return ST_CONTINUE;
894
0
}
895
896
extern int
897
onig_renumber_name_table(regex_t* reg, GroupNumMap* map)
898
0
{
899
0
  NameTable* t = (NameTable* )reg->name_table;
900
901
0
  if (IS_NOT_NULL(t)) {
902
0
    onig_st_foreach(t, i_renumber_name, (HashDataType )map);
903
0
  }
904
0
  return 0;
905
0
}
906
907
908
extern int
909
onig_number_of_names(regex_t* reg)
910
0
{
911
0
  NameTable* t = (NameTable* )reg->name_table;
912
913
0
  if (IS_NOT_NULL(t))
914
0
    return t->num_entries;
915
0
  else
916
0
    return 0;
917
0
}
918
919
#else  /* USE_ST_LIBRARY */
920
921
/* Initial number of entry slots allocated for the array-based name table. */
#define INIT_NAMES_ALLOC_NUM    8
922
923
/* Name table used when the st library is not available: a growable array
   of entries searched linearly. */
typedef struct {
924
  NameEntry* e;      /* entry array; NULL after names_clear() */
925
  int        num;    /* entries in use */
926
  int        alloc;  /* entries allocated */
927
} NameTable;
928
929
#ifdef ONIG_DEBUG
930
extern int
931
onig_print_names(FILE* fp, regex_t* reg)
932
{
933
  int i, j;
934
  NameEntry* e;
935
  NameTable* t = (NameTable* )reg->name_table;
936
937
  if (IS_NOT_NULL(t) && t->num > 0) {
938
    fprintf(fp, "name table\n");
939
    for (i = 0; i < t->num; i++) {
940
      e = &(t->e[i]);
941
      fprintf(fp, "%s: ", e->name);
942
      if (e->back_num == 0) {
943
        fputs("-", fp);
944
      }
945
      else if (e->back_num == 1) {
946
        fprintf(fp, "%d", e->back_ref1);
947
      }
948
      else {
949
        for (j = 0; j < e->back_num; j++) {
950
          if (j > 0) fprintf(fp, ", ");
951
          fprintf(fp, "%d", e->back_refs[j]);
952
        }
953
      }
954
      fputs("\n", fp);
955
    }
956
    fputs("\n", fp);
957
  }
958
  return 0;
959
}
960
#endif
961
962
static int
963
names_clear(regex_t* reg)
964
{
965
  int i;
966
  NameEntry* e;
967
  NameTable* t = (NameTable* )reg->name_table;
968
969
  if (IS_NOT_NULL(t)) {
970
    for (i = 0; i < t->num; i++) {
971
      e = &(t->e[i]);
972
      if (IS_NOT_NULL(e->name)) {
973
        xfree(e->name);
974
        e->name       = NULL;
975
        e->name_len   = 0;
976
        e->back_num   = 0;
977
        e->back_alloc = 0;
978
        if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
979
        e->back_refs = (int* )NULL;
980
      }
981
    }
982
    if (IS_NOT_NULL(t->e)) {
983
      xfree(t->e);
984
      t->e = NULL;
985
    }
986
    t->num = 0;
987
  }
988
  return 0;
989
}
990
991
extern int
992
onig_names_free(regex_t* reg)
993
{
994
  int r;
995
  NameTable* t;
996
997
  r = names_clear(reg);
998
  if (r != 0) return r;
999
1000
  t = (NameTable* )reg->name_table;
1001
  if (IS_NOT_NULL(t)) xfree(t);
1002
  reg->name_table = NULL;
1003
  return 0;
1004
}
1005
1006
static NameEntry*
1007
name_find(regex_t* reg, UChar* name, UChar* name_end)
1008
{
1009
  int i, len;
1010
  NameEntry* e;
1011
  NameTable* t = (NameTable* )reg->name_table;
1012
1013
  if (IS_NOT_NULL(t)) {
1014
    len = name_end - name;
1015
    for (i = 0; i < t->num; i++) {
1016
      e = &(t->e[i]);
1017
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1018
        return e;
1019
    }
1020
  }
1021
  return (NameEntry* )NULL;
1022
}
1023
1024
extern int
1025
onig_foreach_name(regex_t* reg,
1026
  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
1027
{
1028
  int i, r;
1029
  NameEntry* e;
1030
  NameTable* t = (NameTable* )reg->name_table;
1031
1032
  if (IS_NOT_NULL(t)) {
1033
    for (i = 0; i < t->num; i++) {
1034
      e = &(t->e[i]);
1035
      r = (*func)(e->name, e->name + e->name_len, e->back_num,
1036
                  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
1037
                  reg, arg);
1038
      if (r != 0) return r;
1039
    }
1040
  }
1041
  return 0;
1042
}
1043
1044
extern int
1045
onig_number_of_names(regex_t* reg)
1046
{
1047
  NameTable* t = (NameTable* )reg->name_table;
1048
1049
  if (IS_NOT_NULL(t))
1050
    return t->num;
1051
  else
1052
    return 0;
1053
}
1054
1055
#endif /* else USE_ST_LIBRARY */
1056
1057
static int
1058
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ParseEnv* env)
1059
2.78k
{
1060
2.78k
  int r;
1061
2.78k
  int alloc;
1062
2.78k
  NameEntry* e;
1063
2.78k
  NameTable* t = (NameTable* )reg->name_table;
1064
1065
2.78k
  if (name_end - name <= 0)
1066
0
    return ONIGERR_EMPTY_GROUP_NAME;
1067
1068
2.78k
  e = name_find(reg, name, name_end);
1069
2.78k
  if (IS_NULL(e)) {
1070
857
#ifdef USE_ST_LIBRARY
1071
857
    if (IS_NULL(t)) {
1072
145
      t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM);
1073
145
      CHECK_NULL_RETURN_MEMERR(t);
1074
145
      reg->name_table = (void* )t;
1075
145
    }
1076
857
    e = (NameEntry* )xmalloc(sizeof(NameEntry));
1077
857
    CHECK_NULL_RETURN_MEMERR(e);
1078
1079
857
    e->name = onigenc_strdup(reg->enc, name, name_end);
1080
857
    if (IS_NULL(e->name)) {
1081
0
      xfree(e);  return ONIGERR_MEMORY;
1082
0
    }
1083
857
    r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
1084
857
                              (HashDataType )e);
1085
857
    if (r < 0) return r;
1086
1087
857
    e->name_len   = (int )(name_end - name);
1088
857
    e->back_num   = 0;
1089
857
    e->back_alloc = 0;
1090
857
    e->back_refs  = (int* )NULL;
1091
1092
#else
1093
1094
    if (IS_NULL(t)) {
1095
      alloc = INIT_NAMES_ALLOC_NUM;
1096
      t = (NameTable* )xmalloc(sizeof(NameTable));
1097
      CHECK_NULL_RETURN_MEMERR(t);
1098
      t->e     = NULL;
1099
      t->alloc = 0;
1100
      t->num   = 0;
1101
1102
      t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
1103
      if (IS_NULL(t->e)) {
1104
        xfree(t);
1105
        return ONIGERR_MEMORY;
1106
      }
1107
      t->alloc = alloc;
1108
      reg->name_table = t;
1109
      goto clear;
1110
    }
1111
    else if (t->num == t->alloc) {
1112
      int i;
1113
1114
      alloc = t->alloc * 2;
1115
      t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
1116
      CHECK_NULL_RETURN_MEMERR(t->e);
1117
      t->alloc = alloc;
1118
1119
    clear:
1120
      for (i = t->num; i < t->alloc; i++) {
1121
        t->e[i].name       = NULL;
1122
        t->e[i].name_len   = 0;
1123
        t->e[i].back_num   = 0;
1124
        t->e[i].back_alloc = 0;
1125
        t->e[i].back_refs  = (int* )NULL;
1126
      }
1127
    }
1128
    e = &(t->e[t->num]);
1129
    t->num++;
1130
    e->name = onigenc_strdup(reg->enc, name, name_end);
1131
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1132
    e->name_len = name_end - name;
1133
#endif
1134
857
  }
1135
1136
2.78k
  if (e->back_num >= 1 &&
1137
2.78k
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
1138
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
1139
0
                                   name, name_end);
1140
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
1141
0
  }
1142
1143
2.78k
  e->back_num++;
1144
2.78k
  if (e->back_num == 1) {
1145
857
    e->back_ref1 = backref;
1146
857
  }
1147
1.93k
  else {
1148
1.93k
    if (e->back_num == 2) {
1149
315
      alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
1150
315
      e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
1151
315
      CHECK_NULL_RETURN_MEMERR(e->back_refs);
1152
315
      e->back_alloc = alloc;
1153
315
      e->back_refs[0] = e->back_ref1;
1154
315
      e->back_refs[1] = backref;
1155
315
    }
1156
1.61k
    else {
1157
1.61k
      if (e->back_num > e->back_alloc) {
1158
92
        alloc = e->back_alloc * 2;
1159
92
        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
1160
92
        CHECK_NULL_RETURN_MEMERR(e->back_refs);
1161
92
        e->back_alloc = alloc;
1162
92
      }
1163
1.61k
      e->back_refs[e->back_num - 1] = backref;
1164
1.61k
    }
1165
1.93k
  }
1166
1167
2.78k
  return 0;
1168
2.78k
}
1169
1170
extern int
1171
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
1172
                           const UChar* name_end, int** nums)
1173
5
{
1174
5
  NameEntry* e = name_find(reg, name, name_end);
1175
1176
5
  if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
1177
1178
0
  switch (e->back_num) {
1179
0
  case 0:
1180
0
    break;
1181
0
  case 1:
1182
0
    *nums = &(e->back_ref1);
1183
0
    break;
1184
0
  default:
1185
0
    *nums = e->back_refs;
1186
0
    break;
1187
0
  }
1188
0
  return e->back_num;
1189
0
}
1190
1191
static int
1192
name_to_group_numbers(ParseEnv* env, const UChar* name, const UChar* name_end,
1193
                      int** nums)
1194
0
{
1195
0
  regex_t* reg;
1196
0
  NameEntry* e;
1197
1198
0
  reg = env->reg;
1199
0
  e = name_find(reg, name, name_end);
1200
1201
0
  if (IS_NULL(e)) {
1202
0
    onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
1203
0
                                   (UChar* )name, (UChar* )name_end);
1204
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
1205
0
  }
1206
1207
0
  switch (e->back_num) {
1208
0
  case 0:
1209
0
    break;
1210
0
  case 1:
1211
0
    *nums = &(e->back_ref1);
1212
0
    break;
1213
0
  default:
1214
0
    *nums = e->back_refs;
1215
0
    break;
1216
0
  }
1217
0
  return e->back_num;
1218
0
}
1219
1220
extern int
1221
onig_name_to_backref_number(regex_t* reg, const UChar* name,
1222
                            const UChar* name_end, OnigRegion *region)
1223
0
{
1224
0
  int i, n, *nums;
1225
1226
0
  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
1227
0
  if (n < 0)
1228
0
    return n;
1229
0
  else if (n == 0)
1230
0
    return ONIGERR_PARSER_BUG;
1231
0
  else if (n == 1)
1232
0
    return nums[0];
1233
0
  else {
1234
0
    if (IS_NOT_NULL(region)) {
1235
0
      for (i = n - 1; i >= 0; i--) {
1236
0
        if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
1237
0
          return nums[i];
1238
0
      }
1239
0
    }
1240
0
    return nums[n - 1];
1241
0
  }
1242
0
}
1243
1244
extern int
1245
onig_noname_group_capture_is_active(regex_t* reg)
1246
0
{
1247
0
  if (OPTON_DONT_CAPTURE_GROUP(reg->options))
1248
0
    return 0;
1249
1250
0
  if (onig_number_of_names(reg) > 0 &&
1251
0
      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
1252
0
      ! OPTON_CAPTURE_GROUP(reg->options)) {
1253
0
    return 0;
1254
0
  }
1255
1256
0
  return 1;
1257
0
}
1258
1259
#ifdef USE_CALLOUT
1260
1261
/*
 * One registered callout-of-name, as filled in by onig_set_callout_of_name().
 * Entries live in GlobalCalloutNameList->v and are addressed by name id.
 */
typedef struct {
1262
  OnigCalloutType type;        /* callout type passed at registration */
1263
  int             in;          /* ONIG_CALLOUT_IN_* bits passed at registration */
1264
  OnigCalloutFunc start_func;
1265
  OnigCalloutFunc end_func;
1266
  int             arg_num;     /* total number of arguments */
1267
  int             opt_arg_num; /* how many of the trailing arguments are optional */
1268
  unsigned int    arg_types[ONIG_CALLOUT_MAX_ARGS_NUM];
1269
  /* indexed by argument position; only the last opt_arg_num positions are
     filled.  ONIG_TYPE_STRING defaults are heap copies that
     free_callout_func_list() releases. */
  OnigValue       opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM];
1270
  UChar*          name; /* reference to GlobalCalloutNameTable entry: e->name */
1271
} CalloutNameListEntry;
1272
1273
/* Growable array of CalloutNameListEntry (see callout_func_list_add()). */
typedef struct {
1274
  int  n;      /* number of entries in use */
1275
  int  alloc;  /* number of entries v has room for */
1276
  CalloutNameListEntry* v;
1277
} CalloutNameListType;
1278
1279
static CalloutNameListType* GlobalCalloutNameList;
1280
1281
static int
1282
make_callout_func_list(CalloutNameListType** rs, int init_size)
1283
2
{
1284
2
  CalloutNameListType* s;
1285
2
  CalloutNameListEntry* v;
1286
1287
2
  *rs = 0;
1288
1289
2
  s = xmalloc(sizeof(*s));
1290
2
  if (IS_NULL(s)) return ONIGERR_MEMORY;
1291
1292
2
  v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size);
1293
2
  if (IS_NULL(v)) {
1294
0
    xfree(s);
1295
0
    return ONIGERR_MEMORY;
1296
0
  }
1297
1298
2
  s->n = 0;
1299
2
  s->alloc = init_size;
1300
2
  s->v = v;
1301
1302
2
  *rs = s;
1303
2
  return ONIG_NORMAL;
1304
2
}
1305
1306
static void
1307
free_callout_func_list(CalloutNameListType* s)
1308
0
{
1309
0
  if (IS_NOT_NULL(s)) {
1310
0
    if (IS_NOT_NULL(s->v)) {
1311
0
      int i, j;
1312
1313
0
      for (i = 0; i < s->n; i++) {
1314
0
        CalloutNameListEntry* e = s->v + i;
1315
0
        for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) {
1316
0
          if (e->arg_types[j] == ONIG_TYPE_STRING) {
1317
0
            UChar* p = e->opt_defaults[j].s.start;
1318
0
            if (IS_NOT_NULL(p)) xfree(p);
1319
0
          }
1320
0
        }
1321
0
      }
1322
0
      xfree(s->v);
1323
0
    }
1324
0
    xfree(s);
1325
0
  }
1326
0
}
1327
1328
static int
1329
callout_func_list_add(CalloutNameListType* s, int* rid)
1330
16
{
1331
16
  if (s->n >= s->alloc) {
1332
0
    int new_size = s->alloc * 2;
1333
0
    CalloutNameListEntry* nv = (CalloutNameListEntry* )
1334
0
      xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size);
1335
0
    if (IS_NULL(nv)) return ONIGERR_MEMORY;
1336
1337
0
    s->alloc = new_size;
1338
0
    s->v = nv;
1339
0
  }
1340
1341
16
  *rid = s->n;
1342
1343
16
  xmemset(&(s->v[s->n]), 0, sizeof(*(s->v)));
1344
16
  s->n++;
1345
16
  return ONIG_NORMAL;
1346
16
}
1347
1348
1349
/* Entry of the global callout name table (see callout_name_entry()). */
typedef struct {
1350
  UChar* name;       /* heap copy of the name made with onigenc_strdup() */
1351
  int    name_len;   /* byte length */
1352
  int    id;         /* taken from CalloutNameIDCounter after it is incremented */
1353
} CalloutNameEntry;
1354
1355
/*
 * Container type of the global callout name table: an st_table when
 * USE_ST_LIBRARY is defined, otherwise a plain growable array.
 */
#ifdef USE_ST_LIBRARY
1356
typedef st_table  CalloutNameTable;
1357
#else
1358
typedef struct {
1359
  CalloutNameEntry* e;      /* entry array */
1360
  int               num;    /* entries in use */
1361
  int               alloc;  /* entries e has room for */
1362
} CalloutNameTable;
1363
#endif
1364
1365
static CalloutNameTable* GlobalCalloutNameTable;
1366
static int CalloutNameIDCounter;
1367
1368
#ifdef USE_ST_LIBRARY
1369
1370
static int
1371
i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
1372
                          void* arg ARG_UNUSED)
1373
0
{
1374
0
  if (IS_NOT_NULL(e)) {
1375
0
    xfree(e->name);
1376
0
  }
1377
  /*xfree(key->s); */ /* is same as e->name */
1378
0
  xfree(key);
1379
0
  xfree(e);
1380
0
  return ST_DELETE;
1381
0
}
1382
1383
static int
1384
callout_name_table_clear(CalloutNameTable* t)
1385
0
{
1386
0
  if (IS_NOT_NULL(t)) {
1387
0
    onig_st_foreach(t, i_free_callout_name_entry, 0);
1388
0
  }
1389
0
  return 0;
1390
0
}
1391
1392
static int
1393
global_callout_name_table_free(void)
1394
0
{
1395
0
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1396
0
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1397
0
    if (r != 0) return r;
1398
1399
0
    onig_st_free_table(GlobalCalloutNameTable);
1400
0
    GlobalCalloutNameTable = 0;
1401
0
    CalloutNameIDCounter = 0;
1402
0
  }
1403
1404
0
  return 0;
1405
0
}
1406
1407
static CalloutNameEntry*
1408
callout_name_find(OnigEncoding enc, int is_not_single,
1409
                  const UChar* name, const UChar* name_end)
1410
55
{
1411
55
  int r;
1412
55
  CalloutNameEntry* e;
1413
55
  CalloutNameTable* t = GlobalCalloutNameTable;
1414
1415
55
  e = (CalloutNameEntry* )NULL;
1416
55
  if (IS_NOT_NULL(t)) {
1417
53
    r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1418
53
                                          (HashDataType* )((void* )(&e)));
1419
53
    if (r == 0) { /* not found */
1420
53
      if (enc != ONIG_ENCODING_ASCII &&
1421
53
          ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) {
1422
41
        enc = ONIG_ENCODING_ASCII;
1423
41
        onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end,
1424
41
                                          (HashDataType* )((void* )(&e)));
1425
41
      }
1426
53
    }
1427
53
  }
1428
55
  return e;
1429
55
}
1430
1431
#else
1432
1433
static int
1434
callout_name_table_clear(CalloutNameTable* t)
1435
{
1436
  int i;
1437
  CalloutNameEntry* e;
1438
1439
  if (IS_NOT_NULL(t)) {
1440
    for (i = 0; i < t->num; i++) {
1441
      e = &(t->e[i]);
1442
      if (IS_NOT_NULL(e->name)) {
1443
        xfree(e->name);
1444
        e->name     = NULL;
1445
        e->name_len = 0;
1446
        e->id       = 0;
1447
        e->func     = 0;
1448
      }
1449
    }
1450
    if (IS_NOT_NULL(t->e)) {
1451
      xfree(t->e);
1452
      t->e = NULL;
1453
    }
1454
    t->num = 0;
1455
  }
1456
  return 0;
1457
}
1458
1459
static int
1460
global_callout_name_table_free(void)
1461
{
1462
  if (IS_NOT_NULL(GlobalCalloutNameTable)) {
1463
    int r = callout_name_table_clear(GlobalCalloutNameTable);
1464
    if (r != 0) return r;
1465
1466
    xfree(GlobalCalloutNameTable);
1467
    GlobalCalloutNameTable = 0;
1468
    CalloutNameIDCounter = 0;
1469
  }
1470
  return 0;
1471
}
1472
1473
static CalloutNameEntry*
1474
callout_name_find(UChar* name, UChar* name_end)
1475
{
1476
  int i, len;
1477
  CalloutNameEntry* e;
1478
  CalloutNameTable* t = Calloutnames;
1479
1480
  if (IS_NOT_NULL(t)) {
1481
    len = name_end - name;
1482
    for (i = 0; i < t->num; i++) {
1483
      e = &(t->e[i]);
1484
      if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
1485
        return e;
1486
    }
1487
  }
1488
  return (CalloutNameEntry* )NULL;
1489
}
1490
1491
#endif
1492
1493
/* name string must be single byte char string. */
1494
static int
1495
callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc,
1496
                   int is_not_single, UChar* name, UChar* name_end)
1497
14
{
1498
14
  int r;
1499
14
  CalloutNameEntry* e;
1500
14
  CalloutNameTable* t = GlobalCalloutNameTable;
1501
1502
14
  *rentry = 0;
1503
14
  if (name_end - name <= 0)
1504
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1505
1506
14
  e = callout_name_find(enc, is_not_single, name, name_end);
1507
14
  if (IS_NULL(e)) {
1508
14
#ifdef USE_ST_LIBRARY
1509
14
    if (IS_NULL(t)) {
1510
2
      t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM);
1511
2
      CHECK_NULL_RETURN_MEMERR(t);
1512
2
      GlobalCalloutNameTable = t;
1513
2
    }
1514
14
    e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry));
1515
14
    CHECK_NULL_RETURN_MEMERR(e);
1516
1517
14
    e->name = onigenc_strdup(enc, name, name_end);
1518
14
    if (IS_NULL(e->name)) {
1519
0
      xfree(e);  return ONIGERR_MEMORY;
1520
0
    }
1521
1522
14
    r = st_insert_callout_name_table(t, enc, is_not_single,
1523
14
                                     e->name, (e->name + (name_end - name)),
1524
14
                                     (HashDataType )e);
1525
14
    if (r < 0) return r;
1526
1527
#else
1528
1529
    int alloc;
1530
1531
    if (IS_NULL(t)) {
1532
      alloc = INIT_NAMES_ALLOC_NUM;
1533
      t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable));
1534
      CHECK_NULL_RETURN_MEMERR(t);
1535
      t->e     = NULL;
1536
      t->alloc = 0;
1537
      t->num   = 0;
1538
1539
      t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc);
1540
      if (IS_NULL(t->e)) {
1541
        xfree(t);
1542
        return ONIGERR_MEMORY;
1543
      }
1544
      t->alloc = alloc;
1545
      GlobalCalloutNameTable = t;
1546
      goto clear;
1547
    }
1548
    else if (t->num == t->alloc) {
1549
      int i;
1550
1551
      alloc = t->alloc * 2;
1552
      t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc);
1553
      CHECK_NULL_RETURN_MEMERR(t->e);
1554
      t->alloc = alloc;
1555
1556
    clear:
1557
      for (i = t->num; i < t->alloc; i++) {
1558
        t->e[i].name       = NULL;
1559
        t->e[i].name_len   = 0;
1560
        t->e[i].id         = 0;
1561
      }
1562
    }
1563
    e = &(t->e[t->num]);
1564
    t->num++;
1565
    e->name = onigenc_strdup(enc, name, name_end);
1566
    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
1567
#endif
1568
1569
14
    CalloutNameIDCounter++;
1570
14
    e->id = CalloutNameIDCounter;
1571
14
    e->name_len = (int )(name_end - name);
1572
14
  }
1573
1574
14
  *rentry = e;
1575
14
  return e->id;
1576
14
}
1577
1578
static int
1579
is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end)
1580
646
{
1581
646
  UChar* p;
1582
646
  OnigCodePoint c;
1583
1584
646
  if (name >= name_end) return 0;
1585
1586
591
  p = name;
1587
7.15k
  while (p < name_end) {
1588
6.60k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1589
6.60k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c))
1590
47
      return 0;
1591
1592
6.56k
    if (p == name) {
1593
578
      if (c >= '0' && c <= '9') return 0;
1594
578
    }
1595
1596
6.55k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1597
6.55k
  }
1598
1599
542
  return 1;
1600
591
}
1601
1602
static int
1603
is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end)
1604
27
{
1605
27
  UChar* p;
1606
27
  OnigCodePoint c;
1607
1608
27
  if (name >= name_end) return 0;
1609
1610
27
  p = name;
1611
1.21k
  while (p < name_end) {
1612
1.21k
    c = ONIGENC_MBC_TO_CODE(enc, p, name_end);
1613
1.21k
    if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c))
1614
24
      return 0;
1615
1616
1.18k
    if (p == name) {
1617
26
      if (c >= '0' && c <= '9') return 0;
1618
26
    }
1619
1620
1.18k
    p += ONIGENC_MBC_ENC_LEN(enc, p);
1621
1.18k
  }
1622
1623
2
  return 1;
1624
27
}
1625
1626
extern int
1627
onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
1628
                         UChar* name, UChar* name_end, int in,
1629
                         OnigCalloutFunc start_func,
1630
                         OnigCalloutFunc end_func,
1631
                         int arg_num, unsigned int arg_types[],
1632
                         int opt_arg_num, OnigValue opt_defaults[])
1633
14
{
1634
14
  int r;
1635
14
  int i;
1636
14
  int j;
1637
14
  int id;
1638
14
  int is_not_single;
1639
14
  CalloutNameEntry* e;
1640
14
  CalloutNameListEntry* fe;
1641
1642
14
  if (callout_type != ONIG_CALLOUT_TYPE_SINGLE)
1643
0
    return ONIGERR_INVALID_ARGUMENT;
1644
1645
14
  if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM)
1646
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1647
1648
14
  if (opt_arg_num < 0 || opt_arg_num > arg_num)
1649
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1650
1651
14
  if (start_func == 0 && end_func == 0)
1652
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1653
1654
14
  if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0)
1655
0
    return ONIGERR_INVALID_CALLOUT_ARG;
1656
1657
30
  for (i = 0; i < arg_num; i++) {
1658
16
    unsigned int t = arg_types[i];
1659
16
    if (t == ONIG_TYPE_VOID)
1660
0
      return ONIGERR_INVALID_CALLOUT_ARG;
1661
16
    else {
1662
16
      if (i >= arg_num - opt_arg_num) {
1663
8
        if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING &&
1664
8
            t != ONIG_TYPE_TAG)
1665
0
          return ONIGERR_INVALID_CALLOUT_ARG;
1666
8
      }
1667
8
      else {
1668
8
        if (t != ONIG_TYPE_LONG) {
1669
8
          t = t & ~ONIG_TYPE_LONG;
1670
8
          if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG)
1671
0
            return ONIGERR_INVALID_CALLOUT_ARG;
1672
8
        }
1673
8
      }
1674
16
    }
1675
16
  }
1676
1677
14
  if (! is_allowed_callout_name(enc, name, name_end)) {
1678
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1679
0
  }
1680
1681
14
  is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE);
1682
14
  id = callout_name_entry(&e, enc, is_not_single, name, name_end);
1683
14
  if (id < 0) return id;
1684
1685
14
  r = ONIG_NORMAL;
1686
14
  if (IS_NULL(GlobalCalloutNameList)) {
1687
2
    r = make_callout_func_list(&GlobalCalloutNameList, 10);
1688
2
    if (r != ONIG_NORMAL) return r;
1689
2
  }
1690
1691
30
  while (id >= GlobalCalloutNameList->n) {
1692
16
    int rid;
1693
16
    r = callout_func_list_add(GlobalCalloutNameList, &rid);
1694
16
    if (r != ONIG_NORMAL) return r;
1695
16
  }
1696
1697
14
  fe = GlobalCalloutNameList->v + id;
1698
14
  fe->type         = callout_type;
1699
14
  fe->in           = in;
1700
14
  fe->start_func   = start_func;
1701
14
  fe->end_func     = end_func;
1702
14
  fe->arg_num      = arg_num;
1703
14
  fe->opt_arg_num  = opt_arg_num;
1704
14
  fe->name         = e->name;
1705
1706
30
  for (i = 0; i < arg_num; i++) {
1707
16
    fe->arg_types[i] = arg_types[i];
1708
16
  }
1709
22
  for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
1710
8
    if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
1711
8
    if (fe->arg_types[i] == ONIG_TYPE_STRING) {
1712
0
      OnigValue* val;
1713
0
      UChar* ds;
1714
1715
0
      val = opt_defaults + j;
1716
0
      ds = onigenc_strdup(enc, val->s.start, val->s.end);
1717
0
      CHECK_NULL_RETURN_MEMERR(ds);
1718
1719
0
      fe->opt_defaults[i].s.start = ds;
1720
0
      fe->opt_defaults[i].s.end   = ds + (val->s.end - val->s.start);
1721
0
    }
1722
8
    else {
1723
8
      fe->opt_defaults[i] = opt_defaults[j];
1724
8
    }
1725
8
  }
1726
1727
14
  r = id;
1728
14
  return r;
1729
14
}
1730
1731
static int
1732
get_callout_name_id_by_name(OnigEncoding enc, int is_not_single,
1733
                            UChar* name, UChar* name_end, int* rid)
1734
41
{
1735
41
  int r;
1736
41
  CalloutNameEntry* e;
1737
1738
41
  if (! is_allowed_callout_name(enc, name, name_end)) {
1739
0
    return ONIGERR_INVALID_CALLOUT_NAME;
1740
0
  }
1741
1742
41
  e = callout_name_find(enc, is_not_single, name, name_end);
1743
41
  if (IS_NULL(e)) {
1744
41
    return ONIGERR_UNDEFINED_CALLOUT_NAME;
1745
41
  }
1746
1747
0
  r = ONIG_NORMAL;
1748
0
  *rid = e->id;
1749
1750
0
  return r;
1751
41
}
1752
1753
extern OnigCalloutFunc
1754
onig_get_callout_start_func(regex_t* reg, int callout_num)
1755
0
{
1756
  /* If used for callouts of contents, return 0. */
1757
0
  CalloutListEntry* e;
1758
1759
0
  e = onig_reg_callout_list_at(reg, callout_num);
1760
0
  CHECK_NULL_RETURN(e);
1761
0
  return e->start_func;
1762
0
}
1763
1764
extern const UChar*
1765
onig_get_callout_tag_start(regex_t* reg, int callout_num)
1766
0
{
1767
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1768
0
  CHECK_NULL_RETURN(e);
1769
0
  return e->tag_start;
1770
0
}
1771
1772
extern const UChar*
1773
onig_get_callout_tag_end(regex_t* reg, int callout_num)
1774
0
{
1775
0
  CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num);
1776
0
  CHECK_NULL_RETURN(e);
1777
0
  return e->tag_end;
1778
0
}
1779
1780
1781
extern OnigCalloutType
1782
onig_get_callout_type_by_name_id(int name_id)
1783
0
{
1784
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1785
0
    return 0;
1786
1787
0
  return GlobalCalloutNameList->v[name_id].type;
1788
0
}
1789
1790
extern OnigCalloutFunc
1791
onig_get_callout_start_func_by_name_id(int name_id)
1792
0
{
1793
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1794
0
    return 0;
1795
1796
0
  return GlobalCalloutNameList->v[name_id].start_func;
1797
0
}
1798
1799
extern OnigCalloutFunc
1800
onig_get_callout_end_func_by_name_id(int name_id)
1801
0
{
1802
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1803
0
    return 0;
1804
1805
0
  return GlobalCalloutNameList->v[name_id].end_func;
1806
0
}
1807
1808
extern int
1809
onig_get_callout_in_by_name_id(int name_id)
1810
0
{
1811
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1812
0
    return 0;
1813
1814
0
  return GlobalCalloutNameList->v[name_id].in;
1815
0
}
1816
1817
static int
1818
get_callout_arg_num_by_name_id(int name_id)
1819
0
{
1820
0
  return GlobalCalloutNameList->v[name_id].arg_num;
1821
0
}
1822
1823
static int
1824
get_callout_opt_arg_num_by_name_id(int name_id)
1825
0
{
1826
0
  return GlobalCalloutNameList->v[name_id].opt_arg_num;
1827
0
}
1828
1829
static unsigned int
1830
get_callout_arg_type_by_name_id(int name_id, int index)
1831
0
{
1832
0
  return GlobalCalloutNameList->v[name_id].arg_types[index];
1833
0
}
1834
1835
static OnigValue
1836
get_callout_opt_default_by_name_id(int name_id, int index)
1837
0
{
1838
0
  return GlobalCalloutNameList->v[name_id].opt_defaults[index];
1839
0
}
1840
1841
extern UChar*
1842
onig_get_callout_name_by_name_id(int name_id)
1843
0
{
1844
0
  if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
1845
0
    return 0;
1846
1847
0
  return GlobalCalloutNameList->v[name_id].name;
1848
0
}
1849
1850
extern int
1851
onig_global_callout_names_free(void)
1852
0
{
1853
0
  free_callout_func_list(GlobalCalloutNameList);
1854
0
  GlobalCalloutNameList = 0;
1855
1856
0
  global_callout_name_table_free();
1857
0
  return ONIG_NORMAL;
1858
0
}
1859
1860
1861
typedef st_table   CalloutTagTable;
1862
typedef intptr_t   CalloutTagVal;
1863
1864
0
#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST     (1<<0)
1865
1866
static int
1867
i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg)
1868
0
{
1869
0
  int num;
1870
0
  RegexExt* ext = (RegexExt* )arg;
1871
1872
0
  num = (int )e - 1;
1873
0
  ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST;
1874
0
  return ST_CONTINUE;
1875
0
}
1876
1877
static int
1878
setup_ext_callout_list_values(regex_t* reg)
1879
0
{
1880
0
  int i, j;
1881
0
  RegexExt* ext;
1882
1883
0
  ext = reg->extp;
1884
0
  if (IS_NOT_NULL(ext->tag_table)) {
1885
0
    onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set,
1886
0
                    (st_data_t )ext);
1887
0
  }
1888
1889
0
  for (i = 0; i < ext->callout_num; i++) {
1890
0
    CalloutListEntry* e = ext->callout_list + i;
1891
0
    if (e->of == ONIG_CALLOUT_OF_NAME) {
1892
0
      for (j = 0; j < e->u.arg.num; j++) {
1893
0
        if (e->u.arg.types[j] == ONIG_TYPE_TAG) {
1894
0
          UChar* start;
1895
0
          UChar* end;
1896
0
          int num;
1897
0
          start = e->u.arg.vals[j].s.start;
1898
0
          end   = e->u.arg.vals[j].s.end;
1899
0
          num = onig_get_callout_num_by_tag(reg, start, end);
1900
0
          if (num < 0) return num;
1901
0
          e->u.arg.vals[j].tag = num;
1902
0
        }
1903
0
      }
1904
0
    }
1905
0
  }
1906
1907
0
  return ONIG_NORMAL;
1908
0
}
1909
1910
extern int
1911
onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num)
1912
0
{
1913
0
  RegexExt* ext = reg->extp;
1914
1915
0
  if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0;
1916
0
  if (callout_num > ext->callout_num) return 0;
1917
1918
0
  return (ext->callout_list[callout_num].flag &
1919
0
          CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0;
1920
0
}
1921
1922
/*
 * onig_st_foreach() callback used by callout_tag_table_clear(): frees the
 * key handed over by the iteration and returns ST_DELETE.  The value e and
 * arg are not used.
 */
static int
1923
i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED)
1924
0
{
1925
0
  xfree(key);
1926
0
  return ST_DELETE;
1927
0
}
1928
1929
static int
1930
callout_tag_table_clear(CalloutTagTable* t)
1931
0
{
1932
0
  if (IS_NOT_NULL(t)) {
1933
0
    onig_st_foreach(t, i_free_callout_tag_entry, 0);
1934
0
  }
1935
0
  return 0;
1936
0
}
1937
1938
extern int
1939
onig_callout_tag_table_free(void* table)
1940
0
{
1941
0
  CalloutTagTable* t = (CalloutTagTable* )table;
1942
1943
0
  if (IS_NOT_NULL(t)) {
1944
0
    int r = callout_tag_table_clear(t);
1945
0
    if (r != 0) return r;
1946
1947
0
    onig_st_free_table(t);
1948
0
  }
1949
1950
0
  return 0;
1951
0
}
1952
1953
extern int
1954
onig_get_callout_num_by_tag(regex_t* reg,
1955
                            const UChar* tag, const UChar* tag_end)
1956
0
{
1957
0
  int r;
1958
0
  RegexExt* ext;
1959
0
  CalloutTagVal e;
1960
1961
0
  ext = reg->extp;
1962
0
  if (IS_NULL(ext) || IS_NULL(ext->tag_table))
1963
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1964
1965
0
  r = onig_st_lookup_strend(ext->tag_table, tag, tag_end,
1966
0
                            (HashDataType* )((void* )(&e)));
1967
0
  if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME;
1968
0
  return (int )e;
1969
0
}
1970
1971
static CalloutTagVal
1972
callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end)
1973
0
{
1974
0
  CalloutTagVal e;
1975
1976
0
  e = -1;
1977
0
  if (IS_NOT_NULL(t)) {
1978
0
    onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
1979
0
  }
1980
0
  return e;
1981
0
}
1982
1983
static int
1984
callout_tag_table_new(CalloutTagTable** rt)
1985
0
{
1986
0
  CalloutTagTable* t;
1987
1988
0
  *rt = 0;
1989
0
  t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM);
1990
0
  CHECK_NULL_RETURN_MEMERR(t);
1991
1992
0
  *rt = t;
1993
0
  return ONIG_NORMAL;
1994
0
}
1995
1996
static int
1997
callout_tag_entry_raw(ParseEnv* env, CalloutTagTable* t, UChar* name,
1998
                      UChar* name_end, CalloutTagVal entry_val)
1999
0
{
2000
0
  int r;
2001
0
  CalloutTagVal val;
2002
2003
0
  if (name_end - name <= 0)
2004
0
    return ONIGERR_INVALID_CALLOUT_TAG_NAME;
2005
2006
0
  val = callout_tag_find(t, name, name_end);
2007
0
  if (val >= 0) {
2008
0
    onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
2009
0
                                   name, name_end);
2010
0
    return ONIGERR_MULTIPLEX_DEFINED_NAME;
2011
0
  }
2012
2013
0
  r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
2014
0
  if (r < 0) return r;
2015
2016
0
  return ONIG_NORMAL;
2017
0
}
2018
2019
static int
2020
ext_ensure_tag_table(regex_t* reg)
2021
0
{
2022
0
  int r;
2023
0
  RegexExt* ext;
2024
0
  CalloutTagTable* t;
2025
2026
0
  ext = onig_get_regex_ext(reg);
2027
0
  CHECK_NULL_RETURN_MEMERR(ext);
2028
2029
0
  if (IS_NULL(ext->tag_table)) {
2030
0
    r = callout_tag_table_new(&t);
2031
0
    if (r != ONIG_NORMAL) return r;
2032
2033
0
    ext->tag_table = t;
2034
0
  }
2035
2036
0
  return ONIG_NORMAL;
2037
0
}
2038
2039
static int
2040
callout_tag_entry(ParseEnv* env, regex_t* reg, UChar* name, UChar* name_end,
2041
                  CalloutTagVal entry_val)
2042
0
{
2043
0
  int r;
2044
0
  RegexExt* ext;
2045
0
  CalloutListEntry* e;
2046
2047
0
  r = ext_ensure_tag_table(reg);
2048
0
  if (r != ONIG_NORMAL) return r;
2049
2050
0
  ext = onig_get_regex_ext(reg);
2051
0
  CHECK_NULL_RETURN_MEMERR(ext);
2052
0
  r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
2053
2054
0
  e = onig_reg_callout_list_at(reg, (int )entry_val);
2055
0
  CHECK_NULL_RETURN_MEMERR(e);
2056
0
  e->tag_start = name;
2057
0
  e->tag_end   = name_end;
2058
2059
0
  return r;
2060
0
}
2061
2062
#endif /* USE_CALLOUT */
2063
2064
2065
3.20k
#define INIT_PARSEENV_MEMENV_ALLOC_SIZE   16
2066
2067
static void
2068
scan_env_clear(ParseEnv* env)
2069
1.37M
{
2070
1.37M
  MEM_STATUS_CLEAR(env->cap_history);
2071
1.37M
  MEM_STATUS_CLEAR(env->backtrack_mem);
2072
1.37M
  MEM_STATUS_CLEAR(env->backrefed_mem);
2073
1.37M
  env->error      = (UChar* )NULL;
2074
1.37M
  env->error_end  = (UChar* )NULL;
2075
1.37M
  env->num_call   = 0;
2076
2077
1.37M
#ifdef USE_CALL
2078
1.37M
  env->unset_addr_list = NULL;
2079
1.37M
#endif
2080
2081
1.37M
  env->num_mem    = 0;
2082
1.37M
  env->num_named  = 0;
2083
1.37M
  env->mem_alloc  = 0;
2084
1.37M
  env->mem_env_dynamic = (MemEnv* )NULL;
2085
2086
1.37M
  xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
2087
2088
1.37M
  env->parse_depth      = 0;
2089
#ifdef ONIG_DEBUG_PARSE
2090
  env->max_parse_depth  = 0;
2091
#endif
2092
1.37M
  env->backref_num      = 0;
2093
1.37M
  env->keep_num         = 0;
2094
1.37M
  env->id_num           = 0;
2095
1.37M
  env->save_alloc_num   = 0;
2096
1.37M
  env->saves            = 0;
2097
1.37M
  env->flags            = 0;
2098
1.37M
}
2099
2100
static int
2101
scan_env_add_mem_entry(ParseEnv* env)
2102
1.70M
{
2103
1.70M
  int i, need, alloc;
2104
1.70M
  MemEnv* p;
2105
2106
1.70M
  need = env->num_mem + 1;
2107
1.70M
  if (need > MaxCaptureNum && MaxCaptureNum != 0)
2108
5
    return ONIGERR_TOO_MANY_CAPTURES;
2109
2110
1.70M
  if (need >= PARSEENV_MEMENV_SIZE) {
2111
1.56M
    if (env->mem_alloc <= need) {
2112
14.3k
      if (IS_NULL(env->mem_env_dynamic)) {
2113
3.20k
        alloc = INIT_PARSEENV_MEMENV_ALLOC_SIZE;
2114
3.20k
        p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
2115
3.20k
        CHECK_NULL_RETURN_MEMERR(p);
2116
3.20k
        xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
2117
3.20k
      }
2118
11.1k
      else {
2119
11.1k
        alloc = env->mem_alloc * 2;
2120
11.1k
        p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
2121
11.1k
        CHECK_NULL_RETURN_MEMERR(p);
2122
11.1k
      }
2123
2124
2.15M
      for (i = env->num_mem + 1; i < alloc; i++) {
2125
2.14M
        p[i].mem_node = NULL_NODE;
2126
2.14M
        p[i].empty_repeat_node = NULL_NODE;
2127
2.14M
      }
2128
2129
14.3k
      env->mem_env_dynamic = p;
2130
14.3k
      env->mem_alloc = alloc;
2131
14.3k
    }
2132
1.56M
  }
2133
2134
1.70M
  env->num_mem++;
2135
1.70M
  return env->num_mem;
2136
1.70M
}
2137
2138
static int
2139
scan_env_set_mem_node(ParseEnv* env, int num, Node* node)
2140
427k
{
2141
427k
  if (env->num_mem >= num)
2142
427k
    PARSEENV_MEMENV(env)[num].mem_node = node;
2143
0
  else
2144
0
    return ONIGERR_PARSER_BUG;
2145
427k
  return 0;
2146
427k
}
2147
2148
static void
2149
node_free_body(Node* node)
2150
20.5M
{
2151
20.5M
  if (IS_NULL(node)) return ;
2152
2153
20.5M
  switch (ND_TYPE(node)) {
2154
8.26M
  case ND_STRING:
2155
8.26M
    if (STR_(node)->capacity != 0 &&
2156
8.26M
        IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
2157
593k
      xfree(STR_(node)->s);
2158
593k
    }
2159
8.26M
    break;
2160
2161
2.22M
  case ND_LIST:
2162
2.35M
  case ND_ALT:
2163
2.35M
    onig_node_free(ND_CAR(node));
2164
2.35M
    node = ND_CDR(node);
2165
13.4M
    while (IS_NOT_NULL(node)) {
2166
11.1M
      Node* next = ND_CDR(node);
2167
11.1M
      onig_node_free(ND_CAR(node));
2168
11.1M
      xfree(node);
2169
11.1M
      node = next;
2170
11.1M
    }
2171
2.35M
    break;
2172
2173
2.12M
  case ND_CCLASS:
2174
2.12M
    {
2175
2.12M
      CClassNode* cc = CCLASS_(node);
2176
2177
2.12M
      if (cc->mbuf)
2178
1.21M
        bbuf_free(cc->mbuf);
2179
2.12M
    }
2180
2.12M
    break;
2181
2182
26.1k
  case ND_BACKREF:
2183
26.1k
    if (IS_NOT_NULL(BACKREF_(node)->back_dynamic))
2184
0
      xfree(BACKREF_(node)->back_dynamic);
2185
26.1k
    break;
2186
2187
2.51M
  case ND_BAG:
2188
2.51M
    if (ND_BODY(node))
2189
1.22M
      onig_node_free(ND_BODY(node));
2190
2191
2.51M
    {
2192
2.51M
      BagNode* en = BAG_(node);
2193
2.51M
      if (en->type == BAG_IF_ELSE) {
2194
3.12k
        onig_node_free(en->te.Then);
2195
3.12k
        onig_node_free(en->te.Else);
2196
3.12k
      }
2197
2.51M
    }
2198
2.51M
    break;
2199
2200
2.62M
  case ND_QUANT:
2201
2.62M
    if (ND_BODY(node))
2202
2.22M
      onig_node_free(ND_BODY(node));
2203
2.62M
    break;
2204
2205
271k
  case ND_ANCHOR:
2206
271k
    if (ND_BODY(node))
2207
1.97k
      onig_node_free(ND_BODY(node));
2208
271k
    if (IS_NOT_NULL(ANCHOR_(node)->lead_node))
2209
0
      onig_node_free(ANCHOR_(node)->lead_node);
2210
271k
    break;
2211
2212
2.35M
  case ND_CTYPE:
2213
2.37M
  case ND_CALL:
2214
2.39M
  case ND_GIMMICK:
2215
2.39M
    break;
2216
20.5M
  }
2217
20.5M
}
2218
2219
extern void
2220
onig_node_free(Node* node)
2221
23.2M
{
2222
23.2M
  if (IS_NULL(node)) return ;
2223
2224
#ifdef DEBUG_ND_FREE
2225
  fprintf(stderr, "onig_node_free: %p\n", node);
2226
#endif
2227
2228
20.5M
  node_free_body(node);
2229
20.5M
  xfree(node);
2230
20.5M
}
2231
2232
static void
2233
cons_node_free_alone(Node* node)
2234
1.42k
{
2235
1.42k
  ND_CAR(node) = 0;
2236
1.42k
  ND_CDR(node) = 0;
2237
1.42k
  onig_node_free(node);
2238
1.42k
}
2239
2240
static Node*
2241
node_new(void)
2242
31.7M
{
2243
31.7M
  Node* node;
2244
2245
31.7M
  node = (Node* )xmalloc(sizeof(Node));
2246
31.7M
  CHECK_NULL_RETURN(node);
2247
31.7M
  xmemset(node, 0, sizeof(*node));
2248
2249
#ifdef DEBUG_ND_FREE
2250
  fprintf(stderr, "node_new: %p\n", node);
2251
#endif
2252
31.7M
  return node;
2253
31.7M
}
2254
2255
extern int
2256
onig_node_copy(Node** rcopy, Node* from)
2257
0
{
2258
0
  int r;
2259
0
  Node* copy;
2260
2261
0
  *rcopy = NULL_NODE;
2262
2263
0
  switch (ND_TYPE(from)) {
2264
0
  case ND_LIST:
2265
0
  case ND_ALT:
2266
0
  case ND_ANCHOR:
2267
    /* These node's link to other nodes are processed by caller. */
2268
0
    break;
2269
0
  case ND_STRING:
2270
0
  case ND_CCLASS:
2271
0
  case ND_CTYPE:
2272
    /* Fixed contents after copy. */
2273
0
    break;
2274
0
  default:
2275
    /* Not supported yet. */
2276
0
    return ONIGERR_TYPE_BUG;
2277
0
    break;
2278
0
  }
2279
2280
0
  copy = node_new();
2281
0
  CHECK_NULL_RETURN_MEMERR(copy);
2282
0
  xmemcpy(copy, from, sizeof(*copy));
2283
2284
0
  switch (ND_TYPE(copy)) {
2285
0
  case ND_STRING:
2286
0
    r = onig_node_str_set(copy, STR_(from)->s, STR_(from)->end, FALSE);
2287
0
    if (r != 0) {
2288
0
    err:
2289
0
      onig_node_free(copy);
2290
0
      return r;
2291
0
    }
2292
0
    break;
2293
2294
0
  case ND_CCLASS:
2295
0
    {
2296
0
      CClassNode *fcc, *tcc;
2297
2298
0
      fcc = CCLASS_(from);
2299
0
      tcc = CCLASS_(copy);
2300
0
      if (IS_NOT_NULL(fcc->mbuf)) {
2301
0
        r = bbuf_clone(&(tcc->mbuf), fcc->mbuf);
2302
0
        if (r != 0) goto err;
2303
0
      }
2304
0
    }
2305
0
    break;
2306
2307
0
  default:
2308
0
    break;
2309
0
  }
2310
2311
0
  *rcopy = copy;
2312
0
  return ONIG_NORMAL;
2313
0
}
2314
2315
2316
static void
2317
initialize_cclass(CClassNode* cc)
2318
2.12M
{
2319
2.12M
  BITSET_CLEAR(cc->bs);
2320
2.12M
  cc->flags = 0;
2321
2.12M
  cc->mbuf  = NULL;
2322
2.12M
}
2323
2324
static Node*
2325
node_new_cclass(void)
2326
2.12M
{
2327
2.12M
  Node* node = node_new();
2328
2.12M
  CHECK_NULL_RETURN(node);
2329
2330
2.12M
  ND_SET_TYPE(node, ND_CCLASS);
2331
2.12M
  initialize_cclass(CCLASS_(node));
2332
2.12M
  return node;
2333
2.12M
}
2334
2335
static Node*
2336
node_new_ctype(int type, int not, OnigOptionType options)
2337
2.35M
{
2338
2.35M
  Node* node = node_new();
2339
2.35M
  CHECK_NULL_RETURN(node);
2340
2341
2.35M
  ND_SET_TYPE(node, ND_CTYPE);
2342
2.35M
  CTYPE_(node)->ctype   = type;
2343
2.35M
  CTYPE_(node)->not     = not;
2344
2.35M
  CTYPE_(node)->ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(type, options);
2345
2.35M
  return node;
2346
2.35M
}
2347
2348
static Node*
2349
node_new_anychar(OnigOptionType options)
2350
2.35M
{
2351
2.35M
  Node* node;
2352
2353
2.35M
  node = node_new_ctype(CTYPE_ANYCHAR, FALSE, options);
2354
2.35M
  CHECK_NULL_RETURN(node);
2355
2356
2.35M
  if (OPTON_MULTILINE(options))
2357
11.3k
    ND_STATUS_ADD(node, MULTILINE);
2358
2.35M
  return node;
2359
2.35M
}
2360
2361
static int
2362
node_new_no_newline(Node** node, ParseEnv* env)
2363
10
{
2364
10
  Node* n;
2365
2366
10
  n = node_new_anychar(ONIG_OPTION_NONE);
2367
10
  CHECK_NULL_RETURN_MEMERR(n);
2368
10
  *node = n;
2369
10
  return 0;
2370
10
}
2371
2372
static int
2373
node_new_true_anychar(Node** node)
2374
10.2k
{
2375
10.2k
  Node* n;
2376
2377
10.2k
  n = node_new_anychar(ONIG_OPTION_MULTILINE);
2378
10.2k
  CHECK_NULL_RETURN_MEMERR(n);
2379
10.2k
  *node = n;
2380
10.2k
  return 0;
2381
10.2k
}
2382
2383
static Node*
2384
node_new_list(Node* left, Node* right)
2385
10.5M
{
2386
10.5M
  Node* node = node_new();
2387
10.5M
  CHECK_NULL_RETURN(node);
2388
2389
10.5M
  ND_SET_TYPE(node, ND_LIST);
2390
10.5M
  ND_CAR(node)  = left;
2391
10.5M
  ND_CDR(node) = right;
2392
10.5M
  return node;
2393
10.5M
}
2394
2395
extern Node*
2396
onig_node_new_list(Node* left, Node* right)
2397
1.11M
{
2398
1.11M
  return node_new_list(left, right);
2399
1.11M
}
2400
2401
extern Node*
2402
onig_node_new_alt(Node* left, Node* right)
2403
1.39M
{
2404
1.39M
  Node* node = node_new();
2405
1.39M
  CHECK_NULL_RETURN(node);
2406
2407
1.39M
  ND_SET_TYPE(node, ND_ALT);
2408
1.39M
  ND_CAR(node)  = left;
2409
1.39M
  ND_CDR(node) = right;
2410
1.39M
  return node;
2411
1.39M
}
2412
2413
static Node*
2414
make_list_or_alt(NodeType type, int n, Node* ns[])
2415
1.52M
{
2416
1.52M
  Node* r;
2417
2418
1.52M
  if (n <= 0) return NULL_NODE;
2419
2420
1.52M
  if (n == 1) {
2421
741k
    r = node_new();
2422
741k
    CHECK_NULL_RETURN(r);
2423
741k
    ND_SET_TYPE(r, type);
2424
741k
    ND_CAR(r) = ns[0];
2425
741k
    ND_CDR(r) = NULL_NODE;
2426
741k
  }
2427
784k
  else {
2428
784k
    Node* right;
2429
2430
784k
    r = node_new();
2431
784k
    CHECK_NULL_RETURN(r);
2432
2433
784k
    right = make_list_or_alt(type, n - 1, ns + 1);
2434
784k
    if (IS_NULL(right)) {
2435
0
      onig_node_free(r);
2436
0
      return NULL_NODE;
2437
0
    }
2438
2439
784k
    ND_SET_TYPE(r, type);
2440
784k
    ND_CAR(r) = ns[0];
2441
784k
    ND_CDR(r) = right;
2442
784k
  }
2443
2444
1.52M
  return r;
2445
1.52M
}
2446
2447
static Node*
2448
make_list(int n, Node* ns[])
2449
736k
{
2450
736k
  return make_list_or_alt(ND_LIST, n, ns);
2451
736k
}
2452
2453
static Node*
2454
make_alt(int n, Node* ns[])
2455
4.96k
{
2456
4.96k
  return make_list_or_alt(ND_ALT, n, ns);
2457
4.96k
}
2458
2459
static Node*
2460
node_new_anchor(int type)
2461
271k
{
2462
271k
  Node* node;
2463
2464
271k
  node = node_new();
2465
271k
  CHECK_NULL_RETURN(node);
2466
2467
271k
  ND_SET_TYPE(node, ND_ANCHOR);
2468
271k
  ANCHOR_(node)->type       = type;
2469
271k
  ANCHOR_(node)->char_min_len = 0;
2470
271k
  ANCHOR_(node)->char_max_len = INFINITE_LEN;
2471
271k
  ANCHOR_(node)->ascii_mode = 0;
2472
271k
  ANCHOR_(node)->lead_node  = NULL_NODE;
2473
271k
  return node;
2474
271k
}
2475
2476
static Node*
2477
node_new_anchor_with_options(int type, OnigOptionType options)
2478
269k
{
2479
269k
  int ascii_mode;
2480
269k
  Node* node;
2481
2482
269k
  node = node_new_anchor(type);
2483
269k
  CHECK_NULL_RETURN(node);
2484
2485
269k
  ascii_mode = OPTON_WORD_ASCII(options) && IS_WORD_ANCHOR_TYPE(type) ? 1 : 0;
2486
269k
  ANCHOR_(node)->ascii_mode = ascii_mode;
2487
2488
269k
  if (type == ANCR_TEXT_SEGMENT_BOUNDARY ||
2489
269k
      type == ANCR_NO_TEXT_SEGMENT_BOUNDARY) {
2490
8.78k
    if (OPTON_TEXT_SEGMENT_WORD(options))
2491
0
      ND_STATUS_ADD(node, TEXT_SEGMENT_WORD);
2492
8.78k
  }
2493
2494
269k
  return node;
2495
269k
}
2496
2497
static Node*
2498
node_new_backref(int back_num, int* backrefs, int by_name,
2499
#ifdef USE_BACKREF_WITH_LEVEL
2500
                 int exist_level, int nest_level,
2501
#endif
2502
                 ParseEnv* env)
2503
26.1k
{
2504
26.1k
  int i;
2505
26.1k
  Node* node;
2506
2507
26.1k
  node = node_new();
2508
26.1k
  CHECK_NULL_RETURN(node);
2509
2510
26.1k
  ND_SET_TYPE(node, ND_BACKREF);
2511
26.1k
  BACKREF_(node)->back_num = back_num;
2512
26.1k
  BACKREF_(node)->back_dynamic = (int* )NULL;
2513
26.1k
  if (by_name != 0)
2514
0
    ND_STATUS_ADD(node, BY_NAME);
2515
2516
26.1k
  if (OPTON_IGNORECASE(env->options))
2517
4.17k
    ND_STATUS_ADD(node, IGNORECASE);
2518
2519
26.1k
#ifdef USE_BACKREF_WITH_LEVEL
2520
26.1k
  if (exist_level != 0) {
2521
26
    ND_STATUS_ADD(node, NEST_LEVEL);
2522
26
    BACKREF_(node)->nest_level  = nest_level;
2523
26
  }
2524
26.1k
#endif
2525
2526
49.3k
  for (i = 0; i < back_num; i++) {
2527
26.1k
    if (backrefs[i] <= env->num_mem &&
2528
26.1k
        IS_NULL(PARSEENV_MEMENV(env)[backrefs[i]].mem_node)) {
2529
2.92k
      ND_STATUS_ADD(node, RECURSION);   /* /...(\1).../ */
2530
2.92k
      break;
2531
2.92k
    }
2532
26.1k
  }
2533
2534
26.1k
  if (back_num <= ND_BACKREFS_SIZE) {
2535
52.2k
    for (i = 0; i < back_num; i++)
2536
26.1k
      BACKREF_(node)->back_static[i] = backrefs[i];
2537
26.1k
  }
2538
0
  else {
2539
0
    int* p = (int* )xmalloc(sizeof(int) * back_num);
2540
0
    if (IS_NULL(p)) {
2541
0
      onig_node_free(node);
2542
0
      return NULL;
2543
0
    }
2544
0
    BACKREF_(node)->back_dynamic = p;
2545
0
    for (i = 0; i < back_num; i++)
2546
0
      p[i] = backrefs[i];
2547
0
  }
2548
2549
26.1k
  env->backref_num++;
2550
26.1k
  return node;
2551
26.1k
}
2552
2553
static Node*
2554
node_new_backref_checker(int back_num, int* backrefs, int by_name,
2555
#ifdef USE_BACKREF_WITH_LEVEL
2556
                         int exist_level, int nest_level,
2557
#endif
2558
                         ParseEnv* env)
2559
3.17k
{
2560
3.17k
  Node* node;
2561
2562
3.17k
  node = node_new_backref(back_num, backrefs, by_name,
2563
3.17k
#ifdef USE_BACKREF_WITH_LEVEL
2564
3.17k
                          exist_level, nest_level,
2565
3.17k
#endif
2566
3.17k
                          env);
2567
3.17k
  CHECK_NULL_RETURN(node);
2568
2569
3.17k
  ND_STATUS_ADD(node, CHECKER);
2570
3.17k
  return node;
2571
3.17k
}
2572
2573
#ifdef USE_CALL
2574
static Node*
2575
node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
2576
19.7k
{
2577
19.7k
  Node* node = node_new();
2578
19.7k
  CHECK_NULL_RETURN(node);
2579
2580
19.7k
  ND_SET_TYPE(node, ND_CALL);
2581
19.7k
  CALL_(node)->by_number   = by_number;
2582
19.7k
  CALL_(node)->name        = name;
2583
19.7k
  CALL_(node)->name_end    = name_end;
2584
19.7k
  CALL_(node)->called_gnum = gnum;
2585
19.7k
  CALL_(node)->entry_count = 1;
2586
19.7k
  return node;
2587
19.7k
}
2588
#endif
2589
2590
static Node*
2591
node_new_quantifier(int lower, int upper, int by_number)
2592
2.62M
{
2593
2.62M
  Node* node = node_new();
2594
2.62M
  CHECK_NULL_RETURN(node);
2595
2596
2.62M
  ND_SET_TYPE(node, ND_QUANT);
2597
2.62M
  QUANT_(node)->lower            = lower;
2598
2.62M
  QUANT_(node)->upper            = upper;
2599
2.62M
  QUANT_(node)->greedy           = 1;
2600
2.62M
  QUANT_(node)->emptiness        = BODY_IS_NOT_EMPTY;
2601
2.62M
  QUANT_(node)->head_exact       = NULL_NODE;
2602
2.62M
  QUANT_(node)->next_head_exact  = NULL_NODE;
2603
2.62M
  QUANT_(node)->include_referred = 0;
2604
2.62M
  QUANT_(node)->empty_status_mem = 0;
2605
2.62M
  if (by_number != 0)
2606
22.9k
    ND_STATUS_ADD(node, BY_NUMBER);
2607
2608
2.62M
  return node;
2609
2.62M
}
2610
2611
static Node*
2612
node_new_bag(enum BagType type)
2613
2.51M
{
2614
2.51M
  Node* node = node_new();
2615
2.51M
  CHECK_NULL_RETURN(node);
2616
2617
2.51M
  ND_SET_TYPE(node, ND_BAG);
2618
2.51M
  BAG_(node)->type = type;
2619
2620
2.51M
  switch (type) {
2621
1.70M
  case BAG_MEMORY:
2622
1.70M
    BAG_(node)->m.regnum       =  0;
2623
1.70M
    BAG_(node)->m.called_addr  = -1;
2624
1.70M
    BAG_(node)->m.entry_count  =  1;
2625
1.70M
    BAG_(node)->m.called_state =  0;
2626
1.70M
    break;
2627
2628
11.7k
  case BAG_OPTION:
2629
11.7k
    BAG_(node)->o.options =  0;
2630
11.7k
    break;
2631
2632
797k
  case BAG_STOP_BACKTRACK:
2633
797k
    break;
2634
2635
3.12k
  case BAG_IF_ELSE:
2636
3.12k
    BAG_(node)->te.Then = 0;
2637
3.12k
    BAG_(node)->te.Else = 0;
2638
3.12k
    break;
2639
2.51M
  }
2640
2641
2.51M
  BAG_(node)->opt_count = 0;
2642
2.51M
  return node;
2643
2.51M
}
2644
2645
extern Node*
2646
onig_node_new_bag(enum BagType type)
2647
192k
{
2648
192k
  return node_new_bag(type);
2649
192k
}
2650
2651
static Node*
2652
node_new_bag_if_else(Node* cond, Node* Then, Node* Else)
2653
3.12k
{
2654
3.12k
  Node* n;
2655
3.12k
  n = node_new_bag(BAG_IF_ELSE);
2656
3.12k
  CHECK_NULL_RETURN(n);
2657
2658
3.12k
  ND_BODY(n) = cond;
2659
3.12k
  BAG_(n)->te.Then = Then;
2660
3.12k
  BAG_(n)->te.Else = Else;
2661
3.12k
  return n;
2662
3.12k
}
2663
2664
static Node*
2665
node_new_memory(int is_named)
2666
1.70M
{
2667
1.70M
  Node* node = node_new_bag(BAG_MEMORY);
2668
1.70M
  CHECK_NULL_RETURN(node);
2669
1.70M
  if (is_named != 0)
2670
2.78k
    ND_STATUS_ADD(node, NAMED_GROUP);
2671
2672
1.70M
  return node;
2673
1.70M
}
2674
2675
static Node*
2676
node_new_option(OnigOptionType option)
2677
11.7k
{
2678
11.7k
  Node* node = node_new_bag(BAG_OPTION);
2679
11.7k
  CHECK_NULL_RETURN(node);
2680
11.7k
  BAG_(node)->o.options = option;
2681
11.7k
  return node;
2682
11.7k
}
2683
2684
static Node*
2685
node_new_group(Node* content)
2686
0
{
2687
0
  Node* node;
2688
2689
0
  node = node_new();
2690
0
  CHECK_NULL_RETURN(node);
2691
0
  ND_SET_TYPE(node, ND_LIST);
2692
0
  ND_CAR(node) = content;
2693
0
  ND_CDR(node) = NULL_NODE;
2694
2695
0
  return node;
2696
0
}
2697
2698
static Node*
2699
node_drop_group(Node* group)
2700
0
{
2701
0
  Node* content;
2702
2703
0
  content = ND_CAR(group);
2704
0
  ND_CAR(group) = NULL_NODE;
2705
0
  onig_node_free(group);
2706
0
  return content;
2707
0
}
2708
2709
static int
2710
node_set_fail(Node* node)
2711
4.96k
{
2712
4.96k
  ND_SET_TYPE(node, ND_GIMMICK);
2713
4.96k
  GIMMICK_(node)->type = GIMMICK_FAIL;
2714
4.96k
  return ONIG_NORMAL;
2715
4.96k
}
2716
2717
static int
2718
node_new_fail(Node** node, ParseEnv* env)
2719
4.96k
{
2720
4.96k
  *node = node_new();
2721
4.96k
  CHECK_NULL_RETURN_MEMERR(*node);
2722
2723
4.96k
  return node_set_fail(*node);
2724
4.96k
}
2725
2726
extern int
2727
onig_node_reset_fail(Node* node)
2728
0
{
2729
0
  node_free_body(node);
2730
0
  return node_set_fail(node);
2731
0
}
2732
2733
static int
2734
node_new_save_gimmick(Node** node, enum SaveType save_type, ParseEnv* env)
2735
5.23k
{
2736
5.23k
  int id;
2737
2738
5.23k
  ID_ENTRY(env, id);
2739
2740
5.23k
  *node = node_new();
2741
5.23k
  CHECK_NULL_RETURN_MEMERR(*node);
2742
2743
5.23k
  ND_SET_TYPE(*node, ND_GIMMICK);
2744
5.23k
  GIMMICK_(*node)->id   = id;
2745
5.23k
  GIMMICK_(*node)->type = GIMMICK_SAVE;
2746
5.23k
  GIMMICK_(*node)->detail_type = (int )save_type;
2747
2748
5.23k
  return ONIG_NORMAL;
2749
5.23k
}
2750
2751
static int
2752
node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type,
2753
                            int id, ParseEnv* env)
2754
7.45k
{
2755
7.45k
  *node = node_new();
2756
7.45k
  CHECK_NULL_RETURN_MEMERR(*node);
2757
2758
7.45k
  ND_SET_TYPE(*node, ND_GIMMICK);
2759
7.45k
  GIMMICK_(*node)->id   = id;
2760
7.45k
  GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR;
2761
7.45k
  GIMMICK_(*node)->detail_type = (int )update_var_type;
2762
2763
7.45k
  return ONIG_NORMAL;
2764
7.45k
}
2765
2766
static int
2767
node_new_keep(Node** node, ParseEnv* env)
2768
223
{
2769
223
  int r;
2770
2771
223
  r = node_new_save_gimmick(node, SAVE_KEEP, env);
2772
223
  if (r != 0) return r;
2773
2774
223
  env->keep_num++;
2775
223
  return ONIG_NORMAL;
2776
223
}
2777
2778
#ifdef USE_CALLOUT
2779
2780
extern void
2781
onig_free_reg_callout_list(int n, CalloutListEntry* list)
2782
0
{
2783
0
  int i;
2784
0
  int j;
2785
2786
0
  if (IS_NULL(list)) return ;
2787
2788
0
  for (i = 0; i < n; i++) {
2789
0
    if (list[i].of == ONIG_CALLOUT_OF_NAME) {
2790
0
      for (j = 0; j < list[i].u.arg.passed_num; j++) {
2791
0
        if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) {
2792
0
          if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start))
2793
0
            xfree(list[i].u.arg.vals[j].s.start);
2794
0
        }
2795
0
      }
2796
0
    }
2797
0
    else { /* ONIG_CALLOUT_OF_CONTENTS */
2798
0
      if (IS_NOT_NULL(list[i].u.content.start)) {
2799
0
        xfree((void* )list[i].u.content.start);
2800
0
      }
2801
0
    }
2802
0
  }
2803
2804
0
  xfree(list);
2805
0
}
2806
2807
extern CalloutListEntry*
2808
onig_reg_callout_list_at(regex_t* reg, int num)
2809
0
{
2810
0
  RegexExt* ext = reg->extp;
2811
0
  CHECK_NULL_RETURN(ext);
2812
2813
0
  if (num <= 0 || num > ext->callout_num)
2814
0
    return 0;
2815
2816
0
  num--;
2817
0
  return ext->callout_list + num;
2818
0
}
2819
2820
static int
2821
reg_callout_list_entry(ParseEnv* env, int* rnum)
2822
0
{
2823
0
#define INIT_CALLOUT_LIST_NUM  3
2824
2825
0
  int num;
2826
0
  CalloutListEntry* list;
2827
0
  CalloutListEntry* e;
2828
0
  RegexExt* ext;
2829
2830
0
  ext = onig_get_regex_ext(env->reg);
2831
0
  CHECK_NULL_RETURN_MEMERR(ext);
2832
2833
0
  if (IS_NULL(ext->callout_list)) {
2834
0
    list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM);
2835
0
    CHECK_NULL_RETURN_MEMERR(list);
2836
2837
0
    ext->callout_list = list;
2838
0
    ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM;
2839
0
    ext->callout_num = 0;
2840
0
  }
2841
2842
0
  num = ext->callout_num + 1;
2843
0
  if (num > ext->callout_list_alloc) {
2844
0
    int alloc = ext->callout_list_alloc * 2;
2845
0
    list = (CalloutListEntry* )xrealloc(ext->callout_list,
2846
0
                                        sizeof(CalloutListEntry) * alloc);
2847
0
    CHECK_NULL_RETURN_MEMERR(list);
2848
2849
0
    ext->callout_list       = list;
2850
0
    ext->callout_list_alloc = alloc;
2851
0
  }
2852
2853
0
  e = ext->callout_list + (num - 1);
2854
2855
0
  e->flag             = 0;
2856
0
  e->of               = 0;
2857
0
  e->in               = ONIG_CALLOUT_OF_CONTENTS;
2858
0
  e->type             = 0;
2859
0
  e->tag_start        = 0;
2860
0
  e->tag_end          = 0;
2861
0
  e->start_func       = 0;
2862
0
  e->end_func         = 0;
2863
0
  e->u.arg.num        = 0;
2864
0
  e->u.arg.passed_num = 0;
2865
2866
0
  ext->callout_num = num;
2867
0
  *rnum = num;
2868
0
  return ONIG_NORMAL;
2869
0
}
2870
2871
static int
2872
node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
2873
                 ParseEnv* env)
2874
0
{
2875
0
  *node = node_new();
2876
0
  CHECK_NULL_RETURN_MEMERR(*node);
2877
2878
0
  ND_SET_TYPE(*node, ND_GIMMICK);
2879
0
  GIMMICK_(*node)->id          = id;
2880
0
  GIMMICK_(*node)->num         = num;
2881
0
  GIMMICK_(*node)->type        = GIMMICK_CALLOUT;
2882
0
  GIMMICK_(*node)->detail_type = (int )callout_of;
2883
2884
0
  return ONIG_NORMAL;
2885
0
}
2886
#endif
2887
2888
static int
2889
make_text_segment(Node** node, ParseEnv* env)
2890
2.74k
{
2891
2.74k
  int r;
2892
2.74k
  int i;
2893
2.74k
  Node* x;
2894
2.74k
  Node* ns[2];
2895
2896
  /* \X == (?>\O(?:\Y\O)*) */
2897
2898
2.74k
  ns[1] = NULL_NODE;
2899
2900
2.74k
  r = ONIGERR_MEMORY;
2901
2.74k
  ns[0] = node_new_anchor_with_options(ANCR_NO_TEXT_SEGMENT_BOUNDARY, env->options);
2902
2.74k
  if (IS_NULL(ns[0])) goto err;
2903
2904
2.74k
  r = node_new_true_anychar(&ns[1]);
2905
2.74k
  if (r != 0) goto err1;
2906
2907
2.74k
  x = make_list(2, ns);
2908
2.74k
  if (IS_NULL(x)) goto err;
2909
2.74k
  ns[0] = x;
2910
2.74k
  ns[1] = NULL_NODE;
2911
2912
2.74k
  x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
2913
2.74k
  if (IS_NULL(x)) goto err;
2914
2915
2.74k
  ND_BODY(x) = ns[0];
2916
2.74k
  ns[0] = NULL_NODE;
2917
2.74k
  ns[1] = x;
2918
2919
2.74k
  r = node_new_true_anychar(&ns[0]);
2920
2.74k
  if (r != 0) goto err1;
2921
2922
2.74k
  x = make_list(2, ns);
2923
2.74k
  if (IS_NULL(x)) goto err;
2924
2925
2.74k
  ns[0] = x;
2926
2.74k
  ns[1] = NULL_NODE;
2927
2928
2.74k
  x = node_new_bag(BAG_STOP_BACKTRACK);
2929
2.74k
  if (IS_NULL(x)) goto err;
2930
2931
2.74k
  ND_BODY(x) = ns[0];
2932
2933
2.74k
  *node = x;
2934
2.74k
  return ONIG_NORMAL;
2935
2936
0
 err:
2937
0
  r = ONIGERR_MEMORY;
2938
0
 err1:
2939
0
  for (i = 0; i < 2; i++) onig_node_free(ns[i]);
2940
0
  return r;
2941
0
}
2942
2943
static int
2944
make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
2945
                   Node* step_one, int lower, int upper, int possessive,
2946
                   int is_range_cutter, ParseEnv* env)
2947
2.48k
{
2948
2.48k
  int r;
2949
2.48k
  int i;
2950
2.48k
  int id;
2951
2.48k
  Node* x;
2952
2.48k
  Node* ns[4];
2953
2954
12.4k
  for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
2955
2956
2.48k
  ns[1] = absent;
2957
2.48k
  ns[3] = step_one; /* for err */
2958
2.48k
  r = node_new_save_gimmick(&ns[0], SAVE_S, env);
2959
2.48k
  if (r != 0) goto err;
2960
2961
2.48k
  id = GIMMICK_(ns[0])->id;
2962
2.48k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK,
2963
2.48k
                                  id, env);
2964
2.48k
  if (r != 0) goto err;
2965
2966
2.48k
  if (is_range_cutter != 0)
2967
39
    ND_STATUS_ADD(ns[2], ABSENT_WITH_SIDE_EFFECTS);
2968
2969
2.48k
  r = node_new_fail(&ns[3], env);
2970
2.48k
  if (r != 0) goto err;
2971
2972
2.48k
  x = make_list(4, ns);
2973
2.48k
  if (IS_NULL(x)) goto err0;
2974
2975
2.48k
  ns[0] = x;
2976
2.48k
  ns[1] = step_one;
2977
2.48k
  ns[2] = ns[3] = NULL_NODE;
2978
2979
2.48k
  x = make_alt(2, ns);
2980
2.48k
  if (IS_NULL(x)) goto err0;
2981
2982
2.48k
  ns[0] = x;
2983
2984
2.48k
  x = node_new_quantifier(lower, upper, FALSE);
2985
2.48k
  if (IS_NULL(x)) goto err0;
2986
2987
2.48k
  ND_BODY(x) = ns[0];
2988
2.48k
  ns[0] = x;
2989
2990
2.48k
  if (possessive != 0) {
2991
43
    x = node_new_bag(BAG_STOP_BACKTRACK);
2992
43
    if (IS_NULL(x)) goto err0;
2993
2994
43
    ND_BODY(x) = ns[0];
2995
43
    ns[0] = x;
2996
43
  }
2997
2998
2.48k
  r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
2999
2.48k
                                  pre_save_right_id, env);
3000
2.48k
  if (r != 0) goto err;
3001
3002
2.48k
  r = node_new_fail(&ns[2], env);
3003
2.48k
  if (r != 0) goto err;
3004
3005
2.48k
  x = make_list(2, ns + 1);
3006
2.48k
  if (IS_NULL(x)) goto err0;
3007
3008
2.48k
  ns[1] = x; ns[2] = NULL_NODE;
3009
3010
2.48k
  x = make_alt(2, ns);
3011
2.48k
  if (IS_NULL(x)) goto err0;
3012
3013
2.48k
  if (is_range_cutter != FALSE)
3014
39
    ND_STATUS_ADD(x, SUPER);
3015
3016
2.48k
  *node = x;
3017
2.48k
  return ONIG_NORMAL;
3018
3019
0
 err0:
3020
0
  r = ONIGERR_MEMORY;
3021
0
 err:
3022
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3023
0
  return r;
3024
0
}
3025
3026
static int
3027
make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
3028
                 ParseEnv* env)
3029
4
{
3030
4
  int r;
3031
4
  int id;
3032
4
  Node* save;
3033
4
  Node* x;
3034
4
  Node* ns[2];
3035
3036
4
  *node1 = *node2 = NULL_NODE;
3037
4
  save = ns[0] = ns[1] = NULL_NODE;
3038
3039
4
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3040
4
  if (r != 0) goto err;
3041
3042
4
  id = GIMMICK_(save)->id;
3043
4
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3044
4
                                  id, env);
3045
4
  if (r != 0) goto err;
3046
3047
4
  r = node_new_fail(&ns[1], env);
3048
4
  if (r != 0) goto err;
3049
3050
4
  x = make_list(2, ns);
3051
4
  if (IS_NULL(x)) goto err0;
3052
3053
4
  ns[0] = NULL_NODE; ns[1] = x;
3054
3055
4
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3056
4
                                  pre_save_right_id, env);
3057
4
  if (r != 0) goto err;
3058
3059
4
  x = make_alt(2, ns);
3060
4
  if (IS_NULL(x)) goto err0;
3061
3062
4
  *node1 = save;
3063
4
  *node2 = x;
3064
4
  return ONIG_NORMAL;
3065
3066
0
 err0:
3067
0
  r = ONIGERR_MEMORY;
3068
0
 err:
3069
0
  onig_node_free(save);
3070
0
  onig_node_free(ns[0]);
3071
0
  onig_node_free(ns[1]);
3072
0
  return r;
3073
0
}
3074
3075
static int
3076
make_range_clear(Node** node, ParseEnv* env)
3077
0
{
3078
0
  int r;
3079
0
  int id;
3080
0
  Node* save;
3081
0
  Node* x;
3082
0
  Node* ns[2];
3083
3084
0
  *node = NULL_NODE;
3085
0
  save = ns[0] = ns[1] = NULL_NODE;
3086
3087
0
  r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
3088
0
  if (r != 0) goto err;
3089
3090
0
  id = GIMMICK_(save)->id;
3091
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3092
0
                                  id, env);
3093
0
  if (r != 0) goto err;
3094
3095
0
  r = node_new_fail(&ns[1], env);
3096
0
  if (r != 0) goto err;
3097
3098
0
  x = make_list(2, ns);
3099
0
  if (IS_NULL(x)) goto err0;
3100
3101
0
  ns[0] = NULL_NODE; ns[1] = x;
3102
3103
0
#define ID_NOT_USED_DONT_CARE_ME   0
3104
3105
0
  r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
3106
0
                                  ID_NOT_USED_DONT_CARE_ME, env);
3107
0
  if (r != 0) goto err;
3108
0
  ND_STATUS_ADD(ns[0], ABSENT_WITH_SIDE_EFFECTS);
3109
3110
0
  x = make_alt(2, ns);
3111
0
  if (IS_NULL(x)) goto err0;
3112
3113
0
  ND_STATUS_ADD(x, SUPER);
3114
3115
0
  ns[0] = save;
3116
0
  ns[1] = x;
3117
0
  save = NULL_NODE;
3118
0
  x = make_list(2, ns);
3119
0
  if (IS_NULL(x)) goto err0;
3120
3121
0
  *node = x;
3122
0
  return ONIG_NORMAL;
3123
3124
0
 err0:
3125
0
  r = ONIGERR_MEMORY;
3126
0
 err:
3127
0
  onig_node_free(save);
3128
0
  onig_node_free(ns[0]);
3129
0
  onig_node_free(ns[1]);
3130
0
  return r;
3131
0
}
3132
3133
static int
3134
is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
3135
                          int* is_possessive, ParseEnv* env)
3136
4
{
3137
4
  Node* quant;
3138
4
  Node* body;
3139
3140
4
  *rquant = *rbody = 0;
3141
4
  *is_possessive = 0;
3142
3143
4
  if (ND_TYPE(node) == ND_QUANT) {
3144
0
    quant = node;
3145
0
  }
3146
4
  else {
3147
4
    if (ND_TYPE(node) == ND_BAG) {
3148
0
      BagNode* en = BAG_(node);
3149
0
      if (en->type == BAG_STOP_BACKTRACK) {
3150
0
        *is_possessive = 1;
3151
0
        quant = ND_BAG_BODY(en);
3152
0
        if (ND_TYPE(quant) != ND_QUANT)
3153
0
          return 0;
3154
0
      }
3155
0
      else
3156
0
        return 0;
3157
0
    }
3158
4
    else
3159
4
      return 0;
3160
4
  }
3161
3162
0
  if (QUANT_(quant)->greedy == 0)
3163
0
    return 0;
3164
3165
0
  body = ND_BODY(quant);
3166
0
  switch (ND_TYPE(body)) {
3167
0
  case ND_STRING:
3168
0
    {
3169
0
      int len;
3170
0
      StrNode* sn = STR_(body);
3171
0
      UChar *s = sn->s;
3172
3173
0
      len = 0;
3174
0
      while (s < sn->end) {
3175
0
        s += enclen(env->enc, s);
3176
0
        len++;
3177
0
      }
3178
0
      if (len != 1)
3179
0
        return 0;
3180
0
    }
3181
3182
0
  case ND_CCLASS:
3183
0
    break;
3184
3185
0
  default:
3186
0
    return 0;
3187
0
    break;
3188
0
  }
3189
3190
0
  if (node != quant) {
3191
0
    ND_BODY(node) = 0;
3192
0
    onig_node_free(node);
3193
0
  }
3194
0
  ND_BODY(quant) = NULL_NODE;
3195
0
  *rquant = quant;
3196
0
  *rbody  = body;
3197
0
  return 1;
3198
0
}
3199
3200
static int
3201
make_absent_tree_for_simple_one_char_repeat(Node** node,
3202
  Node* absent, Node* quant, Node* body, int possessive, ParseEnv* env)
3203
2.43k
{
3204
2.43k
  int r;
3205
2.43k
  int i;
3206
2.43k
  int id1;
3207
2.43k
  int lower, upper;
3208
2.43k
  Node* x;
3209
2.43k
  Node* ns[4];
3210
3211
2.43k
  *node = NULL_NODE;
3212
2.43k
  r = ONIGERR_MEMORY;
3213
2.43k
  ns[0] = ns[1] = NULL_NODE;
3214
2.43k
  ns[2] = body, ns[3] = absent;
3215
3216
2.43k
  lower = QUANT_(quant)->lower;
3217
2.43k
  upper = QUANT_(quant)->upper;
3218
3219
2.43k
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3220
2.43k
  if (r != 0) goto err;
3221
3222
2.43k
  id1 = GIMMICK_(ns[0])->id;
3223
3224
2.43k
  r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
3225
2.43k
                         FALSE, env);
3226
2.43k
  if (r != 0) goto err;
3227
3228
2.43k
  ns[2] = ns[3] = NULL_NODE;
3229
3230
2.43k
  r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
3231
2.43k
                                  id1, env);
3232
2.43k
  if (r != 0) goto err;
3233
3234
2.43k
  x = make_list(3, ns);
3235
2.43k
  if (IS_NULL(x)) goto err0;
3236
3237
2.43k
  *node = x;
3238
2.43k
  return ONIG_NORMAL;
3239
3240
0
 err0:
3241
0
  r = ONIGERR_MEMORY;
3242
0
 err:
3243
0
  for (i = 0; i < 4; i++) onig_node_free(ns[i]);
3244
0
  return r;
3245
0
}
3246
3247
static int
3248
make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
3249
                 ParseEnv* env)
3250
2.48k
{
3251
2.48k
  int r;
3252
2.48k
  int i;
3253
2.48k
  int id1, id2;
3254
2.48k
  int possessive;
3255
2.48k
  Node* x;
3256
2.48k
  Node* ns[7];
3257
3258
2.48k
  r = ONIGERR_MEMORY;
3259
19.8k
  for (i = 0; i < 7; i++) ns[i] = NULL_NODE;
3260
2.48k
  ns[4] = expr; ns[5] = absent;
3261
3262
2.48k
  if (is_range_cutter == 0) {
3263
2.44k
    Node* quant;
3264
2.44k
    Node* body;
3265
3266
2.44k
    if (expr == NULL_NODE) {
3267
      /* default expr \O* */
3268
2.43k
      quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
3269
2.43k
      if (IS_NULL(quant)) goto err0;
3270
3271
2.43k
      r = node_new_true_anychar(&body);
3272
2.43k
      if (r != 0) {
3273
0
        onig_node_free(quant);
3274
0
        goto err;
3275
0
      }
3276
2.43k
      possessive = 0;
3277
2.43k
      goto simple;
3278
2.43k
    }
3279
4
    else {
3280
4
      if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) {
3281
2.43k
      simple:
3282
2.43k
        r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
3283
2.43k
                                                        body, possessive, env);
3284
2.43k
        onig_node_free(quant);
3285
2.43k
        if (r != 0) {
3286
0
          ns[4] = NULL_NODE;
3287
0
          onig_node_free(body);
3288
0
          goto err;
3289
0
        }
3290
3291
2.43k
        return ONIG_NORMAL;
3292
2.43k
      }
3293
4
    }
3294
2.44k
  }
3295
3296
43
  r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
3297
43
  if (r != 0) goto err;
3298
3299
43
  id1 = GIMMICK_(ns[0])->id;
3300
3301
43
  r = node_new_save_gimmick(&ns[1], SAVE_S, env);
3302
43
  if (r != 0) goto err;
3303
3304
43
  id2 = GIMMICK_(ns[1])->id;
3305
3306
43
  r = node_new_true_anychar(&ns[3]);
3307
43
  if (r != 0) goto err;
3308
3309
43
  possessive = 1;
3310
43
  r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
3311
43
                         possessive, is_range_cutter, env);
3312
43
  if (r != 0) goto err;
3313
3314
43
  ns[3] = NULL_NODE;
3315
43
  ns[5] = NULL_NODE;
3316
3317
43
  r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env);
3318
43
  if (r != 0) goto err;
3319
3320
43
  if (is_range_cutter != 0) {
3321
39
    x = make_list(4, ns);
3322
39
    if (IS_NULL(x)) goto err0;
3323
39
  }
3324
4
  else {
3325
4
    r = make_absent_tail(&ns[5], &ns[6], id1, env);
3326
4
    if (r != 0) goto err;
3327
3328
4
    x = make_list(7, ns);
3329
4
    if (IS_NULL(x)) goto err0;
3330
4
  }
3331
3332
43
  *node = x;
3333
43
  return ONIG_NORMAL;
3334
3335
0
 err0:
3336
0
  r = ONIGERR_MEMORY;
3337
0
 err:
3338
0
  for (i = 0; i < 7; i++) onig_node_free(ns[i]);
3339
0
  return r;
3340
0
}
3341
3342
extern int
3343
onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
3344
80.1M
{
3345
80.1M
  int addlen = (int )(end - s);
3346
3347
80.1M
  if (addlen > 0) {
3348
79.8M
    int len  = (int )(STR_(node)->end - STR_(node)->s);
3349
3350
79.8M
    if (STR_(node)->capacity > 0 || (len + addlen > ND_STRING_BUF_SIZE - 1)) {
3351
47.4M
      UChar* p;
3352
47.4M
      int capa = len + addlen + ND_STRING_MARGIN;
3353
3354
47.4M
      if (capa <= STR_(node)->capacity) {
3355
0
        onig_strcpy(STR_(node)->s + len, s, end);
3356
0
      }
3357
47.4M
      else {
3358
47.4M
        if (STR_(node)->s == STR_(node)->buf)
3359
593k
          p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
3360
593k
                                      s, end, capa);
3361
46.8M
        else
3362
46.8M
          p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
3363
3364
47.4M
        CHECK_NULL_RETURN_MEMERR(p);
3365
47.4M
        STR_(node)->s        = p;
3366
47.4M
        STR_(node)->capacity = capa;
3367
47.4M
      }
3368
47.4M
    }
3369
32.3M
    else {
3370
32.3M
      onig_strcpy(STR_(node)->s + len, s, end);
3371
32.3M
    }
3372
79.8M
    STR_(node)->end = STR_(node)->s + len + addlen;
3373
79.8M
  }
3374
3375
80.1M
  return 0;
3376
80.1M
}
3377
3378
extern int
3379
onig_node_str_set(Node* node, const UChar* s, const UChar* end, int need_free)
3380
0
{
3381
0
  onig_node_str_clear(node, need_free);
3382
0
  return onig_node_str_cat(node, s, end);
3383
0
}
3384
3385
static int
3386
node_str_cat_char(Node* node, UChar c)
3387
0
{
3388
0
  UChar s[1];
3389
3390
0
  s[0] = c;
3391
0
  return onig_node_str_cat(node, s, s + 1);
3392
0
}
3393
3394
extern void
3395
onig_node_str_clear(Node* node, int need_free)
3396
0
{
3397
0
  if (need_free != 0 &&
3398
0
      STR_(node)->capacity != 0 &&
3399
0
      IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
3400
0
    xfree(STR_(node)->s);
3401
0
  }
3402
3403
0
  STR_(node)->flag     = 0;
3404
0
  STR_(node)->s        = STR_(node)->buf;
3405
0
  STR_(node)->end      = STR_(node)->buf;
3406
0
  STR_(node)->capacity = 0;
3407
0
}
3408
3409
static int
3410
node_set_str(Node* node, const UChar* s, const UChar* end)
3411
8.26M
{
3412
8.26M
  int r;
3413
3414
8.26M
  ND_SET_TYPE(node, ND_STRING);
3415
8.26M
  STR_(node)->flag     = 0;
3416
8.26M
  STR_(node)->s        = STR_(node)->buf;
3417
8.26M
  STR_(node)->end      = STR_(node)->buf;
3418
8.26M
  STR_(node)->capacity = 0;
3419
3420
8.26M
  r = onig_node_str_cat(node, s, end);
3421
8.26M
  return r;
3422
8.26M
}
3423
3424
static Node*
3425
node_new_str(const UChar* s, const UChar* end)
3426
8.26M
{
3427
8.26M
  int r;
3428
8.26M
  Node* node = node_new();
3429
8.26M
  CHECK_NULL_RETURN(node);
3430
3431
8.26M
  r = node_set_str(node, s, end);
3432
8.26M
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return NULL;
3435
0
  }
3436
3437
8.26M
  return node;
3438
8.26M
}
3439
3440
static int
3441
node_reset_str(Node* node, const UChar* s, const UChar* end)
3442
0
{
3443
0
  node_free_body(node);
3444
0
  return node_set_str(node, s, end);
3445
0
}
3446
3447
extern int
3448
onig_node_reset_empty(Node* node)
3449
0
{
3450
0
  return node_reset_str(node, NULL, NULL);
3451
0
}
3452
3453
extern Node*
3454
onig_node_new_str(const UChar* s, const UChar* end)
3455
622k
{
3456
622k
  return node_new_str(s, end);
3457
622k
}
3458
3459
static Node*
3460
node_new_str_with_options(const UChar* s, const UChar* end,
3461
                          OnigOptionType options)
3462
5.87M
{
3463
5.87M
  Node* node;
3464
5.87M
  node = node_new_str(s, end);
3465
3466
5.87M
  if (OPTON_IGNORECASE(options))
3467
967k
    ND_STATUS_ADD(node, IGNORECASE);
3468
3469
5.87M
  return node;
3470
5.87M
}
3471
3472
static Node*
3473
node_new_str_crude(UChar* s, UChar* end, OnigOptionType options)
3474
4.69k
{
3475
4.69k
  Node* node = node_new_str_with_options(s, end, options);
3476
4.69k
  CHECK_NULL_RETURN(node);
3477
4.69k
  ND_STRING_SET_CRUDE(node);
3478
4.69k
  return node;
3479
4.69k
}
3480
3481
static Node*
3482
node_new_empty(void)
3483
108k
{
3484
108k
  return node_new_str(NULL, NULL);
3485
108k
}
3486
3487
static Node*
3488
node_new_str_crude_char(UChar c, OnigOptionType options)
3489
4.04k
{
3490
4.04k
  int i;
3491
4.04k
  UChar p[1];
3492
4.04k
  Node* node;
3493
3494
4.04k
  p[0] = c;
3495
4.04k
  node = node_new_str_crude(p, p + 1, options);
3496
4.04k
  CHECK_NULL_RETURN(node);
3497
3498
  /* clear buf tail */
3499
97.1k
  for (i = 1; i < ND_STRING_BUF_SIZE; i++)
3500
93.1k
    STR_(node)->buf[i] = '\0';
3501
3502
4.04k
  return node;
3503
4.04k
}
3504
3505
static Node*
3506
str_node_split_last_char(Node* node, OnigEncoding enc)
3507
1.45M
{
3508
1.45M
  const UChar *p;
3509
1.45M
  Node* rn;
3510
1.45M
  StrNode* sn;
3511
3512
1.45M
  sn = STR_(node);
3513
1.45M
  rn = NULL_NODE;
3514
1.45M
  if (sn->end > sn->s) {
3515
1.45M
    p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
3516
1.45M
    if (p && p > sn->s) { /* can be split. */
3517
1.45M
      rn = node_new_str(p, sn->end);
3518
1.45M
      CHECK_NULL_RETURN(rn);
3519
3520
1.45M
      sn->end = (UChar* )p;
3521
1.45M
      STR_(rn)->flag = sn->flag;
3522
1.45M
      ND_STATUS(rn) = ND_STATUS(node);
3523
1.45M
    }
3524
1.45M
  }
3525
3526
1.45M
  return rn;
3527
1.45M
}
3528
3529
static int
3530
str_node_can_be_split(Node* node, OnigEncoding enc)
3531
1.57M
{
3532
1.57M
  StrNode* sn = STR_(node);
3533
1.57M
  if (sn->end > sn->s) {
3534
1.57M
    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
3535
1.57M
  }
3536
0
  return 0;
3537
1.57M
}
3538
3539
static int
3540
scan_number(UChar** src, const UChar* end, OnigEncoding enc)
3541
980k
{
3542
980k
  int num, val;
3543
980k
  OnigCodePoint c;
3544
980k
  UChar* p;
3545
980k
  PFETCH_READY;
3546
3547
980k
  p = *src;
3548
980k
  num = 0;
3549
1.15M
  while (! PEND) {
3550
1.12M
    PFETCH(c);
3551
1.12M
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
3552
176k
      val = (int )DIGITVAL(c);
3553
176k
      if ((ONIG_INT_MAX - val) / 10 < num)
3554
1.61k
        return -1;  /* overflow */
3555
3556
174k
      num = num * 10 + val;
3557
174k
    }
3558
951k
    else {
3559
951k
      PUNFETCH;
3560
951k
      break;
3561
951k
    }
3562
1.12M
  }
3563
978k
  *src = p;
3564
978k
  return num;
3565
980k
}
3566
3567
static int
3568
scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
3569
                        OnigEncoding enc, OnigCodePoint* rcode)
3570
641
{
3571
641
  OnigCodePoint code;
3572
641
  OnigCodePoint c;
3573
641
  unsigned int val;
3574
641
  int n;
3575
641
  UChar* p;
3576
641
  PFETCH_READY;
3577
3578
641
  p = *src;
3579
641
  code = 0;
3580
641
  n = 0;
3581
1.13k
  while (! PEND && n < maxlen) {
3582
1.06k
    PFETCH(c);
3583
1.06k
    if (IS_CODE_XDIGIT_ASCII(enc, c)) {
3584
491
      n++;
3585
491
      val = (unsigned int )XDIGITVAL(enc, c);
3586
491
      if ((UINT_MAX - val) / 16UL < code)
3587
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3588
3589
491
      code = (code << 4) + val;
3590
491
    }
3591
573
    else {
3592
573
      PUNFETCH;
3593
573
      break;
3594
573
    }
3595
1.06k
  }
3596
3597
641
  if (n < minlen)
3598
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3599
3600
641
  *rcode = code;
3601
641
  *src = p;
3602
641
  return ONIG_NORMAL;
3603
641
}
3604
3605
static int
3606
scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
3607
                  OnigEncoding enc, OnigCodePoint* rcode)
3608
5.02k
{
3609
5.02k
  OnigCodePoint code;
3610
5.02k
  OnigCodePoint c;
3611
5.02k
  unsigned int val;
3612
5.02k
  int n;
3613
5.02k
  UChar* p;
3614
5.02k
  PFETCH_READY;
3615
3616
5.02k
  p = *src;
3617
5.02k
  code = 0;
3618
5.02k
  n = 0;
3619
11.5k
  while (! PEND && n < maxlen) {
3620
10.1k
    PFETCH(c);
3621
10.1k
    if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
3622
6.55k
      n++;
3623
6.55k
      val = (unsigned int )ODIGITVAL(c);
3624
6.55k
      if ((UINT_MAX - val) / 8UL < code)
3625
0
        return ONIGERR_TOO_BIG_NUMBER; /* overflow */
3626
3627
6.55k
      code = (code << 3) + val;
3628
6.55k
    }
3629
3.60k
    else {
3630
3.60k
      PUNFETCH;
3631
3.60k
      break;
3632
3.60k
    }
3633
10.1k
  }
3634
3635
5.02k
  if (n < minlen)
3636
0
    return ONIGERR_INVALID_CODE_POINT_VALUE;
3637
3638
5.02k
  *rcode = code;
3639
5.02k
  *src = p;
3640
5.02k
  return ONIG_NORMAL;
3641
5.02k
}
3642
3643
static int
3644
scan_number_of_base(UChar** src, UChar* end, int minlen,
3645
                    OnigEncoding enc, OnigCodePoint* rcode, int base)
3646
0
{
3647
0
  int r;
3648
3649
0
  if (base == 16)
3650
0
    r = scan_hexadecimal_number(src, end, minlen, 8, enc, rcode);
3651
0
  else if (base == 8)
3652
0
    r = scan_octal_number(src, end, minlen, 11, enc, rcode);
3653
0
  else
3654
0
    r = ONIGERR_INVALID_CODE_POINT_VALUE;
3655
3656
0
  return r;
3657
0
}
3658
3659
0
/* True when c separates entries of a code point sequence: space or newline. */
#define IS_CODE_POINT_DIVIDE(c)  ((c) == ' ' || (c) == '\n')
3660
3661
/*
 * Parser state used by check_code_point_sequence_cc() (passed in as its
 * `state` argument and updated while scanning):
 *   CPS_START - a number was just scanned outside a range; only in this
 *               state is a following '-' accepted.
 *   CPS_RANGE - a '-' was just accepted; the closing '}' is rejected here.
 *   CPS_EMPTY - set after the number that follows a '-' has been scanned.
 */
enum CPS_STATE {
3662
  CPS_EMPTY = 0,
3663
  CPS_START = 1,
3664
  CPS_RANGE = 2
3665
};
3666
3667
static int
3668
check_code_point_sequence_cc(UChar* p, UChar* end, int base,
3669
                             OnigEncoding enc, int state)
3670
0
{
3671
0
  int r;
3672
0
  int n;
3673
0
  int end_digit;
3674
0
  OnigCodePoint code;
3675
0
  OnigCodePoint c;
3676
0
  PFETCH_READY;
3677
3678
0
  end_digit = FALSE;
3679
0
  n = 0;
3680
0
  while (! PEND) {
3681
0
  start:
3682
0
    PFETCH(c);
3683
0
    if (c == '}') {
3684
0
    end_char:
3685
0
      if (state == CPS_RANGE) return ONIGERR_INVALID_CODE_POINT_VALUE;
3686
0
      return n;
3687
0
    }
3688
3689
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3690
0
      while (! PEND) {
3691
0
        PFETCH(c);
3692
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3693
0
      }
3694
0
      if (IS_CODE_POINT_DIVIDE(c))
3695
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3696
0
    }
3697
0
    else if (c == '-') {
3698
0
    range:
3699
0
      if (state != CPS_START) return ONIGERR_INVALID_CODE_POINT_VALUE;
3700
0
      if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
3701
0
      end_digit = FALSE;
3702
0
      state = CPS_RANGE;
3703
0
      goto start;
3704
0
    }
3705
0
    else if (end_digit == TRUE) {
3706
0
      if (base == 16) {
3707
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3708
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3709
0
      }
3710
0
      else if (base == 8) {
3711
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3712
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3713
0
      }
3714
3715
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3716
0
    }
3717
3718
0
    if (c == '}') goto end_char;
3719
0
    if (c == '-') goto range;
3720
3721
0
    PUNFETCH;
3722
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3723
0
    if (r != 0) return r;
3724
0
    n++;
3725
0
    end_digit = TRUE;
3726
0
    state = (state == CPS_RANGE) ? CPS_EMPTY : CPS_START;
3727
0
  }
3728
3729
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3730
0
}
3731
3732
static int
3733
check_code_point_sequence(UChar* p, UChar* end, int base, OnigEncoding enc)
3734
0
{
3735
0
  int r;
3736
0
  int n;
3737
0
  int end_digit;
3738
0
  OnigCodePoint code;
3739
0
  OnigCodePoint c;
3740
0
  PFETCH_READY;
3741
3742
0
  end_digit = FALSE;
3743
0
  n = 0;
3744
0
  while (! PEND) {
3745
0
    PFETCH(c);
3746
0
    if (c == '}') {
3747
0
    end_char:
3748
0
      return n;
3749
0
    }
3750
3751
0
    if (IS_CODE_POINT_DIVIDE(c)) {
3752
0
      while (! PEND) {
3753
0
        PFETCH(c);
3754
0
        if (! IS_CODE_POINT_DIVIDE(c)) break;
3755
0
      }
3756
0
      if (IS_CODE_POINT_DIVIDE(c))
3757
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3758
0
    }
3759
0
    else if (end_digit == TRUE) {
3760
0
      if (base == 16) {
3761
0
        if (IS_CODE_XDIGIT_ASCII(enc, c))
3762
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3763
0
      }
3764
0
      else if (base == 8) {
3765
0
        if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8')
3766
0
          return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
3767
0
      }
3768
3769
0
      return ONIGERR_INVALID_CODE_POINT_VALUE;
3770
0
    }
3771
3772
0
    if (c == '}') goto end_char;
3773
3774
0
    PUNFETCH;
3775
0
    r = scan_number_of_base(&p, end, 1, enc, &code, base);
3776
0
    if (r != 0) return r;
3777
0
    n++;
3778
0
    end_digit = TRUE;
3779
0
  }
3780
3781
0
  return ONIGERR_INVALID_CODE_POINT_VALUE;
3782
0
}
3783
3784
static int
3785
get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_cc, OnigCodePoint* rcode)
3786
0
{
3787
0
  int r;
3788
0
  OnigCodePoint c;
3789
0
  UChar* p;
3790
0
  PFETCH_READY;
3791
3792
0
  p = *src;
3793
0
  while (! PEND) {
3794
0
    PFETCH(c);
3795
0
    if (! IS_CODE_POINT_DIVIDE(c)) {
3796
0
      if (c == '}') {
3797
0
        *src = p;
3798
0
        return 1; /* end of sequence */
3799
0
      }
3800
0
      else if (c == '-' && in_cc == TRUE) {
3801
0
        *src = p;
3802
0
        return 2; /* range */
3803
0
      }
3804
0
      PUNFETCH;
3805
0
      break;
3806
0
    }
3807
0
    else {
3808
0
      if (PEND)
3809
0
        return ONIGERR_INVALID_CODE_POINT_VALUE;
3810
0
    }
3811
0
  }
3812
3813
0
  r = scan_number_of_base(&p, end, 1, enc, rcode, base);
3814
0
  if (r != 0) return r;
3815
3816
0
  *src = p;
3817
0
  return ONIG_NORMAL;
3818
0
}
3819
3820
3821
/*
 * Write one code point of SIZE_CODE_POINT bytes into bbuf at byte offset
 * pos.  The address of `code` is taken, so it must be an lvalue.
 */
#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
3822
71.0M
    BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
3823
3824
/* data format:
3825
     [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
3826
     (all data size is OnigCodePoint)
3827
 */
3828
static int
3829
new_code_range(BBuf** pbuf)
3830
1.21M
{
3831
1.21M
#define INIT_MULTI_BYTE_RANGE_SIZE  (SIZE_CODE_POINT * 5)
3832
1.21M
  int r;
3833
1.21M
  OnigCodePoint n;
3834
1.21M
  BBuf* bbuf;
3835
3836
1.21M
  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
3837
1.21M
  CHECK_NULL_RETURN_MEMERR(bbuf);
3838
1.21M
  r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
3839
1.21M
  if (r != 0) {
3840
0
    xfree(bbuf);
3841
0
    *pbuf = 0;
3842
0
    return r;
3843
0
  }
3844
3845
1.21M
  n = 0;
3846
1.21M
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3847
1.21M
  return 0;
3848
1.21M
}
3849
3850
static int
3851
add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
3852
23.2M
{
3853
23.2M
  int r, inc_n, pos;
3854
23.2M
  int low, high, bound, x;
3855
23.2M
  OnigCodePoint n, *data;
3856
23.2M
  BBuf* bbuf;
3857
3858
23.2M
  if (from > to) {
3859
0
    n = from; from = to; to = n;
3860
0
  }
3861
3862
23.2M
  if (IS_NULL(*pbuf)) {
3863
1.21M
    r = new_code_range(pbuf);
3864
1.21M
    if (r != 0) return r;
3865
1.21M
    bbuf = *pbuf;
3866
1.21M
    n = 0;
3867
1.21M
  }
3868
22.0M
  else {
3869
22.0M
    bbuf = *pbuf;
3870
22.0M
    GET_CODE_POINT(n, bbuf->p);
3871
22.0M
  }
3872
23.2M
  data = (OnigCodePoint* )(bbuf->p);
3873
23.2M
  data++;
3874
3875
57.7M
  for (low = 0, bound = n; low < bound; ) {
3876
34.4M
    x = (low + bound) >> 1;
3877
34.4M
    if (from > data[x*2 + 1])
3878
10.4M
      low = x + 1;
3879
24.0M
    else
3880
24.0M
      bound = x;
3881
34.4M
  }
3882
3883
23.2M
  high = (to == ~((OnigCodePoint )0)) ? n : low;
3884
50.3M
  for (bound = n; high < bound; ) {
3885
27.0M
    x = (high + bound) >> 1;
3886
27.0M
    if (to + 1 >= data[x*2])
3887
19.3M
      high = x + 1;
3888
7.77M
    else
3889
7.77M
      bound = x;
3890
27.0M
  }
3891
3892
23.2M
  inc_n = low + 1 - high;
3893
23.2M
  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
3894
0
    return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
3895
3896
23.2M
  if (inc_n != 1) {
3897
19.3M
    if (from > data[low*2])
3898
17.6M
      from = data[low*2];
3899
19.3M
    if (to < data[(high - 1)*2 + 1])
3900
17.0M
      to = data[(high - 1)*2 + 1];
3901
19.3M
  }
3902
3903
23.2M
  if (inc_n != 0 && (OnigCodePoint )high < n) {
3904
954k
    int from_pos = SIZE_CODE_POINT * (1 + high * 2);
3905
954k
    int to_pos   = SIZE_CODE_POINT * (1 + (low + 1) * 2);
3906
954k
    int size = (n - high) * 2 * SIZE_CODE_POINT;
3907
3908
954k
    if (inc_n > 0) {
3909
952k
      BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
3910
952k
    }
3911
1.67k
    else {
3912
1.67k
      BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
3913
1.67k
    }
3914
954k
  }
3915
3916
23.2M
  pos = SIZE_CODE_POINT * (1 + low * 2);
3917
23.2M
  BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
3918
23.2M
  BB_WRITE_CODE_POINT(bbuf, pos, from);
3919
23.2M
  BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
3920
23.2M
  n += inc_n;
3921
23.2M
  BB_WRITE_CODE_POINT(bbuf, 0, n);
3922
3923
23.2M
  return 0;
3924
23.2M
}
3925
3926
static int
3927
add_code_range(BBuf** pbuf, ParseEnv* env, OnigCodePoint from, OnigCodePoint to)
3928
1.92M
{
3929
1.92M
  if (from > to) {
3930
13
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
3931
0
      return 0;
3932
13
    else
3933
13
      return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
3934
13
  }
3935
3936
1.92M
  return add_code_range_to_buf(pbuf, from, to);
3937
1.92M
}
3938
3939
static int
3940
not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
3941
0
{
3942
0
  int r, i, n;
3943
0
  OnigCodePoint pre, from, *data, to = 0;
3944
3945
0
  *pbuf = (BBuf* )NULL;
3946
0
  if (IS_NULL(bbuf)) {
3947
0
  set_all:
3948
0
    return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3949
0
  }
3950
3951
0
  data = (OnigCodePoint* )(bbuf->p);
3952
0
  GET_CODE_POINT(n, data);
3953
0
  data++;
3954
0
  if (n <= 0) goto set_all;
3955
3956
0
  r = 0;
3957
0
  pre = MBCODE_START_POS(enc);
3958
0
  for (i = 0; i < n; i++) {
3959
0
    from = data[i*2];
3960
0
    to   = data[i*2+1];
3961
0
    if (pre <= from - 1) {
3962
0
      r = add_code_range_to_buf(pbuf, pre, from - 1);
3963
0
      if (r != 0) {
3964
0
        bbuf_free(*pbuf);
3965
0
        return r;
3966
0
      }
3967
0
    }
3968
0
    if (to == ~((OnigCodePoint )0)) break;
3969
0
    pre = to + 1;
3970
0
  }
3971
0
  if (to < ~((OnigCodePoint )0)) {
3972
0
    r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
3973
0
    if (r != 0) bbuf_free(*pbuf);
3974
0
  }
3975
0
  return r;
3976
0
}
3977
3978
0
/*
 * Exchange two (buffer, negation flag) pairs in place: bbuf1 <-> bbuf2 and
 * not1 <-> not2.  All four arguments are assigned to, so they must be
 * lvalues.
 */
#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
3979
0
  BBuf *tbuf; \
3980
0
  int  tnot; \
3981
0
  tnot = not1;  not1  = not2;  not2  = tnot; \
3982
0
  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
3983
0
} while (0)
3984
3985
static int
3986
or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
3987
                  BBuf* bbuf2, int not2, BBuf** pbuf)
3988
0
{
3989
0
  int r;
3990
0
  OnigCodePoint i, n1, *data1;
3991
0
  OnigCodePoint from, to;
3992
3993
0
  *pbuf = (BBuf* )NULL;
3994
0
  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
3995
0
    if (not1 != 0 || not2 != 0)
3996
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
3997
0
    return 0;
3998
0
  }
3999
4000
0
  r = 0;
4001
0
  if (IS_NULL(bbuf2))
4002
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4003
4004
0
  if (IS_NULL(bbuf1)) {
4005
0
    if (not1 != 0) {
4006
0
      return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
4007
0
    }
4008
0
    else {
4009
0
      if (not2 == 0) {
4010
0
        return bbuf_clone(pbuf, bbuf2);
4011
0
      }
4012
0
      else {
4013
0
        return not_code_range_buf(enc, bbuf2, pbuf);
4014
0
      }
4015
0
    }
4016
0
  }
4017
4018
0
  if (not1 != 0)
4019
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4020
4021
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4022
0
  GET_CODE_POINT(n1, data1);
4023
0
  data1++;
4024
4025
0
  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
4026
0
    r = bbuf_clone(pbuf, bbuf2);
4027
0
  }
4028
0
  else if (not1 == 0) { /* 1 OR (not 2) */
4029
0
    r = not_code_range_buf(enc, bbuf2, pbuf);
4030
0
  }
4031
0
  if (r != 0) return r;
4032
4033
0
  for (i = 0; i < n1; i++) {
4034
0
    from = data1[i*2];
4035
0
    to   = data1[i*2+1];
4036
0
    r = add_code_range_to_buf(pbuf, from, to);
4037
0
    if (r != 0) return r;
4038
0
  }
4039
0
  return 0;
4040
0
}
4041
4042
static int
4043
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
4044
                OnigCodePoint* data, int n)
4045
0
{
4046
0
  int i, r;
4047
0
  OnigCodePoint from2, to2;
4048
4049
0
  for (i = 0; i < n; i++) {
4050
0
    from2 = data[i*2];
4051
0
    to2   = data[i*2+1];
4052
0
    if (from2 < from1) {
4053
0
      if (to2 < from1) continue;
4054
0
      else {
4055
0
        from1 = to2 + 1;
4056
0
      }
4057
0
    }
4058
0
    else if (from2 <= to1) {
4059
0
      if (to2 < to1) {
4060
0
        if (from1 <= from2 - 1) {
4061
0
          r = add_code_range_to_buf(pbuf, from1, from2-1);
4062
0
          if (r != 0) return r;
4063
0
        }
4064
0
        from1 = to2 + 1;
4065
0
      }
4066
0
      else {
4067
0
        to1 = from2 - 1;
4068
0
      }
4069
0
    }
4070
0
    else {
4071
0
      from1 = from2;
4072
0
    }
4073
0
    if (from1 > to1) break;
4074
0
  }
4075
0
  if (from1 <= to1) {
4076
0
    r = add_code_range_to_buf(pbuf, from1, to1);
4077
0
    if (r != 0) return r;
4078
0
  }
4079
0
  return 0;
4080
0
}
4081
4082
static int
4083
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
4084
0
{
4085
0
  int r;
4086
0
  OnigCodePoint i, j, n1, n2, *data1, *data2;
4087
0
  OnigCodePoint from, to, from1, to1, from2, to2;
4088
4089
0
  *pbuf = (BBuf* )NULL;
4090
0
  if (IS_NULL(bbuf1)) {
4091
0
    if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
4092
0
      return bbuf_clone(pbuf, bbuf2);
4093
0
    return 0;
4094
0
  }
4095
0
  else if (IS_NULL(bbuf2)) {
4096
0
    if (not2 != 0)
4097
0
      return bbuf_clone(pbuf, bbuf1);
4098
0
    return 0;
4099
0
  }
4100
4101
0
  if (not1 != 0)
4102
0
    SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
4103
4104
0
  data1 = (OnigCodePoint* )(bbuf1->p);
4105
0
  data2 = (OnigCodePoint* )(bbuf2->p);
4106
0
  GET_CODE_POINT(n1, data1);
4107
0
  GET_CODE_POINT(n2, data2);
4108
0
  data1++;
4109
0
  data2++;
4110
4111
0
  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
4112
0
    for (i = 0; i < n1; i++) {
4113
0
      from1 = data1[i*2];
4114
0
      to1   = data1[i*2+1];
4115
0
      for (j = 0; j < n2; j++) {
4116
0
        from2 = data2[j*2];
4117
0
        to2   = data2[j*2+1];
4118
0
        if (from2 > to1) break;
4119
0
        if (to2 < from1) continue;
4120
0
        from = MAX(from1, from2);
4121
0
        to   = MIN(to1, to2);
4122
0
        r = add_code_range_to_buf(pbuf, from, to);
4123
0
        if (r != 0) return r;
4124
0
      }
4125
0
    }
4126
0
  }
4127
0
  else if (not1 == 0) { /* 1 AND (not 2) */
4128
0
    for (i = 0; i < n1; i++) {
4129
0
      from1 = data1[i*2];
4130
0
      to1   = data1[i*2+1];
4131
0
      r = and_code_range1(pbuf, from1, to1, data2, n2);
4132
0
      if (r != 0) return r;
4133
0
    }
4134
0
  }
4135
4136
0
  return 0;
4137
0
}
4138
4139
static int
4140
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4141
0
{
4142
0
  int r, not1, not2;
4143
0
  BBuf *buf1, *buf2, *pbuf;
4144
0
  BitSetRef bsr1, bsr2;
4145
0
  BitSet bs1, bs2;
4146
4147
0
  not1 = IS_NCCLASS_NOT(dest);
4148
0
  bsr1 = dest->bs;
4149
0
  buf1 = dest->mbuf;
4150
0
  not2 = IS_NCCLASS_NOT(cc);
4151
0
  bsr2 = cc->bs;
4152
0
  buf2 = cc->mbuf;
4153
4154
0
  if (not1 != 0) {
4155
0
    bitset_invert_to(bsr1, bs1);
4156
0
    bsr1 = bs1;
4157
0
  }
4158
0
  if (not2 != 0) {
4159
0
    bitset_invert_to(bsr2, bs2);
4160
0
    bsr2 = bs2;
4161
0
  }
4162
0
  bitset_and(bsr1, bsr2);
4163
0
  if (bsr1 != dest->bs) {
4164
0
    bitset_copy(dest->bs, bsr1);
4165
0
  }
4166
0
  if (not1 != 0) {
4167
0
    bitset_invert(dest->bs);
4168
0
  }
4169
4170
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4171
0
    if (not1 != 0 && not2 != 0) {
4172
0
      r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf);
4173
0
    }
4174
0
    else {
4175
0
      r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf);
4176
0
      if (r == 0 && not1 != 0) {
4177
0
        BBuf *tbuf;
4178
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4179
0
        if (r != 0) {
4180
0
          bbuf_free(pbuf);
4181
0
          return r;
4182
0
        }
4183
0
        bbuf_free(pbuf);
4184
0
        pbuf = tbuf;
4185
0
      }
4186
0
    }
4187
0
    if (r != 0) return r;
4188
4189
0
    dest->mbuf = pbuf;
4190
0
    bbuf_free(buf1);
4191
0
    return r;
4192
0
  }
4193
0
  return 0;
4194
0
}
4195
4196
static int
4197
or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
4198
0
{
4199
0
  int r, not1, not2;
4200
0
  BBuf *buf1, *buf2, *pbuf;
4201
0
  BitSetRef bsr1, bsr2;
4202
0
  BitSet bs1, bs2;
4203
4204
0
  not1 = IS_NCCLASS_NOT(dest);
4205
0
  bsr1 = dest->bs;
4206
0
  buf1 = dest->mbuf;
4207
0
  not2 = IS_NCCLASS_NOT(cc);
4208
0
  bsr2 = cc->bs;
4209
0
  buf2 = cc->mbuf;
4210
4211
0
  if (not1 != 0) {
4212
0
    bitset_invert_to(bsr1, bs1);
4213
0
    bsr1 = bs1;
4214
0
  }
4215
0
  if (not2 != 0) {
4216
0
    bitset_invert_to(bsr2, bs2);
4217
0
    bsr2 = bs2;
4218
0
  }
4219
0
  bitset_or(bsr1, bsr2);
4220
0
  if (bsr1 != dest->bs) {
4221
0
    bitset_copy(dest->bs, bsr1);
4222
0
  }
4223
0
  if (not1 != 0) {
4224
0
    bitset_invert(dest->bs);
4225
0
  }
4226
4227
0
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
4228
0
    if (not1 != 0 && not2 != 0) {
4229
0
      r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf);
4230
0
    }
4231
0
    else {
4232
0
      r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf);
4233
0
      if (r == 0 && not1 != 0) {
4234
0
        BBuf *tbuf;
4235
0
        r = not_code_range_buf(enc, pbuf, &tbuf);
4236
0
        if (r != 0) {
4237
0
          bbuf_free(pbuf);
4238
0
          return r;
4239
0
        }
4240
0
        bbuf_free(pbuf);
4241
0
        pbuf = tbuf;
4242
0
      }
4243
0
    }
4244
0
    if (r != 0) return r;
4245
4246
0
    dest->mbuf = pbuf;
4247
0
    bbuf_free(buf1);
4248
0
    return r;
4249
0
  }
4250
0
  else
4251
0
    return 0;
4252
0
}
4253
4254
static OnigCodePoint
4255
conv_backslash_value(OnigCodePoint c, ParseEnv* env)
4256
566k
{
4257
566k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
4258
566k
    switch (c) {
4259
8.68k
    case 'n': return '\n';
4260
4.09k
    case 't': return '\t';
4261
474
    case 'r': return '\r';
4262
872
    case 'f': return '\f';
4263
298
    case 'a': return '\007';
4264
101
    case 'b': return '\010';
4265
304
    case 'e': return '\033';
4266
81
    case 'v':
4267
81
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
4268
0
        return '\v';
4269
81
      break;
4270
4271
551k
    default:
4272
551k
      break;
4273
566k
    }
4274
566k
  }
4275
551k
  return c;
4276
566k
}
4277
4278
static int
4279
is_invalid_quantifier_target(Node* node)
4280
2.76M
{
4281
2.76M
  switch (ND_TYPE(node)) {
4282
79
  case ND_ANCHOR:
4283
1.31k
  case ND_GIMMICK:
4284
1.31k
    return 1;
4285
0
    break;
4286
4287
547k
  case ND_BAG:
4288
    /* allow enclosed elements */
4289
    /* return is_invalid_quantifier_target(ND_BODY(node)); */
4290
547k
    break;
4291
4292
69.8k
  case ND_LIST:
4293
70.6k
    do {
4294
70.6k
      if (! is_invalid_quantifier_target(ND_CAR(node))) return 0;
4295
70.6k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4296
413
    return 0;
4297
0
    break;
4298
4299
1.08k
  case ND_ALT:
4300
70.4k
    do {
4301
70.4k
      if (is_invalid_quantifier_target(ND_CAR(node))) return 1;
4302
70.4k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4303
1.08k
    break;
4304
4305
2.14M
  default:
4306
2.14M
    break;
4307
2.76M
  }
4308
2.69M
  return 0;
4309
2.76M
}
4310
4311
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
4312
static int
4313
quantifier_type_num(QuantNode* q)
4314
1.64M
{
4315
1.64M
  if (q->greedy) {
4316
1.30M
    if (q->lower == 0) {
4317
1.28M
      if (q->upper == 1) return 0;
4318
1.09M
      else if (IS_INFINITE_REPEAT(q->upper)) return 1;
4319
1.28M
    }
4320
17.8k
    else if (q->lower == 1) {
4321
7.94k
      if (IS_INFINITE_REPEAT(q->upper)) return 2;
4322
7.94k
    }
4323
1.30M
  }
4324
337k
  else {
4325
337k
    if (q->lower == 0) {
4326
333k
      if (q->upper == 1) return 3;
4327
14.5k
      else if (IS_INFINITE_REPEAT(q->upper)) return 4;
4328
333k
    }
4329
4.32k
    else if (q->lower == 1) {
4330
1.71k
      if (IS_INFINITE_REPEAT(q->upper)) return 5;
4331
1.71k
    }
4332
337k
  }
4333
25.5k
  return -1;
4334
1.64M
}
4335
4336
4337
/*
 * Action selected from ReduceTypeTable when a quantifier is applied
 * directly to another quantifier.
 */
enum ReduceType {
4338
  RQ_ASIS = 0, /* as is */
4339
  RQ_DEL,      /* delete parent */
4340
  RQ_A,        /* to '*'    */
4341
  RQ_P,        /* to '+'    */
4342
  RQ_AQ,       /* to '*?'   */
4343
  RQ_QQ,       /* to '??'   */
4344
  RQ_P_QQ,     /* to '+)??' */
4345
};
4346
4347
static enum ReduceType ReduceTypeTable[6][6] = {
4348
  {RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS}, /* '?'  */
4349
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL},  /* '*'  */
4350
  {RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL},  /* '+'  */
4351
  {RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ},   /* '??' */
4352
  {RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL},  /* '*?' */
4353
  {RQ_ASIS, RQ_A,    RQ_P,   RQ_AQ,   RQ_AQ,   RQ_DEL}   /* '+?' */
4354
};
4355
4356
extern int
4357
onig_reduce_nested_quantifier(Node* pnode)
4358
407k
{
4359
407k
  int pnum, cnum;
4360
407k
  QuantNode *p, *c;
4361
407k
  Node* cnode;
4362
4363
407k
  cnode = ND_BODY(pnode);
4364
4365
407k
  p = QUANT_(pnode);
4366
407k
  c = QUANT_(cnode);
4367
407k
  pnum = quantifier_type_num(p);
4368
407k
  cnum = quantifier_type_num(c);
4369
407k
  if (pnum < 0 || cnum < 0) {
4370
7.19k
    if (p->lower == p->upper && c->lower == c->upper) {
4371
2.24k
      int n = onig_positive_int_multiply(p->lower, c->lower);
4372
2.24k
      if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4373
4374
2.24k
      p->lower = p->upper = n;
4375
2.24k
      ND_BODY(pnode) = ND_BODY(cnode);
4376
2.24k
      goto remove_cnode;
4377
2.24k
    }
4378
4379
4.95k
    return 0;
4380
7.19k
  }
4381
4382
400k
  switch(ReduceTypeTable[cnum][pnum]) {
4383
305k
  case RQ_DEL:
4384
305k
    *pnode = *cnode;
4385
305k
    goto remove_cnode;
4386
0
    break;
4387
90.8k
  case RQ_A:
4388
90.8k
    ND_BODY(pnode) = ND_BODY(cnode);
4389
90.8k
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4390
90.8k
    goto remove_cnode;
4391
0
    break;
4392
441
  case RQ_P:
4393
441
    ND_BODY(pnode) = ND_BODY(cnode);
4394
441
    p->lower  = 1;  p->upper = INFINITE_REPEAT;  p->greedy = 1;
4395
441
    goto remove_cnode;
4396
0
    break;
4397
2.28k
  case RQ_AQ:
4398
2.28k
    ND_BODY(pnode) = ND_BODY(cnode);
4399
2.28k
    p->lower  = 0;  p->upper = INFINITE_REPEAT;  p->greedy = 0;
4400
2.28k
    goto remove_cnode;
4401
0
    break;
4402
3
  case RQ_QQ:
4403
3
    ND_BODY(pnode) = ND_BODY(cnode);
4404
3
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4405
3
    goto remove_cnode;
4406
0
    break;
4407
1.06k
  case RQ_P_QQ:
4408
1.06k
    p->lower  = 0;  p->upper = 1;  p->greedy = 0;
4409
1.06k
    c->lower  = 1;  c->upper = INFINITE_REPEAT;  c->greedy = 1;
4410
1.06k
    break;
4411
53
  case RQ_ASIS:
4412
53
    break;
4413
400k
  }
4414
4415
1.12k
  return 0;
4416
4417
401k
 remove_cnode:
4418
401k
  ND_BODY(cnode) = NULL_NODE;
4419
401k
  onig_node_free(cnode);
4420
401k
  return 0;
4421
400k
}
4422
4423
static int
4424
node_new_general_newline(Node** node, ParseEnv* env)
4425
641
{
4426
641
  int r;
4427
641
  int dlen, alen;
4428
641
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
4429
641
  Node* crnl;
4430
641
  Node* ncc;
4431
641
  Node* x;
4432
641
  CClassNode* cc;
4433
4434
641
  dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf);
4435
641
  if (dlen < 0) return dlen;
4436
641
  alen = ONIGENC_CODE_TO_MBC(env->enc, NEWLINE_CODE, buf + dlen);
4437
641
  if (alen < 0) return alen;
4438
4439
641
  crnl = node_new_str_crude(buf, buf + dlen + alen, ONIG_OPTION_NONE);
4440
641
  CHECK_NULL_RETURN_MEMERR(crnl);
4441
4442
641
  ncc = node_new_cclass();
4443
641
  if (IS_NULL(ncc)) goto err2;
4444
4445
641
  cc = CCLASS_(ncc);
4446
641
  if (dlen == 1) {
4447
641
    bitset_set_range(cc->bs, NEWLINE_CODE, 0x0d);
4448
641
  }
4449
0
  else {
4450
0
    r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, 0x0d);
4451
0
    if (r != 0) {
4452
0
    err1:
4453
0
      onig_node_free(ncc);
4454
0
    err2:
4455
0
      onig_node_free(crnl);
4456
0
      return ONIGERR_MEMORY;
4457
0
    }
4458
0
  }
4459
4460
641
  if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) {
4461
641
    r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
4462
641
    if (r != 0) goto err1;
4463
641
    r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
4464
641
    if (r != 0) goto err1;
4465
641
  }
4466
4467
641
  x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
4468
641
  if (IS_NULL(x)) goto err1;
4469
4470
641
  *node = x;
4471
641
  return 0;
4472
641
}
4473
4474
/*
  Token kinds stored in PToken.type.
  The enumerator order (and therefore each value) must stay as it is.
*/
enum TokenSyms {
  TK_EOT = 0,          /* end of token */
  TK_CRUDE_BYTE,
  TK_CHAR,
  TK_STRING,
  TK_CODE_POINT,
  TK_ANYCHAR,
  TK_CHAR_TYPE,
  TK_BACKREF,
  TK_CALL,
  TK_ANCHOR,
  TK_REPEAT,
  TK_INTERVAL,
  TK_ANYCHAR_ANYTIME,  /* SQL '%' == .* */
  TK_ALT,
  TK_SUBEXP_OPEN,
  TK_SUBEXP_CLOSE,
  TK_OPEN_CC,
  TK_QUOTE_OPEN,
  TK_CHAR_PROPERTY,    /* \p{...}, \P{...} */
  TK_KEEP,             /* \K */
  TK_GENERAL_NEWLINE,  /* \R */
  TK_NO_NEWLINE,       /* \N */
  TK_TRUE_ANYCHAR,     /* \O */
  TK_TEXT_SEGMENT,     /* \X */

  /* kinds produced only inside a character class */
  TK_CC_CLOSE,
  TK_CC_RANGE,
  TK_CC_POSIX_BRACKET_OPEN,
  TK_CC_AND,           /* && */
  TK_CC_OPEN_CC        /* [ */
};
4507
4508
typedef struct {
4509
  enum TokenSyms type;
4510
  int code_point_continue;
4511
  int escaped;
4512
  int base_num;   /* is number: 8, 16 (used in [....]) */
4513
  UChar* backp;
4514
  union {
4515
    UChar* s;
4516
    UChar byte;
4517
    OnigCodePoint code;
4518
    int   anchor;
4519
    int   subtype;
4520
    struct {
4521
      int lower;
4522
      int upper;
4523
      int greedy;
4524
      int possessive;
4525
    } repeat;
4526
    struct {
4527
      int  num;
4528
      int  ref1;
4529
      int* refs;
4530
      int  by_name;
4531
#ifdef USE_BACKREF_WITH_LEVEL
4532
      int  exist_level;
4533
      int  level;   /* \k<name+n> */
4534
#endif
4535
    } backref;
4536
    struct {
4537
      UChar* name;
4538
      UChar* name_end;
4539
      int    gnum;
4540
      int    by_number;
4541
    } call;
4542
    struct {
4543
      int ctype;
4544
      int not;
4545
    } prop;
4546
  } u;
4547
} PToken;
4548
4549
static void
4550
ptoken_init(PToken* tok)
4551
1.37M
{
4552
1.37M
  tok->code_point_continue = 0;
4553
1.37M
}
4554
4555
static int
4556
fetch_interval(UChar** src, UChar* end, PToken* tok, ParseEnv* env)
4557
946k
{
4558
946k
  int low, up, syn_allow, non_low;
4559
946k
  int r;
4560
946k
  OnigCodePoint c;
4561
946k
  OnigEncoding enc;
4562
946k
  UChar* p;
4563
946k
  PFETCH_READY;
4564
4565
946k
  p = *src;
4566
946k
  r = 0;
4567
946k
  non_low = 0;
4568
946k
  enc = env->enc;
4569
946k
  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
4570
4571
946k
  if (PEND) {
4572
25.9k
    if (syn_allow)
4573
25.9k
      return 1;  /* "....{" : OK! */
4574
0
    else
4575
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
4576
25.9k
  }
4577
4578
921k
  if (! syn_allow) {
4579
0
    c = PPEEK;
4580
0
    if (c == ')' || c == '(' || c == '|') {
4581
0
      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
4582
0
    }
4583
0
  }
4584
4585
921k
  low = scan_number(&p, end, env->enc);
4586
921k
  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4587
921k
  if (low > ONIG_MAX_REPEAT_NUM)
4588
25
    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4589
4590
921k
  if (p == *src) { /* can't read low */
4591
866k
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
4592
      /* allow {,n} as {0,n} */
4593
0
      low = 0;
4594
0
      non_low = 1;
4595
0
    }
4596
866k
    else
4597
866k
      goto invalid;
4598
866k
  }
4599
4600
54.5k
  if (PEND) goto invalid;
4601
52.2k
  PFETCH(c);
4602
52.2k
  if (c == ',') {
4603
9.43k
    UChar* prev = p;
4604
9.43k
    up = scan_number(&p, end, env->enc);
4605
9.43k
    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4606
9.42k
    if (up > ONIG_MAX_REPEAT_NUM)
4607
20
      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
4608
4609
9.40k
    if (p == prev) {
4610
3.58k
      if (non_low != 0)
4611
0
        goto invalid;
4612
3.58k
      up = INFINITE_REPEAT;  /* {n,} : {n,infinite} */
4613
3.58k
    }
4614
9.40k
  }
4615
42.8k
  else {
4616
42.8k
    if (non_low != 0)
4617
0
      goto invalid;
4618
4619
42.8k
    PUNFETCH;
4620
42.8k
    up = low;  /* {n} : exact n times */
4621
42.8k
    r = 2;     /* fixed */
4622
42.8k
  }
4623
4624
52.2k
  if (PEND) goto invalid;
4625
51.1k
  PFETCH(c);
4626
51.1k
  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
4627
0
    if (c != MC_ESC(env->syntax) || PEND) goto invalid;
4628
0
    PFETCH(c);
4629
0
  }
4630
51.1k
  if (c != '}') goto invalid;
4631
4632
20.6k
  if (!IS_INFINITE_REPEAT(up) && low > up) {
4633
    /* {n,m}+ supported case */
4634
1
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
4635
1
      return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
4636
4637
0
    tok->u.repeat.possessive = 1;
4638
0
    {
4639
0
      int tmp;
4640
0
      tmp = low; low = up; up = tmp;
4641
0
    }
4642
0
  }
4643
20.6k
  else
4644
20.6k
    tok->u.repeat.possessive = 0;
4645
4646
20.6k
  tok->type = TK_INTERVAL;
4647
20.6k
  tok->u.repeat.lower = low;
4648
20.6k
  tok->u.repeat.upper = up;
4649
20.6k
  *src = p;
4650
20.6k
  return r; /* 0: normal {n,m}, 2: fixed {n} */
4651
4652
900k
 invalid:
4653
900k
  if (syn_allow) {
4654
    /* *src = p; */ /* !!! Don't do this line !!! */
4655
900k
    return 1;  /* OK */
4656
900k
  }
4657
0
  else
4658
0
    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
4659
900k
}
4660
4661
/* \M-, \C-, \c, or \... */
4662
static int
4663
fetch_escaped_value_raw(UChar** src, UChar* end, ParseEnv* env,
4664
                        OnigCodePoint* val)
4665
570k
{
4666
570k
  int v;
4667
570k
  OnigCodePoint c;
4668
570k
  OnigEncoding enc = env->enc;
4669
570k
  UChar* p = *src;
4670
4671
570k
  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
4672
4673
570k
  PFETCH_S(c);
4674
570k
  switch (c) {
4675
80
  case 'M':
4676
80
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
4677
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4678
0
      PFETCH_S(c);
4679
0
      if (c != '-') return ONIGERR_META_CODE_SYNTAX;
4680
0
      if (PEND) return ONIGERR_END_PATTERN_AT_META;
4681
0
      PFETCH_S(c);
4682
0
      if (c == MC_ESC(env->syntax)) {
4683
0
        v = fetch_escaped_value_raw(&p, end, env, &c);
4684
0
        if (v < 0) return v;
4685
0
      }
4686
0
      c = ((c & 0xff) | 0x80);
4687
0
    }
4688
80
    else
4689
80
      goto backslash;
4690
0
    break;
4691
4692
513
  case 'C':
4693
513
    if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
4694
0
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4695
0
      PFETCH_S(c);
4696
0
      if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
4697
0
      goto control;
4698
0
    }
4699
513
    else
4700
513
      goto backslash;
4701
4702
4.03k
  case 'c':
4703
4.03k
    if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
4704
4.03k
    control:
4705
4.03k
      if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
4706
4.00k
      PFETCH_S(c);
4707
4.00k
      if (c == '?') {
4708
10
        c = 0177;
4709
10
      }
4710
3.99k
      else {
4711
3.99k
        if (c == MC_ESC(env->syntax)) {
4712
44
          v = fetch_escaped_value_raw(&p, end, env, &c);
4713
44
          if (v < 0) return v;
4714
44
        }
4715
3.99k
        c &= 0x9f;
4716
3.99k
      }
4717
4.00k
      break;
4718
4.00k
    }
4719
    /* fall through */
4720
4721
566k
  default:
4722
566k
    {
4723
566k
    backslash:
4724
566k
      c = conv_backslash_value(c, env);
4725
566k
    }
4726
566k
    break;
4727
570k
  }
4728
4729
570k
  *src = p;
4730
570k
  *val = c;
4731
570k
  return 0;
4732
570k
}
4733
4734
static int
4735
fetch_escaped_value(UChar** src, UChar* end, ParseEnv* env, OnigCodePoint* val)
4736
570k
{
4737
570k
  int r;
4738
570k
  int len;
4739
4740
570k
  r = fetch_escaped_value_raw(src, end, env, val);
4741
570k
  if (r != 0) return r;
4742
4743
570k
  len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
4744
570k
  if (len < 0) return len;
4745
4746
570k
  return 0;
4747
570k
}
4748
4749
static int fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env);
4750
4751
static OnigCodePoint
4752
get_name_end_code_point(OnigCodePoint start)
4753
36.1k
{
4754
36.1k
  switch (start) {
4755
197
  case '<':  return (OnigCodePoint )'>';  break;
4756
3.01k
  case '\'': return (OnigCodePoint )'\''; break;
4757
32.9k
  case '(':  return (OnigCodePoint )')';  break;
4758
0
  default:
4759
0
    break;
4760
36.1k
  }
4761
4762
0
  return (OnigCodePoint )0;
4763
36.1k
}
4764
4765
/* How fetch_name() / fetch_name_with_level() classified a group reference. */
enum REF_NUM {
  IS_NOT_NUM = 0,  /* not a number */
  IS_ABS_NUM = 1,  /* starts with a digit */
  IS_REL_NUM = 2   /* starts with '+' or '-' */
};
4770
4771
#ifdef USE_BACKREF_WITH_LEVEL
/*
   \k<name+n>, \k<name-n>
   \k<num+n>,  \k<num-n>
   \k<-num+n>, \k<-num-n>
   \k<+num+n>, \k<+num-n>

  Read a group reference with an optional level suffix; *src points just
  after the opening delimiter `start_code`.

  On success:
    *rname_end  end of the name/number part
    *rback_num  signed group number when *num_type is not IS_NOT_NUM
    *rlevel     signed level, written only when a level suffix was read
    *src        advanced past what was consumed
    return      1 when a level suffix exists, 0 otherwise
  On failure an ONIGERR_* code is returned; except for the early returns
  (empty name, too big number) the offending text is first recorded with
  onig_scan_env_set_error_string().
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
                      UChar** rname_end, ParseEnv* env,
                      int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
  int r, sign, exist_level;
  int digit_count;
  OnigCodePoint end_code;
  OnigCodePoint c;
  OnigEncoding enc;
  UChar *name_end;
  UChar *pnum_head;
  UChar *p;
  PFETCH_READY;

  p   = *src;
  enc = env->enc;
  c   = 0;
  r   = 0;
  sign        = 1;
  exist_level = 0;
  digit_count = 0;
  pnum_head   = *src;
  name_end    = end;
  *rback_num  = 0;
  *num_type   = IS_NOT_NUM;

  end_code = get_name_end_code_point(start_code);

  if (PEND) return ONIGERR_EMPTY_GROUP_NAME;

  /* the first character decides between name, absolute and relative number */
  PFETCH(c);
  if (c == end_code)
    return ONIGERR_EMPTY_GROUP_NAME;

  if (IS_CODE_DIGIT_ASCII(enc, c)) {
    *num_type = IS_ABS_NUM;
    digit_count++;
  }
  else if (c == '-' || c == '+') {
    *num_type = IS_REL_NUM;
    sign = (c == '-') ? -1 : 1;
    pnum_head = p;  /* the digits start after the sign */
  }
  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
    r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
  }

  /* scan up to the terminator, ')' or the sign that starts a level */
  while (!PEND) {
    name_end = p;
    PFETCH(c);
    if (c == end_code || c == ')' || c == '+' || c == '-') {
      if (*num_type != IS_NOT_NUM && digit_count == 0)
        r = ONIGERR_INVALID_GROUP_NAME;
      break;
    }

    if (*num_type == IS_NOT_NUM) {
      if (!ONIGENC_IS_CODE_WORD(enc, c))
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
    }
    else if (IS_CODE_DIGIT_ASCII(enc, c)) {
      digit_count++;
    }
    else {
      r = ONIGERR_INVALID_GROUP_NAME;
      *num_type = IS_NOT_NUM;
    }
  }

  if (r == 0 && c != end_code) {
    if (c == '+' || c == '-') {
      int level;
      int flag = (c == '-' ? -1 : 1);

      if (PEND) {
        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
        goto finish;
      }
      PFETCH(c);
      if (! IS_CODE_DIGIT_ASCII(enc, c)) goto bad_tail;
      PUNFETCH;
      level = scan_number(&p, end, enc);
      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
      *rlevel = (level * flag);
      exist_level = 1;

      if (!PEND) {
        PFETCH(c);
        if (c == end_code)
          goto finish;
      }
    }

  bad_tail:
    /* no proper terminator: the error text runs to the end of the pattern */
    name_end = end;
    r = ONIGERR_INVALID_GROUP_NAME;
  }

 finish:
  if (r != 0) goto report;

  if (*num_type != IS_NOT_NUM) {
    *rback_num = scan_number(&pnum_head, name_end, enc);
    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;

    if (*rback_num == 0 && *num_type == IS_REL_NUM) {
      /* a relative reference of zero is rejected */
      r = ONIGERR_INVALID_GROUP_NAME;
      goto report;
    }

    *rback_num *= sign;
  }

  *rname_end = name_end;
  *src = p;
  return (exist_level ? 1 : 0);

 report:
  onig_scan_env_set_error_string(env, r, *src, name_end);
  return r;
}
#endif /* USE_BACKREF_WITH_LEVEL */
4910
4911
/*
4912
  ref: 0 -> define name    (don't allow number name)
4913
       1 -> reference name (allow number name)
4914
*/
4915
static int
4916
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
4917
           UChar** rname_end, ParseEnv* env, int* rback_num,
4918
           enum REF_NUM* num_type, int is_ref)
4919
23.8k
{
4920
23.8k
  int r, sign;
4921
23.8k
  int digit_count;
4922
23.8k
  OnigCodePoint end_code;
4923
23.8k
  OnigCodePoint c = 0;
4924
23.8k
  OnigEncoding enc = env->enc;
4925
23.8k
  UChar *name_end;
4926
23.8k
  UChar *pnum_head;
4927
23.8k
  UChar *p = *src;
4928
4929
23.8k
  *rback_num = 0;
4930
4931
23.8k
  end_code = get_name_end_code_point(start_code);
4932
4933
23.8k
  digit_count = 0;
4934
23.8k
  name_end = end;
4935
23.8k
  pnum_head = *src;
4936
23.8k
  r = 0;
4937
23.8k
  *num_type = IS_NOT_NUM;
4938
23.8k
  sign = 1;
4939
23.8k
  if (PEND) {
4940
84
    return ONIGERR_EMPTY_GROUP_NAME;
4941
84
  }
4942
23.7k
  else {
4943
23.7k
    PFETCH_S(c);
4944
23.7k
    if (c == end_code)
4945
1
      return ONIGERR_EMPTY_GROUP_NAME;
4946
4947
23.7k
    if (IS_CODE_DIGIT_ASCII(enc, c)) {
4948
19.9k
      if (is_ref == TRUE)
4949
19.9k
        *num_type = IS_ABS_NUM;
4950
23
      else {
4951
23
        r = ONIGERR_INVALID_GROUP_NAME;
4952
23
      }
4953
19.9k
      digit_count++;
4954
19.9k
    }
4955
3.79k
    else if (c == '-') {
4956
358
      if (is_ref == TRUE) {
4957
357
        *num_type = IS_REL_NUM;
4958
357
        sign = -1;
4959
357
        pnum_head = p;
4960
357
      }
4961
1
      else {
4962
1
        r = ONIGERR_INVALID_GROUP_NAME;
4963
1
      }
4964
358
    }
4965
3.43k
    else if (c == '+') {
4966
328
      if (is_ref == TRUE) {
4967
314
        *num_type = IS_REL_NUM;
4968
314
        sign = 1;
4969
314
        pnum_head = p;
4970
314
      }
4971
14
      else {
4972
14
        r = ONIGERR_INVALID_GROUP_NAME;
4973
14
      }
4974
328
    }
4975
3.10k
    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
4976
108
      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4977
108
    }
4978
23.7k
  }
4979
4980
23.7k
  if (r == 0) {
4981
1.30M
    while (!PEND) {
4982
1.30M
      name_end = p;
4983
1.30M
      PFETCH_S(c);
4984
1.30M
      if (c == end_code || c == ')') {
4985
22.8k
        if (*num_type != IS_NOT_NUM && digit_count == 0)
4986
0
          r = ONIGERR_INVALID_GROUP_NAME;
4987
22.8k
        break;
4988
22.8k
      }
4989
4990
1.27M
      if (*num_type != IS_NOT_NUM) {
4991
1.70k
        if (IS_CODE_DIGIT_ASCII(enc, c)) {
4992
1.51k
          digit_count++;
4993
1.51k
        }
4994
183
        else {
4995
183
          if (!ONIGENC_IS_CODE_WORD(enc, c))
4996
158
            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
4997
25
          else
4998
25
            r = ONIGERR_INVALID_GROUP_NAME;
4999
5000
183
          *num_type = IS_NOT_NUM;
5001
183
        }
5002
1.70k
      }
5003
1.27M
      else {
5004
1.27M
        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
5005
735k
          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
5006
735k
        }
5007
1.27M
      }
5008
1.27M
    }
5009
5010
23.5k
    if (c != end_code) {
5011
793
      r = ONIGERR_INVALID_GROUP_NAME;
5012
793
      goto err;
5013
793
    }
5014
5015
22.7k
    if (*num_type != IS_NOT_NUM) {
5016
19.9k
      *rback_num = scan_number(&pnum_head, name_end, enc);
5017
19.9k
      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
5018
19.9k
      else if (*rback_num == 0) {
5019
4.20k
        if (*num_type == IS_REL_NUM) {
5020
174
          r = ONIGERR_INVALID_GROUP_NAME;
5021
174
          goto err;
5022
174
        }
5023
4.20k
      }
5024
5025
19.7k
      *rback_num *= sign;
5026
19.7k
    }
5027
5028
22.6k
    *rname_end = name_end;
5029
22.6k
    *src = p;
5030
22.6k
    return 0;
5031
22.7k
  }
5032
146
  else {
5033
5.25k
    while (!PEND) {
5034
5.12k
      name_end = p;
5035
5.12k
      PFETCH_S(c);
5036
5.12k
      if (c == end_code || c == ')')
5037
15
        break;
5038
5.12k
    }
5039
146
    if (PEND)
5040
133
      name_end = end;
5041
5042
1.11k
  err:
5043
1.11k
    onig_scan_env_set_error_string(env, r, *src, name_end);
5044
1.11k
    return r;
5045
146
  }
5046
23.7k
}
5047
5048
static void
5049
CC_ESC_WARN(ParseEnv* env, UChar *c)
5050
343k
{
5051
343k
  if (onig_warn == onig_null_warn) return ;
5052
5053
0
  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
5054
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
5055
0
    UChar buf[WARN_BUFSIZE];
5056
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5057
0
                               env->pattern, env->pattern_end,
5058
0
                               (UChar* )"character class has '%s' without escape",
5059
0
                               c);
5060
0
    (*onig_warn)((char* )buf);
5061
0
  }
5062
0
}
5063
5064
static void
5065
CLOSE_BRACKET_WITHOUT_ESC_WARN(ParseEnv* env, UChar* c)
5066
666k
{
5067
666k
  if (onig_warn == onig_null_warn) return ;
5068
5069
0
  if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
5070
0
    UChar buf[WARN_BUFSIZE];
5071
0
    onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
5072
0
                         (env)->pattern, (env)->pattern_end,
5073
0
                         (UChar* )"regular expression has '%s' without escape", c);
5074
0
    (*onig_warn)((char* )buf);
5075
0
  }
5076
0
}
5077
5078
static UChar*
5079
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
5080
                  UChar **next, OnigEncoding enc)
5081
116
{
5082
116
  int i;
5083
116
  OnigCodePoint x;
5084
116
  UChar *q;
5085
116
  UChar *p = from;
5086
5087
1.07M
  while (p < to) {
5088
1.07M
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5089
1.07M
    q = p + enclen(enc, p);
5090
1.07M
    if (x == s[0]) {
5091
2.11k
      for (i = 1; i < n && q < to; i++) {
5092
2.06k
        x = ONIGENC_MBC_TO_CODE(enc, q, to);
5093
2.06k
        if (x != s[i]) break;
5094
0
        q += enclen(enc, q);
5095
0
      }
5096
2.11k
      if (i >= n) {
5097
0
        if (IS_NOT_NULL(next))
5098
0
          *next = q;
5099
0
        return p;
5100
0
      }
5101
2.11k
    }
5102
1.07M
    p = q;
5103
1.07M
  }
5104
116
  return NULL_UCHARP;
5105
116
}
5106
5107
static int
5108
is_head_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5109
0
{
5110
0
  UChar* start;
5111
0
  OnigCodePoint code;
5112
5113
0
  start = env->pattern;
5114
0
  if (p > start) {
5115
0
    p = onigenc_get_prev_char_head(enc, start, p);
5116
0
    if (p > start) {
5117
0
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5118
0
      if (code == '(' ||
5119
0
          (code == '|' &&
5120
0
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT))) {
5121
0
        p = onigenc_get_prev_char_head(enc, start, p);
5122
0
        code = ONIGENC_MBC_TO_CODE(enc, p, end);
5123
0
        if (IS_MC_ESC_CODE(code, env->syntax)) {
5124
0
          int count = 0;
5125
0
          while (p > start) {
5126
0
            p = onigenc_get_prev_char_head(enc, start, p);
5127
0
            code = ONIGENC_MBC_TO_CODE(enc, p, end);
5128
0
            if (! IS_MC_ESC_CODE(code, env->syntax)) break;
5129
0
            count++;
5130
0
          }
5131
0
          return (count % 2 == 0);
5132
0
        }
5133
0
      }
5134
0
    }
5135
0
    return FALSE;
5136
0
  }
5137
0
  else {
5138
0
    return TRUE;
5139
0
  }
5140
0
}
5141
5142
static int
5143
is_end_of_bre_subexp(UChar* p, UChar* end, OnigEncoding enc, ParseEnv* env)
5144
0
{
5145
0
  OnigCodePoint code;
5146
5147
0
  if (p == end) return TRUE;
5148
5149
0
  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5150
0
  if (IS_MC_ESC_CODE(code, env->syntax)) {
5151
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
5152
0
    if (p < end) {
5153
0
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5154
0
      if (code == ')' ||
5155
0
          (code == '|' &&
5156
0
           IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_VBAR_ALT)))
5157
0
        return TRUE;
5158
0
    }
5159
0
  }
5160
5161
0
  return FALSE;
5162
0
}
5163
5164
static int
5165
is_posix_bracket_start(UChar* from, UChar* to, OnigEncoding enc)
5166
125k
{
5167
125k
  int n;
5168
125k
  OnigCodePoint x;
5169
125k
  UChar *p;
5170
5171
125k
  n = 0;
5172
125k
  p = from;
5173
549k
  while (p < to) {
5174
549k
    x = ONIGENC_MBC_TO_CODE(enc, p, to);
5175
549k
    p += enclen(enc, p);
5176
549k
    if (x == ':') {
5177
9.78k
      if (p < to) {
5178
9.74k
        x = ONIGENC_MBC_TO_CODE(enc, p, to);
5179
9.74k
        if (x == ']') {
5180
94
          if (n == 0) return FALSE;
5181
1
          else        return TRUE;
5182
94
        }
5183
9.74k
      }
5184
5185
9.68k
      return FALSE;
5186
9.78k
    }
5187
539k
    else if (x == '^' && n == 0) {
5188
18
      ;
5189
18
    }
5190
539k
    else if (! ONIGENC_IS_CODE_ALPHA(enc, x)) {
5191
115k
      break;
5192
115k
    }
5193
5194
424k
    n += 1;
5195
424k
  }
5196
5197
115k
  return FALSE;
5198
125k
}
5199
5200
static int
5201
fetch_token_cc(PToken* tok, UChar** src, UChar* end, ParseEnv* env, int state)
5202
5.15M
{
5203
5.15M
  int r;
5204
5.15M
  OnigCodePoint code;
5205
5.15M
  OnigCodePoint c, c2;
5206
5.15M
  int mindigits, maxdigits;
5207
5.15M
  OnigSyntaxType* syn;
5208
5.15M
  OnigEncoding enc;
5209
5.15M
  UChar* prev;
5210
5.15M
  UChar* p;
5211
5.15M
  PFETCH_READY;
5212
5213
5.15M
  p = *src;
5214
5.15M
  enc = env->enc;
5215
5.15M
  syn = env->syntax;
5216
5.15M
  if (tok->code_point_continue != 0) {
5217
0
    r = get_next_code_point(&p, end, tok->base_num, enc, TRUE, &code);
5218
0
    if (r == 1) {
5219
0
      tok->code_point_continue = 0;
5220
0
    }
5221
0
    else if (r == 2) {
5222
0
      tok->type = TK_CC_RANGE;
5223
0
      goto end;
5224
0
    }
5225
0
    else if (r == 0) {
5226
0
      tok->type   = TK_CODE_POINT;
5227
0
      tok->u.code = code;
5228
0
      goto end;
5229
0
    }
5230
0
    else
5231
0
      return r; /* error */
5232
0
  }
5233
5234
5.15M
  if (PEND) {
5235
29.4k
    tok->type = TK_EOT;
5236
29.4k
    return tok->type;
5237
29.4k
  }
5238
5239
5.12M
  PFETCH(c);
5240
5.12M
  tok->type = TK_CHAR;
5241
5.12M
  tok->base_num = 0;
5242
5.12M
  tok->u.code   = c;
5243
5.12M
  tok->escaped  = 0;
5244
5245
5.12M
  if (c == ']') {
5246
162k
    tok->type = TK_CC_CLOSE;
5247
162k
  }
5248
4.96M
  else if (c == '-') {
5249
55.5k
    tok->type = TK_CC_RANGE;
5250
55.5k
  }
5251
4.90M
  else if (c == MC_ESC(syn)) {
5252
46.7k
    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
5253
0
      goto end;
5254
5255
46.7k
    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5256
5257
46.5k
    PFETCH(c);
5258
46.5k
    tok->escaped = 1;
5259
46.5k
    tok->u.code = c;
5260
46.5k
    switch (c) {
5261
752
    case 'w':
5262
752
      tok->type = TK_CHAR_TYPE;
5263
752
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5264
752
      tok->u.prop.not   = 0;
5265
752
      break;
5266
14
    case 'W':
5267
14
      tok->type = TK_CHAR_TYPE;
5268
14
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5269
14
      tok->u.prop.not   = 1;
5270
14
      break;
5271
71
    case 'd':
5272
71
      tok->type = TK_CHAR_TYPE;
5273
71
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5274
71
      tok->u.prop.not   = 0;
5275
71
      break;
5276
150
    case 'D':
5277
150
      tok->type = TK_CHAR_TYPE;
5278
150
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5279
150
      tok->u.prop.not   = 1;
5280
150
      break;
5281
1.27k
    case 's':
5282
1.27k
      tok->type = TK_CHAR_TYPE;
5283
1.27k
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5284
1.27k
      tok->u.prop.not   = 0;
5285
1.27k
      break;
5286
12
    case 'S':
5287
12
      tok->type = TK_CHAR_TYPE;
5288
12
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5289
12
      tok->u.prop.not   = 1;
5290
12
      break;
5291
22
    case 'h':
5292
22
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5293
0
      tok->type = TK_CHAR_TYPE;
5294
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5295
0
      tok->u.prop.not   = 0;
5296
0
      break;
5297
154
    case 'H':
5298
154
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5299
0
      tok->type = TK_CHAR_TYPE;
5300
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5301
0
      tok->u.prop.not   = 1;
5302
0
      break;
5303
5304
13
    case 'p':
5305
436
    case 'P':
5306
436
      if (PEND) break;
5307
5308
436
      c2 = PPEEK;
5309
436
      if (c2 == '{' &&
5310
436
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
5311
0
        PINC;
5312
0
        tok->type = TK_CHAR_PROPERTY;
5313
0
        tok->u.prop.not = c == 'P';
5314
5315
0
        if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
5316
0
          PFETCH(c2);
5317
0
          if (c2 == '^') {
5318
0
            tok->u.prop.not = tok->u.prop.not == 0;
5319
0
          }
5320
0
          else
5321
0
            PUNFETCH;
5322
0
        }
5323
0
      }
5324
436
      break;
5325
5326
361
    case 'o':
5327
361
      if (PEND) break;
5328
5329
361
      prev = p;
5330
361
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5331
8
        PINC;
5332
8
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
5333
8
        if (r < 0) return r;
5334
8
        if (!PEND) {
5335
8
          c2 = PPEEK;
5336
8
          if (IS_CODE_DIGIT_ASCII(enc, c2))
5337
0
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5338
8
        }
5339
5340
8
        tok->base_num = 8;
5341
8
        goto brace_code_point_entry;
5342
8
      }
5343
353
      break;
5344
5345
353
    case 'x':
5346
100
      if (PEND) break;
5347
5348
100
      prev = p;
5349
100
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5350
0
        PINC;
5351
0
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5352
0
        if (r < 0) return r;
5353
0
        if (!PEND) {
5354
0
          c2 = PPEEK;
5355
0
          if (IS_CODE_XDIGIT_ASCII(enc, c2))
5356
0
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5357
0
        }
5358
5359
0
        tok->base_num = 16;
5360
8
      brace_code_point_entry:
5361
8
        if ((p > prev + enclen(enc, prev))) {
5362
0
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5363
0
          if (PPEEK_IS('}')) {
5364
0
            PINC;
5365
0
          }
5366
0
          else {
5367
0
            int curr_state;
5368
5369
0
            curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
5370
0
            r = check_code_point_sequence_cc(p, end, tok->base_num, enc,
5371
0
                                             curr_state);
5372
0
            if (r < 0) return r;
5373
0
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5374
0
            tok->code_point_continue = TRUE;
5375
0
          }
5376
0
          tok->type   = TK_CODE_POINT;
5377
0
          tok->u.code = code;
5378
0
        }
5379
8
        else {
5380
          /* can't read nothing or invalid format */
5381
8
          p = prev;
5382
8
        }
5383
8
      }
5384
100
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5385
100
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5386
100
        if (r < 0) return r;
5387
100
        if (p == prev) {  /* can't read nothing. */
5388
77
          code = 0; /* but, it's not error */
5389
77
        }
5390
100
        tok->type = TK_CRUDE_BYTE;
5391
100
        tok->base_num = 16;
5392
100
        tok->u.byte   = (UChar )code;
5393
100
      }
5394
108
      break;
5395
5396
752
    case 'u':
5397
752
      if (PEND) break;
5398
752
      prev = p;
5399
752
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5400
0
        mindigits = maxdigits = 4;
5401
0
      u_hex_digits:
5402
0
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5403
0
        if (r < 0) return r;
5404
0
        if (p == prev) {  /* can't read nothing. */
5405
0
          code = 0; /* but, it's not error */
5406
0
        }
5407
0
        tok->type = TK_CODE_POINT;
5408
0
        tok->base_num = 16;
5409
0
        tok->u.code   = code;
5410
0
      }
5411
752
      break;
5412
5413
752
    case 'U':
5414
99
      if (PEND) break;
5415
99
      prev = p;
5416
99
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5417
0
        mindigits = maxdigits = 8;
5418
0
        goto u_hex_digits;
5419
0
      }
5420
99
      break;
5421
5422
99
    case '0':
5423
1.45k
    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
5424
1.45k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5425
1.45k
        PUNFETCH;
5426
1.45k
        prev = p;
5427
1.45k
        r = scan_octal_number(&p, end, 0, 3, enc, &code);
5428
1.45k
        if (r < 0) return r;
5429
1.45k
        if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;
5430
1.45k
        if (p == prev) {  /* can't read nothing. */
5431
0
          code = 0; /* but, it's not error */
5432
0
        }
5433
1.45k
        tok->type = TK_CRUDE_BYTE;
5434
1.45k
        tok->base_num = 8;
5435
1.45k
        tok->u.byte   = (UChar )code;
5436
1.45k
      }
5437
1.45k
      break;
5438
5439
40.9k
    default:
5440
40.9k
      PUNFETCH;
5441
40.9k
      r = fetch_escaped_value(&p, end, env, &c2);
5442
40.9k
      if (r < 0) return r;
5443
40.9k
      if (tok->u.code != c2) {
5444
726
        tok->u.code = c2;
5445
726
        tok->type   = TK_CODE_POINT;
5446
726
      }
5447
40.9k
      break;
5448
46.5k
    }
5449
46.5k
  }
5450
4.85M
  else if (c == '[') {
5451
322k
    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
5452
125k
      tok->backp = p; /* point at '[' is read */
5453
125k
      PINC;
5454
125k
      if (is_posix_bracket_start(p, end, enc)) {
5455
1
        tok->type = TK_CC_POSIX_BRACKET_OPEN;
5456
1
      }
5457
125k
      else {
5458
125k
        PUNFETCH;
5459
125k
        goto cc_in_cc;
5460
125k
      }
5461
125k
    }
5462
196k
    else {
5463
322k
    cc_in_cc:
5464
322k
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
5465
0
        tok->type = TK_CC_OPEN_CC;
5466
0
      }
5467
322k
      else {
5468
322k
        CC_ESC_WARN(env, (UChar* )"[");
5469
322k
      }
5470
322k
    }
5471
322k
  }
5472
4.53M
  else if (c == '&') {
5473
3.46k
    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
5474
3.46k
        !PEND && (PPEEK_IS('&'))) {
5475
0
      PINC;
5476
0
      tok->type = TK_CC_AND;
5477
0
    }
5478
3.46k
  }
5479
5480
5.12M
 end:
5481
5.12M
  *src = p;
5482
5.12M
  return tok->type;
5483
5.12M
}
5484
5485
static int
5486
fetch_token(PToken* tok, UChar** src, UChar* end, ParseEnv* env)
5487
83.8M
{
5488
83.8M
  int r;
5489
83.8M
  OnigCodePoint code;
5490
83.8M
  OnigCodePoint c;
5491
83.8M
  int mindigits, maxdigits;
5492
83.8M
  UChar* prev;
5493
83.8M
  int allow_num;
5494
83.8M
  OnigEncoding enc;
5495
83.8M
  OnigSyntaxType* syn;
5496
83.8M
  UChar* p;
5497
83.8M
  PFETCH_READY;
5498
5499
83.8M
  enc = env->enc;
5500
83.8M
  syn = env->syntax;
5501
83.8M
  p = *src;
5502
5503
83.8M
  if (tok->code_point_continue != 0) {
5504
0
    r = get_next_code_point(&p, end, tok->base_num, enc, FALSE, &code);
5505
0
    if (r == 1) {
5506
0
      tok->code_point_continue = 0;
5507
0
    }
5508
0
    else if (r == 0) {
5509
0
      tok->type   = TK_CODE_POINT;
5510
0
      tok->u.code = code;
5511
0
      goto out;
5512
0
    }
5513
0
    else
5514
0
      return r; /* error */
5515
0
  }
5516
5517
83.8M
 start:
5518
83.8M
  if (PEND) {
5519
1.23M
    tok->type = TK_EOT;
5520
1.23M
    return tok->type;
5521
1.23M
  }
5522
5523
82.6M
  tok->type = TK_STRING;
5524
82.6M
  tok->base_num = 0;
5525
82.6M
  tok->backp    = p;
5526
5527
82.6M
  PFETCH(c);
5528
82.6M
  if (IS_MC_ESC_CODE(c, syn)) {
5529
607k
    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
5530
5531
603k
    tok->backp = p;
5532
603k
    PFETCH(c);
5533
5534
603k
    tok->u.code = c;
5535
603k
    tok->escaped = 1;
5536
603k
    switch (c) {
5537
39
    case '*':
5538
39
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
5539
0
      tok->type = TK_REPEAT;
5540
0
      tok->u.repeat.lower = 0;
5541
0
      tok->u.repeat.upper = INFINITE_REPEAT;
5542
0
      goto greedy_check;
5543
0
      break;
5544
5545
360
    case '+':
5546
360
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
5547
0
      tok->type = TK_REPEAT;
5548
0
      tok->u.repeat.lower = 1;
5549
0
      tok->u.repeat.upper = INFINITE_REPEAT;
5550
0
      goto greedy_check;
5551
0
      break;
5552
5553
1.78k
    case '?':
5554
1.78k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
5555
0
      tok->type = TK_REPEAT;
5556
0
      tok->u.repeat.lower = 0;
5557
0
      tok->u.repeat.upper = 1;
5558
2.64M
    greedy_check:
5559
2.64M
      tok->u.repeat.possessive = 0;
5560
2.66M
    greedy_check2:
5561
2.66M
      if (!PEND && PPEEK_IS('?') &&
5562
2.66M
          IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
5563
2.66M
          tok->u.repeat.possessive == 0) {
5564
117k
        PFETCH(c);
5565
117k
        tok->u.repeat.greedy = 0;
5566
117k
        tok->u.repeat.possessive = 0;
5567
117k
      }
5568
2.54M
      else {
5569
2.54M
      possessive_check:
5570
2.54M
        tok->u.repeat.greedy = 1;
5571
2.54M
        if (!PEND && PPEEK_IS('+') &&
5572
2.54M
            ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
5573
611k
              tok->type != TK_INTERVAL)  ||
5574
611k
             (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
5575
12
              tok->type == TK_INTERVAL)) &&
5576
2.54M
          tok->u.repeat.possessive == 0) {
5577
611k
          PFETCH(c);
5578
611k
          tok->u.repeat.possessive = 1;
5579
611k
        }
5580
2.54M
      }
5581
2.66M
      break;
5582
5583
2.66M
    case '{':
5584
272
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
5585
0
      r = fetch_interval(&p, end, tok, env);
5586
0
      if (r < 0) return r;  /* error */
5587
0
      if (r == 0) goto greedy_check2;
5588
0
      else if (r == 2) { /* {n} */
5589
0
        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
5590
0
          goto possessive_check;
5591
5592
0
        goto greedy_check2;
5593
0
      }
5594
      /* r == 1 : normal char */
5595
0
      break;
5596
5597
482
    case '|':
5598
482
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
5599
0
      tok->type = TK_ALT;
5600
0
      break;
5601
5602
225
    case '(':
5603
225
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5604
0
      tok->type = TK_SUBEXP_OPEN;
5605
0
      break;
5606
5607
68
    case ')':
5608
68
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
5609
0
      tok->type = TK_SUBEXP_CLOSE;
5610
0
      break;
5611
5612
2.02k
    case 'w':
5613
2.02k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5614
2.02k
      tok->type = TK_CHAR_TYPE;
5615
2.02k
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5616
2.02k
      tok->u.prop.not   = 0;
5617
2.02k
      break;
5618
5619
723
    case 'W':
5620
723
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
5621
723
      tok->type = TK_CHAR_TYPE;
5622
723
      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
5623
723
      tok->u.prop.not   = 1;
5624
723
      break;
5625
5626
1.87k
    case 'b':
5627
1.87k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5628
1.87k
      tok->type = TK_ANCHOR;
5629
1.87k
      tok->u.anchor = ANCR_WORD_BOUNDARY;
5630
1.87k
      break;
5631
5632
315
    case 'B':
5633
315
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
5634
315
      tok->type = TK_ANCHOR;
5635
315
      tok->u.anchor = ANCR_NO_WORD_BOUNDARY;
5636
315
      break;
5637
5638
6.01k
    case 'y':
5639
6.01k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5640
6.01k
      tok->type = TK_ANCHOR;
5641
6.01k
      tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
5642
6.01k
      break;
5643
5644
35
    case 'Y':
5645
35
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5646
35
      tok->type = TK_ANCHOR;
5647
35
      tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
5648
35
      break;
5649
5650
0
#ifdef USE_WORD_BEGIN_END
5651
139
    case '<':
5652
139
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5653
0
      tok->type = TK_ANCHOR;
5654
0
      tok->u.anchor = ANCR_WORD_BEGIN;
5655
0
      break;
5656
5657
79
    case '>':
5658
79
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
5659
0
      tok->type = TK_ANCHOR;
5660
0
      tok->u.anchor = ANCR_WORD_END;
5661
0
      break;
5662
0
#endif
5663
5664
589
    case 's':
5665
589
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5666
589
      tok->type = TK_CHAR_TYPE;
5667
589
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5668
589
      tok->u.prop.not   = 0;
5669
589
      break;
5670
5671
222
    case 'S':
5672
222
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
5673
222
      tok->type = TK_CHAR_TYPE;
5674
222
      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
5675
222
      tok->u.prop.not   = 1;
5676
222
      break;
5677
5678
309
    case 'd':
5679
309
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5680
309
      tok->type = TK_CHAR_TYPE;
5681
309
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5682
309
      tok->u.prop.not   = 0;
5683
309
      break;
5684
5685
8.03k
    case 'D':
5686
8.03k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
5687
8.03k
      tok->type = TK_CHAR_TYPE;
5688
8.03k
      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
5689
8.03k
      tok->u.prop.not   = 1;
5690
8.03k
      break;
5691
5692
106
    case 'h':
5693
106
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5694
0
      tok->type = TK_CHAR_TYPE;
5695
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5696
0
      tok->u.prop.not   = 0;
5697
0
      break;
5698
5699
364
    case 'H':
5700
364
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
5701
0
      tok->type = TK_CHAR_TYPE;
5702
0
      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
5703
0
      tok->u.prop.not   = 1;
5704
0
      break;
5705
5706
223
    case 'K':
5707
223
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break;
5708
223
      tok->type = TK_KEEP;
5709
223
      break;
5710
5711
641
    case 'R':
5712
641
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break;
5713
641
      tok->type = TK_GENERAL_NEWLINE;
5714
641
      break;
5715
5716
10
    case 'N':
5717
10
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5718
10
      tok->type = TK_NO_NEWLINE;
5719
10
      break;
5720
5721
2.23k
    case 'O':
5722
2.23k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break;
5723
2.23k
      tok->type = TK_TRUE_ANYCHAR;
5724
2.23k
      break;
5725
5726
2.74k
    case 'X':
5727
2.74k
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
5728
2.74k
      tok->type = TK_TEXT_SEGMENT;
5729
2.74k
      break;
5730
5731
266
    case 'A':
5732
266
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5733
266
    begin_buf:
5734
266
      tok->type = TK_ANCHOR;
5735
266
      tok->u.subtype = ANCR_BEGIN_BUF;
5736
266
      break;
5737
5738
2.22k
    case 'Z':
5739
2.22k
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5740
0
        goto end_buf;
5741
0
      }
5742
2.22k
      else {
5743
2.22k
        if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5744
2.22k
        tok->type = TK_ANCHOR;
5745
2.22k
        tok->u.subtype = ANCR_SEMI_END_BUF;
5746
2.22k
      }
5747
2.22k
      break;
5748
5749
2.22k
    case 'z':
5750
471
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON))
5751
0
        return ONIGERR_UNDEFINED_OPERATOR;
5752
5753
471
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
5754
471
    end_buf:
5755
471
      tok->type = TK_ANCHOR;
5756
471
      tok->u.subtype = ANCR_END_BUF;
5757
471
      break;
5758
5759
249
    case 'G':
5760
249
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
5761
249
      tok->type = TK_ANCHOR;
5762
249
      tok->u.subtype = ANCR_BEGIN_POSITION;
5763
249
      break;
5764
5765
122
    case '`':
5766
122
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5767
0
      goto begin_buf;
5768
0
      break;
5769
5770
778
    case '\'':
5771
778
      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
5772
0
      goto end_buf;
5773
0
      break;
5774
5775
2.23k
    case 'o':
5776
2.23k
      if (PEND) break;
5777
5778
2.22k
      prev = p;
5779
2.22k
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
5780
49
        PINC;
5781
49
        r = scan_octal_number(&p, end, 0, 11, enc, &code);
5782
49
        if (r < 0) return r;
5783
49
        if (!PEND) {
5784
49
          if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
5785
0
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5786
49
        }
5787
5788
49
        tok->base_num = 8;
5789
49
        goto brace_code_point_entry;
5790
49
      }
5791
2.18k
      break;
5792
5793
2.18k
    case 'x':
5794
564
      if (PEND) break;
5795
5796
541
      prev = p;
5797
541
      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
5798
0
        PINC;
5799
0
        r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
5800
0
        if (r < 0) return r;
5801
0
        if (!PEND) {
5802
0
          if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
5803
0
            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
5804
0
        }
5805
5806
0
        tok->base_num = 16;
5807
49
      brace_code_point_entry:
5808
49
        if ((p > prev + enclen(enc, prev))) {
5809
0
          if (PEND) return ONIGERR_INVALID_CODE_POINT_VALUE;
5810
0
          if (PPEEK_IS('}')) {
5811
0
            PINC;
5812
0
          }
5813
0
          else {
5814
0
            r = check_code_point_sequence(p, end, tok->base_num, enc);
5815
0
            if (r < 0) return r;
5816
0
            if (r == 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
5817
0
            tok->code_point_continue = TRUE;
5818
0
          }
5819
0
          tok->type   = TK_CODE_POINT;
5820
0
          tok->u.code = code;
5821
0
        }
5822
49
        else {
5823
          /* can't read nothing or invalid format */
5824
49
          p = prev;
5825
49
        }
5826
49
      }
5827
541
      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
5828
541
        r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
5829
541
        if (r < 0) return r;
5830
541
        if (p == prev) {  /* can't read nothing. */
5831
121
          code = 0; /* but, it's not error */
5832
121
        }
5833
541
        tok->type = TK_CRUDE_BYTE;
5834
541
        tok->base_num = 16;
5835
541
        tok->u.byte   = (UChar )code;
5836
541
      }
5837
590
      break;
5838
5839
1.07k
    case 'u':
5840
1.07k
      if (PEND) break;
5841
1.02k
      prev = p;
5842
1.02k
      mindigits = maxdigits = 4;
5843
1.02k
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
5844
0
    u_hex_digits:
5845
0
        r = scan_hexadecimal_number(&p, end, mindigits, maxdigits, enc, &code);
5846
0
        if (r < 0) return r;
5847
0
        if (p == prev) {  /* can't read nothing. */
5848
0
          code = 0; /* but, it's not error */
5849
0
        }
5850
0
        tok->type = TK_CODE_POINT;
5851
0
        tok->base_num = 16;
5852
0
        tok->u.code   = code;
5853
0
      }
5854
1.02k
      break;
5855
5856
1.02k
    case 'U':
5857
84
      if (PEND) break;
5858
81
      prev = p;
5859
81
      if (IS_SYNTAX_BV(syn, ONIG_SYN_PYTHON)) {
5860
0
        mindigits = maxdigits = 8;
5861
0
        goto u_hex_digits;
5862
0
      }
5863
81
      break;
5864
5865
20.5k
    case '1': case '2': case '3': case '4':
5866
26.1k
    case '5': case '6': case '7': case '8': case '9':
5867
26.1k
      PUNFETCH;
5868
26.1k
      prev = p;
5869
26.1k
      r = scan_number(&p, end, enc);
5870
26.1k
      if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {
5871
2.17k
        goto skip_backref;
5872
2.17k
      }
5873
5874
23.9k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
5875
23.9k
          (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */
5876
22.9k
        if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5877
0
          if (r > env->num_mem || IS_NULL(PARSEENV_MEMENV(env)[r].mem_node))
5878
0
            return ONIGERR_INVALID_BACKREF;
5879
0
        }
5880
5881
22.9k
        tok->type = TK_BACKREF;
5882
22.9k
        tok->u.backref.num     = 1;
5883
22.9k
        tok->u.backref.ref1    = r;
5884
22.9k
        tok->u.backref.by_name = 0;
5885
22.9k
#ifdef USE_BACKREF_WITH_LEVEL
5886
22.9k
        tok->u.backref.exist_level = 0;
5887
22.9k
#endif
5888
22.9k
        break;
5889
22.9k
      }
5890
5891
3.17k
    skip_backref:
5892
3.17k
      if (c == '8' || c == '9') {
5893
        /* normal char */
5894
1.55k
        p = prev; PINC;
5895
1.55k
        break;
5896
1.55k
      }
5897
5898
1.62k
      p = prev;
5899
      /* fall through */
5900
3.50k
    case '0':
5901
3.50k
      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
5902
3.50k
        prev = p;
5903
3.50k
        r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code);
5904
3.50k
        if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;
5905
3.50k
        if (p == prev) {  /* can't read nothing. */
5906
1.48k
          code = 0; /* but, it's not error */
5907
1.48k
        }
5908
3.50k
        tok->type = TK_CRUDE_BYTE;
5909
3.50k
        tok->base_num = 8;
5910
3.50k
        tok->u.byte   = (UChar )code;
5911
3.50k
      }
5912
0
      else if (c != '0') {
5913
0
        PINC;
5914
0
      }
5915
3.50k
      break;
5916
5917
3.50k
    case 'k':
5918
296
      if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
5919
237
        PFETCH(c);
5920
237
        if (c == '<' || c == '\'') {
5921
0
          UChar* name_end;
5922
0
          int* backs;
5923
0
          int back_num;
5924
0
          enum REF_NUM num_type;
5925
5926
0
          allow_num = 1;
5927
5928
0
        backref_start:
5929
0
          prev = p;
5930
5931
0
#ifdef USE_BACKREF_WITH_LEVEL
5932
0
          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
5933
0
          r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
5934
0
                                 env, &back_num, &tok->u.backref.level, &num_type);
5935
0
          if (r == 1) tok->u.backref.exist_level = 1;
5936
0
          else        tok->u.backref.exist_level = 0;
5937
#else
5938
          r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
5939
#endif
5940
0
          if (r < 0) return r;
5941
5942
0
          if (num_type != IS_NOT_NUM) {
5943
0
            if (allow_num == 0) return ONIGERR_INVALID_BACKREF;
5944
5945
0
            if (num_type == IS_REL_NUM) {
5946
0
              back_num = backref_rel_to_abs(back_num, env);
5947
0
            }
5948
0
            if (back_num <= 0)
5949
0
              return ONIGERR_INVALID_BACKREF;
5950
5951
0
            if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5952
0
              if (back_num > env->num_mem ||
5953
0
                  IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
5954
0
                return ONIGERR_INVALID_BACKREF;
5955
0
            }
5956
0
            tok->type = TK_BACKREF;
5957
0
            tok->u.backref.by_name = 0;
5958
0
            tok->u.backref.num  = 1;
5959
0
            tok->u.backref.ref1 = back_num;
5960
0
          }
5961
0
          else {
5962
0
            int num = name_to_group_numbers(env, prev, name_end, &backs);
5963
0
            if (num <= 0) {
5964
0
              return ONIGERR_UNDEFINED_NAME_REFERENCE;
5965
0
            }
5966
0
            if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
5967
0
              int i;
5968
0
              for (i = 0; i < num; i++) {
5969
0
                if (backs[i] > env->num_mem ||
5970
0
                    IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
5971
0
                  return ONIGERR_INVALID_BACKREF;
5972
0
              }
5973
0
            }
5974
5975
0
            tok->type = TK_BACKREF;
5976
0
            tok->u.backref.by_name = 1;
5977
0
            if (num == 1) {
5978
0
              tok->u.backref.num  = 1;
5979
0
              tok->u.backref.ref1 = backs[0];
5980
0
            }
5981
0
            else {
5982
0
              tok->u.backref.num  = num;
5983
0
              tok->u.backref.refs = backs;
5984
0
            }
5985
0
          }
5986
0
        }
5987
237
        else
5988
237
          PUNFETCH;
5989
237
      }
5990
296
      break;
5991
5992
296
#ifdef USE_CALL
5993
3.13k
    case 'g':
5994
3.13k
      if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
5995
3.10k
        PFETCH(c);
5996
3.10k
        if (c == '<' || c == '\'') {
5997
0
          int gnum;
5998
0
          UChar* name_end;
5999
0
          enum REF_NUM num_type;
6000
6001
0
          allow_num = 1;
6002
6003
0
        call_start:
6004
0
          prev = p;
6005
0
          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
6006
0
                         &gnum, &num_type, TRUE);
6007
0
          if (r < 0) return r;
6008
6009
0
          if (num_type != IS_NOT_NUM) {
6010
0
            if (allow_num == 0) return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6011
6012
0
            if (num_type == IS_REL_NUM) {
6013
0
              gnum = backref_rel_to_abs(gnum, env);
6014
0
              if (gnum < 0) {
6015
0
                onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
6016
0
                                               prev, name_end);
6017
0
                return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6018
0
              }
6019
0
            }
6020
0
            tok->u.call.by_number = 1;
6021
0
            tok->u.call.gnum      = gnum;
6022
0
          }
6023
0
          else {
6024
0
            tok->u.call.by_number = 0;
6025
0
            tok->u.call.gnum      = 0;
6026
0
          }
6027
6028
0
          tok->type = TK_CALL;
6029
0
          tok->u.call.name     = prev;
6030
0
          tok->u.call.name_end = name_end;
6031
0
        }
6032
3.10k
        else
6033
3.10k
          PUNFETCH;
6034
3.10k
      }
6035
3.13k
      break;
6036
3.13k
#endif
6037
6038
3.13k
    case 'Q':
6039
116
      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
6040
116
        tok->type = TK_QUOTE_OPEN;
6041
116
      }
6042
116
      break;
6043
6044
1.74k
    case 'p':
6045
4.39k
    case 'P':
6046
4.39k
      if (!PEND && PPEEK_IS('{') &&
6047
4.39k
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
6048
2
        PINC;
6049
2
        tok->type = TK_CHAR_PROPERTY;
6050
2
        tok->u.prop.not = c == 'P';
6051
6052
2
        if (!PEND &&
6053
2
            IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
6054
2
          PFETCH(c);
6055
2
          if (c == '^') {
6056
0
            tok->u.prop.not = tok->u.prop.not == 0;
6057
0
          }
6058
2
          else
6059
2
            PUNFETCH;
6060
2
        }
6061
2
      }
6062
4.39k
      break;
6063
6064
529k
    default:
6065
529k
      {
6066
529k
        OnigCodePoint c2;
6067
6068
529k
        PUNFETCH;
6069
529k
        r = fetch_escaped_value(&p, end, env, &c2);
6070
529k
        if (r < 0) return r;
6071
529k
        if (tok->u.code != c2) {
6072
18.1k
          tok->type = TK_CODE_POINT;
6073
18.1k
          tok->u.code = c2;
6074
18.1k
        }
6075
511k
        else { /* string */
6076
511k
          p = tok->backp + enclen(enc, tok->backp);
6077
511k
        }
6078
529k
      }
6079
0
      break;
6080
603k
    }
6081
603k
  }
6082
82.0M
  else {
6083
82.0M
    tok->u.code = c;
6084
82.0M
    tok->escaped = 0;
6085
6086
82.0M
#ifdef USE_VARIABLE_META_CHARS
6087
82.0M
    if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
6088
82.0M
        IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
6089
0
      if (c == MC_ANYCHAR(syn))
6090
0
        goto any_char;
6091
0
      else if (c == MC_ANYTIME(syn))
6092
0
        goto any_time;
6093
0
      else if (c == MC_ZERO_OR_ONE_TIME(syn))
6094
0
        goto zero_or_one_time;
6095
0
      else if (c == MC_ONE_OR_MORE_TIME(syn))
6096
0
        goto one_or_more_time;
6097
0
      else if (c == MC_ANYCHAR_ANYTIME(syn)) {
6098
0
        tok->type = TK_ANYCHAR_ANYTIME;
6099
0
        goto out;
6100
0
      }
6101
0
    }
6102
82.0M
#endif
6103
6104
82.0M
    switch (c) {
6105
2.34M
    case '.':
6106
2.34M
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
6107
2.34M
#ifdef USE_VARIABLE_META_CHARS
6108
2.34M
    any_char:
6109
2.34M
#endif
6110
2.34M
      tok->type = TK_ANYCHAR;
6111
2.34M
      break;
6112
6113
650k
    case '*':
6114
650k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
6115
650k
#ifdef USE_VARIABLE_META_CHARS
6116
650k
    any_time:
6117
650k
#endif
6118
650k
      tok->type = TK_REPEAT;
6119
650k
      tok->u.repeat.lower = 0;
6120
650k
      tok->u.repeat.upper = INFINITE_REPEAT;
6121
650k
      goto greedy_check;
6122
0
      break;
6123
6124
954k
    case '+':
6125
954k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
6126
954k
#ifdef USE_VARIABLE_META_CHARS
6127
954k
    one_or_more_time:
6128
954k
#endif
6129
954k
      tok->type = TK_REPEAT;
6130
954k
      tok->u.repeat.lower = 1;
6131
954k
      tok->u.repeat.upper = INFINITE_REPEAT;
6132
954k
      goto greedy_check;
6133
0
      break;
6134
6135
1.03M
    case '?':
6136
1.03M
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
6137
1.03M
#ifdef USE_VARIABLE_META_CHARS
6138
1.03M
    zero_or_one_time:
6139
1.03M
#endif
6140
1.03M
      tok->type = TK_REPEAT;
6141
1.03M
      tok->u.repeat.lower = 0;
6142
1.03M
      tok->u.repeat.upper = 1;
6143
1.03M
      goto greedy_check;
6144
0
      break;
6145
6146
946k
    case '{':
6147
946k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
6148
946k
      r = fetch_interval(&p, end, tok, env);
6149
946k
      if (r < 0) return r;  /* error */
6150
946k
      if (r == 0) goto greedy_check2;
6151
944k
      else if (r == 2) { /* {n} */
6152
18.3k
        if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
6153
0
          goto possessive_check;
6154
6155
18.3k
        goto greedy_check2;
6156
18.3k
      }
6157
      /* r == 1 : normal char */
6158
926k
      break;
6159
6160
926k
    case '|':
6161
388k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
6162
388k
      tok->type = TK_ALT;
6163
388k
      break;
6164
6165
1.81M
    case '(':
6166
1.81M
      if (!PEND && PPEEK_IS('?') &&
6167
1.81M
          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
6168
83.7k
        prev = p;
6169
83.7k
        PINC;
6170
83.7k
        if (! PEND) {
6171
81.9k
          c = PPEEK;
6172
81.9k
          if (c == '#') {
6173
417
            PFETCH(c);
6174
16.2k
            while (1) {
6175
16.2k
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6176
15.9k
              PFETCH(c);
6177
15.9k
              if (c == MC_ESC(syn)) {
6178
118
                if (! PEND) PFETCH(c);
6179
118
              }
6180
15.8k
              else {
6181
15.8k
                if (c == ')') break;
6182
15.8k
              }
6183
15.9k
            }
6184
114
            goto start;
6185
417
          }
6186
81.5k
          else if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL)) {
6187
81.5k
            int gnum;
6188
81.5k
            UChar* name;
6189
81.5k
            UChar* name_end;
6190
81.5k
            enum REF_NUM num_type;
6191
6192
81.5k
            switch (c) {
6193
40
            case '&':
6194
40
              {
6195
40
                PINC;
6196
40
                name = p;
6197
40
                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6198
40
                               &gnum, &num_type, FALSE);
6199
40
                if (r < 0) return r;
6200
6201
14
                tok->type = TK_CALL;
6202
14
                tok->u.call.by_number = 0;
6203
14
                tok->u.call.gnum      = 0;
6204
14
                tok->u.call.name      = name;
6205
14
                tok->u.call.name_end  = name_end;
6206
14
              }
6207
0
              break;
6208
6209
1
            case 'R':
6210
1
              tok->type = TK_CALL;
6211
1
              tok->u.call.by_number = 1;
6212
1
              tok->u.call.gnum      = 0;
6213
1
              tok->u.call.name      = p;
6214
1
              PINC;
6215
1
              if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
6216
0
              tok->u.call.name_end  = p;
6217
0
              break;
6218
6219
489
            case '-':
6220
913
            case '+':
6221
913
              if (! PEND) {
6222
913
                PINC;
6223
913
                if (! PEND) {
6224
795
                  c = PPEEK;
6225
795
                  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
6226
671
                    PUNFETCH;
6227
671
                    goto lparen_qmark_num;
6228
671
                  }
6229
795
                }
6230
913
              }
6231
242
              p = prev;
6232
242
              goto lparen_qmark_end2;
6233
0
              break;
6234
6235
80.6k
            default:
6236
80.6k
              if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto lparen_qmark_end;
6237
6238
20.5k
            lparen_qmark_num:
6239
20.5k
              {
6240
20.5k
                name = p;
6241
20.5k
                r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
6242
20.5k
                               &gnum, &num_type, TRUE);
6243
20.5k
                if (r < 0) return r;
6244
6245
19.8k
                if (num_type == IS_NOT_NUM) {
6246
51
                  return ONIGERR_INVALID_GROUP_NAME;
6247
51
                }
6248
19.7k
                else {
6249
19.7k
                  if (num_type == IS_REL_NUM) {
6250
391
                    gnum = backref_rel_to_abs(gnum, env);
6251
391
                    if (gnum < 0) {
6252
37
                      onig_scan_env_set_error_string(env,
6253
37
                             ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
6254
37
                      return ONIGERR_UNDEFINED_GROUP_REFERENCE;
6255
37
                    }
6256
391
                  }
6257
19.7k
                  tok->u.call.by_number = 1;
6258
19.7k
                  tok->u.call.gnum      = gnum;
6259
19.7k
                }
6260
6261
19.7k
                tok->type = TK_CALL;
6262
19.7k
                tok->u.call.name     = name;
6263
19.7k
                tok->u.call.name_end = name_end;
6264
19.7k
              }
6265
0
              break;
6266
81.5k
            }
6267
19.7k
            break;
6268
81.5k
          }
6269
0
          else if (c == 'P' &&
6270
0
                   IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
6271
0
            PINC; /* skip 'P' */
6272
0
            if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
6273
0
            PFETCH(c);
6274
0
            allow_num = 0;
6275
0
            if (c == '=') {
6276
0
              c = '(';
6277
0
              goto backref_start;
6278
0
            }
6279
0
            else if (c == '>') {
6280
0
#ifdef USE_CALL
6281
0
              c = '(';
6282
0
              goto call_start;
6283
#else
6284
              return ONIGERR_UNDEFINED_OPERATOR;
6285
#endif
6286
0
            }
6287
0
            else {
6288
0
              p = prev;
6289
0
              goto lparen_qmark_end2;
6290
0
            }
6291
0
          }
6292
81.9k
        }
6293
62.4k
      lparen_qmark_end:
6294
62.4k
        PUNFETCH;
6295
62.4k
      }
6296
6297
1.78M
    lparen_qmark_end2:
6298
1.78M
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6299
1.78M
      tok->type = TK_SUBEXP_OPEN;
6300
1.78M
      break;
6301
6302
459k
    case ')':
6303
459k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
6304
459k
      tok->type = TK_SUBEXP_CLOSE;
6305
459k
      break;
6306
6307
51.3k
    case '^':
6308
51.3k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6309
51.3k
      if (IS_SYNTAX_BV(syn, ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP)) {
6310
0
        if (! is_head_of_bre_subexp(PPREV, end, enc, env)) break;
6311
0
      }
6312
51.3k
      tok->type = TK_ANCHOR;
6313
51.3k
      tok->u.subtype = (OPTON_SINGLELINE(env->options)
6314
51.3k
                        ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE);
6315
51.3k
      break;
6316
6317
203k
    case '$':
6318
203k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
6319
203k
      if (IS_SYNTAX_BV(syn, ONIG_SYN_BRE_ANCHOR_AT_EDGE_OF_SUBEXP)) {
6320
0
        if (! is_end_of_bre_subexp(p, end, enc, env)) break;
6321
0
      }
6322
203k
      tok->type = TK_ANCHOR;
6323
203k
      tok->u.subtype = (OPTON_SINGLELINE(env->options)
6324
203k
                        ? ANCR_SEMI_END_BUF : ANCR_END_LINE);
6325
203k
      break;
6326
6327
175k
    case '[':
6328
175k
      if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
6329
175k
      tok->type = TK_OPEN_CC;
6330
175k
      break;
6331
6332
687k
    case ']':
6333
687k
      if (*src > env->pattern)   /* /].../ is allowed. */
6334
666k
        CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
6335
687k
      break;
6336
6337
88.1k
    case '#':
6338
88.1k
      if (OPTON_EXTEND(env->options)) {
6339
4.88k
        while (!PEND) {
6340
4.87k
          PFETCH(c);
6341
4.87k
          if (ONIGENC_IS_CODE_NEWLINE(enc, c))
6342
0
            break;
6343
4.87k
        }
6344
9
        goto start;
6345
0
        break;
6346
9
      }
6347
88.1k
      break;
6348
6349
649k
    case ' ': case '\t': case '\n': case '\r': case '\f':
6350
649k
      if (OPTON_EXTEND(env->options))
6351
311
        goto start;
6352
648k
      break;
6353
6354
71.5M
    default:
6355
      /* string */
6356
71.5M
      break;
6357
82.0M
    }
6358
82.0M
  }
6359
6360
82.6M
 out:
6361
82.6M
  *src = p;
6362
82.6M
  return tok->type;
6363
82.6M
}
6364
6365
static int
6366
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
6367
                         OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
6368
                         const OnigCodePoint mbr[])
6369
11.4k
{
6370
11.4k
  int i, r;
6371
11.4k
  OnigCodePoint j;
6372
6373
11.4k
  int n = ONIGENC_CODE_RANGE_NUM(mbr);
6374
6375
11.4k
  if (not == 0) {
6376
26.8k
    for (i = 0; i < n; i++) {
6377
26.8k
      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6378
170k
           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6379
155k
        if (j >= sb_out) {
6380
11.2k
          if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6381
0
            r = add_code_range_to_buf(&(cc->mbuf), j,
6382
0
                                      ONIGENC_CODE_RANGE_TO(mbr, i));
6383
0
            if (r != 0) return r;
6384
0
            i++;
6385
0
          }
6386
6387
11.2k
          goto sb_end;
6388
11.2k
        }
6389
155k
        BITSET_SET_BIT(cc->bs, j);
6390
144k
      }
6391
26.8k
    }
6392
6393
11.2k
  sb_end:
6394
1.13M
    for ( ; i < n; i++) {
6395
1.12M
      r = add_code_range_to_buf(&(cc->mbuf),
6396
1.12M
                                ONIGENC_CODE_RANGE_FROM(mbr, i),
6397
1.12M
                                ONIGENC_CODE_RANGE_TO(mbr, i));
6398
1.12M
      if (r != 0) return r;
6399
1.12M
    }
6400
11.2k
  }
6401
176
  else {
6402
176
    OnigCodePoint prev = 0;
6403
6404
406
    for (i = 0; i < n; i++) {
6405
20.4k
      for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
6406
20.2k
        if (j >= sb_out) {
6407
176
          goto sb_end2;
6408
176
        }
6409
20.2k
        BITSET_SET_BIT(cc->bs, j);
6410
20.0k
      }
6411
230
      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6412
230
    }
6413
0
    for (j = prev; j < sb_out; j++) {
6414
0
      BITSET_SET_BIT(cc->bs, j);
6415
0
    }
6416
6417
176
  sb_end2:
6418
176
    prev = sb_out;
6419
6420
20.6k
    for (i = 0; i < n; i++) {
6421
20.5k
      if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6422
20.3k
        r = add_code_range_to_buf(&(cc->mbuf), prev,
6423
20.3k
                                  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
6424
20.3k
        if (r != 0) return r;
6425
20.3k
      }
6426
20.5k
      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
6427
20.5k
      if (prev == 0) goto end;
6428
20.5k
    }
6429
6430
176
    r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6431
176
    if (r != 0) return r;
6432
176
  }
6433
6434
11.4k
 end:
6435
11.4k
  return 0;
6436
11.4k
}
6437
6438
static int
6439
add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
6440
                               OnigEncoding enc ARG_UNUSED,
6441
                               OnigCodePoint sb_out,
6442
                               const OnigCodePoint mbr[], OnigCodePoint limit)
6443
0
{
6444
0
  int i, r;
6445
0
  OnigCodePoint j;
6446
0
  OnigCodePoint from;
6447
0
  OnigCodePoint to;
6448
6449
0
  int n = ONIGENC_CODE_RANGE_NUM(mbr);
6450
6451
0
  if (not == 0) {
6452
0
    for (i = 0; i < n; i++) {
6453
0
      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
6454
0
           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
6455
0
        if (j > limit) goto end;
6456
0
        if (j >= sb_out) {
6457
0
          if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
6458
0
            to = ONIGENC_CODE_RANGE_TO(mbr, i);
6459
0
            if (to > limit) to = limit;
6460
0
            r = add_code_range_to_buf(&(cc->mbuf), j, to);
6461
0
            if (r != 0) return r;
6462
0
            i++;
6463
0
          }
6464
6465
0
          goto sb_end;
6466
0
        }
6467
0
        BITSET_SET_BIT(cc->bs, j);
6468
0
      }
6469
0
    }
6470
6471
0
  sb_end:
6472
0
    for ( ; i < n; i++) {
6473
0
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6474
0
      to   = ONIGENC_CODE_RANGE_TO(mbr, i);
6475
0
      if (from > limit) break;
6476
0
      if (to   > limit) to = limit;
6477
0
      r = add_code_range_to_buf(&(cc->mbuf), from, to);
6478
0
      if (r != 0) return r;
6479
0
    }
6480
0
  }
6481
0
  else {
6482
0
    OnigCodePoint prev = 0;
6483
6484
0
    for (i = 0; i < n; i++) {
6485
0
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6486
0
      if (from > limit) {
6487
0
        for (j = prev; j < sb_out; j++) {
6488
0
          BITSET_SET_BIT(cc->bs, j);
6489
0
        }
6490
0
        goto sb_end2;
6491
0
      }
6492
0
      for (j = prev; j < from; j++) {
6493
0
        if (j >= sb_out) goto sb_end2;
6494
0
        BITSET_SET_BIT(cc->bs, j);
6495
0
      }
6496
0
      prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6497
0
      if (prev > limit) prev = limit;
6498
0
      prev++;
6499
0
      if (prev == 0) goto end;
6500
0
    }
6501
0
    for (j = prev; j < sb_out; j++) {
6502
0
      BITSET_SET_BIT(cc->bs, j);
6503
0
    }
6504
6505
0
  sb_end2:
6506
0
    prev = sb_out;
6507
6508
0
    for (i = 0; i < n; i++) {
6509
0
      from = ONIGENC_CODE_RANGE_FROM(mbr, i);
6510
0
      if (from > limit) goto last;
6511
6512
0
      if (prev < from) {
6513
0
        r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
6514
0
        if (r != 0) return r;
6515
0
      }
6516
0
      prev = ONIGENC_CODE_RANGE_TO(mbr, i);
6517
0
      if (prev > limit) prev = limit;
6518
0
      prev++;
6519
0
      if (prev == 0) goto end;
6520
0
    }
6521
6522
0
  last:
6523
0
    r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
6524
0
    if (r != 0) return r;
6525
0
  }
6526
6527
0
 end:
6528
0
  return 0;
6529
0
}
6530
6531
static int
6532
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ParseEnv* env)
6533
11.4k
{
6534
11.4k
  int c, r;
6535
11.4k
  int ascii_mode;
6536
11.4k
  int is_single;
6537
11.4k
  const OnigCodePoint *ranges;
6538
11.4k
  OnigCodePoint limit;
6539
11.4k
  OnigCodePoint sb_out;
6540
11.4k
  OnigEncoding enc = env->enc;
6541
6542
11.4k
  ascii_mode = OPTON_IS_ASCII_MODE_CTYPE(ctype, env->options);
6543
6544
11.4k
  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
6545
11.4k
  if (r == 0) {
6546
11.4k
    if (ascii_mode == 0)
6547
11.4k
      r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
6548
0
    else
6549
0
      r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
6550
0
                                         ranges, ASCII_LIMIT);
6551
11.4k
    return r;
6552
11.4k
  }
6553
0
  else if (r != ONIG_NO_SUPPORT_CONFIG) {
6554
0
    return r;
6555
0
  }
6556
6557
0
  r = 0;
6558
0
  is_single = ONIGENC_IS_SINGLEBYTE(enc);
6559
0
  limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
6560
6561
0
  switch (ctype) {
6562
0
  case ONIGENC_CTYPE_ALPHA:
6563
0
  case ONIGENC_CTYPE_BLANK:
6564
0
  case ONIGENC_CTYPE_CNTRL:
6565
0
  case ONIGENC_CTYPE_DIGIT:
6566
0
  case ONIGENC_CTYPE_LOWER:
6567
0
  case ONIGENC_CTYPE_PUNCT:
6568
0
  case ONIGENC_CTYPE_SPACE:
6569
0
  case ONIGENC_CTYPE_UPPER:
6570
0
  case ONIGENC_CTYPE_XDIGIT:
6571
0
  case ONIGENC_CTYPE_ASCII:
6572
0
  case ONIGENC_CTYPE_ALNUM:
6573
0
    if (not != 0) {
6574
0
      for (c = 0; c < (int )limit; c++) {
6575
0
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6576
0
          if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6577
0
            BITSET_SET_BIT(cc->bs, c);
6578
0
        }
6579
0
      }
6580
0
      for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6581
0
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6582
0
          BITSET_SET_BIT(cc->bs, c);
6583
0
      }
6584
6585
0
      if (is_single == 0)
6586
0
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6587
0
    }
6588
0
    else {
6589
0
      for (c = 0; c < (int )limit; c++) {
6590
0
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
6591
0
          if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6592
0
            BITSET_SET_BIT(cc->bs, c);
6593
0
        }
6594
0
      }
6595
0
    }
6596
0
    break;
6597
6598
0
  case ONIGENC_CTYPE_GRAPH:
6599
0
  case ONIGENC_CTYPE_PRINT:
6600
0
  case ONIGENC_CTYPE_WORD:
6601
0
    if (not != 0) {
6602
0
      for (c = 0; c < (int )limit; c++) {
6603
        /* check invalid code point */
6604
0
        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6605
0
            && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6606
0
          BITSET_SET_BIT(cc->bs, c);
6607
0
      }
6608
0
      for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
6609
0
        if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6610
0
          BITSET_SET_BIT(cc->bs, c);
6611
0
      }
6612
0
      if (ascii_mode != 0 && is_single == 0)
6613
0
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6614
0
    }
6615
0
    else {
6616
0
      for (c = 0; c < (int )limit; c++) {
6617
0
        if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
6618
0
            && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
6619
0
          BITSET_SET_BIT(cc->bs, c);
6620
0
      }
6621
0
      if (ascii_mode == 0 && is_single == 0)
6622
0
        ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
6623
0
    }
6624
0
    break;
6625
6626
0
  default:
6627
0
    return ONIGERR_PARSER_BUG;
6628
0
    break;
6629
0
  }
6630
6631
0
  return r;
6632
0
}
6633
6634
static int
6635
prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ParseEnv* env)
6636
1
{
6637
1
  static PosixBracketEntryType PBS[] = {
6638
1
    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
6639
1
    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
6640
1
    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
6641
1
    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
6642
1
    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
6643
1
    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
6644
1
    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
6645
1
    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
6646
1
    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
6647
1
    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
6648
1
    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
6649
1
    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
6650
1
    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
6651
1
    { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
6652
1
    { (UChar* )NULL,     -1, 0 }
6653
1
  };
6654
6655
1
  PosixBracketEntryType *pb;
6656
1
  int not, r;
6657
1
  OnigEncoding enc = env->enc;
6658
1
  UChar *p = *src;
6659
6660
1
  if (PPEEK_IS('^')) {
6661
0
    PINC_S;
6662
0
    not = 1;
6663
0
  }
6664
1
  else
6665
1
    not = 0;
6666
6667
15
  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
6668
14
    if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
6669
0
      p = (UChar* )onigenc_step(enc, p, end, pb->len);
6670
0
      if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
6671
0
        break;
6672
6673
0
      r = add_ctype_to_cc(cc, pb->ctype, not, env);
6674
0
      if (r != 0) return r;
6675
6676
0
      PINC_S; PINC_S;
6677
0
      *src = p;
6678
0
      return 0;
6679
0
    }
6680
14
  }
6681
6682
1
  return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
6683
1
}
6684
6685
static int
6686
fetch_char_property_to_ctype(UChar** src, UChar* end, ParseEnv* env)
6687
2
{
6688
2
  int r;
6689
2
  OnigCodePoint c;
6690
2
  OnigEncoding enc;
6691
2
  UChar *prev, *start, *p;
6692
6693
2
  p = *src;
6694
2
  enc = env->enc;
6695
2
  r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
6696
2
  start = prev = p;
6697
6698
104
  while (!PEND) {
6699
103
    prev = p;
6700
103
    PFETCH_S(c);
6701
103
    if (c == '}') {
6702
0
      r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
6703
0
      if (r >= 0) {
6704
0
        *src = p;
6705
0
      }
6706
0
      else {
6707
0
        onig_scan_env_set_error_string(env, r, *src, prev);
6708
0
      }
6709
6710
0
      return r;
6711
0
    }
6712
103
    else if (c == '(' || c == ')' || c == '{' || c == '|') {
6713
1
      break;
6714
1
    }
6715
103
  }
6716
6717
2
  return r;
6718
2
}
6719
6720
static int
6721
prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end,
6722
                  ParseEnv* env)
6723
2
{
6724
2
  int r, ctype;
6725
2
  CClassNode* cc;
6726
6727
2
  ctype = fetch_char_property_to_ctype(src, end, env);
6728
2
  if (ctype < 0) return ctype;
6729
6730
0
  if (ctype == ONIGENC_CTYPE_WORD) {
6731
0
    *np = node_new_ctype(ctype, tok->u.prop.not, env->options);
6732
0
    CHECK_NULL_RETURN_MEMERR(*np);
6733
0
    return 0;
6734
0
  }
6735
6736
0
  *np = node_new_cclass();
6737
0
  CHECK_NULL_RETURN_MEMERR(*np);
6738
0
  cc = CCLASS_(*np);
6739
0
  r = add_ctype_to_cc(cc, ctype, FALSE, env);
6740
0
  if (r != 0) return r;
6741
0
  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
6742
6743
0
  return 0;
6744
0
}
6745
6746
6747
static int
6748
cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state,
6749
              ParseEnv* env)
6750
2.27k
{
6751
2.27k
  int r;
6752
6753
2.27k
  if (*state == CS_RANGE)
6754
1
    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
6755
6756
2.27k
  if (*state == CS_VALUE) {
6757
2.25k
    if (*val == CV_SB)
6758
2.25k
      BITSET_SET_BIT(cc->bs, (int )(*pcode));
6759
495
    else if (*val == CV_MB) {
6760
495
      r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);
6761
495
      if (r < 0) return r;
6762
495
    }
6763
2.25k
  }
6764
6765
2.27k
  *state = CS_VALUE;
6766
2.27k
  *val   = CV_CPROP;
6767
2.27k
  return 0;
6768
2.27k
}
6769
6770
static int
6771
cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
6772
             int* from_raw, int to_raw, CVAL intype, CVAL* type,
6773
             CSTATE* state, ParseEnv* env)
6774
5.06M
{
6775
5.06M
  int r;
6776
6777
5.06M
  switch (*state) {
6778
4.81M
  case CS_VALUE:
6779
4.81M
    if (*type == CV_SB) {
6780
2.90M
      if (*from > 0xff)
6781
0
          return ONIGERR_INVALID_CODE_POINT_VALUE;
6782
6783
2.90M
      BITSET_SET_BIT(cc->bs, (int )(*from));
6784
2.90M
    }
6785
1.91M
    else if (*type == CV_MB) {
6786
1.91M
      r = add_code_range(&(cc->mbuf), env, *from, *from);
6787
1.91M
      if (r < 0) return r;
6788
1.91M
    }
6789
4.81M
    break;
6790
6791
4.81M
  case CS_RANGE:
6792
42.9k
    if (intype == *type) {
6793
33.2k
      if (intype == CV_SB) {
6794
30.9k
        if (*from > 0xff || to > 0xff)
6795
0
          return ONIGERR_INVALID_CODE_POINT_VALUE;
6796
6797
30.9k
        if (*from > to) {
6798
686
          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6799
0
            goto ccs_range_end;
6800
686
          else
6801
686
            return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6802
686
        }
6803
30.2k
        bitset_set_range(cc->bs, (int )*from, (int )to);
6804
30.2k
      }
6805
2.30k
      else {
6806
2.30k
        r = add_code_range(&(cc->mbuf), env, *from, to);
6807
2.30k
        if (r < 0) return r;
6808
2.30k
      }
6809
33.2k
    }
6810
9.62k
    else {
6811
9.62k
      if (*from > to) {
6812
76
        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
6813
0
          goto ccs_range_end;
6814
76
        else
6815
76
          return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
6816
76
      }
6817
9.55k
      bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
6818
9.55k
      r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
6819
9.55k
      if (r < 0) return r;
6820
9.55k
    }
6821
42.1k
  ccs_range_end:
6822
42.1k
    *state = CS_COMPLETE;
6823
42.1k
    break;
6824
6825
35.8k
  case CS_COMPLETE:
6826
200k
  case CS_START:
6827
200k
    *state = CS_VALUE;
6828
200k
    break;
6829
6830
0
  default:
6831
0
    break;
6832
5.06M
  }
6833
6834
5.06M
  *from_raw = to_raw;
6835
5.06M
  *from     = to;
6836
5.06M
  *type     = intype;
6837
5.06M
  return 0;
6838
5.06M
}
6839
6840
static int
6841
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
6842
                 ParseEnv* env)
6843
20.5k
{
6844
20.5k
  int in_esc;
6845
20.5k
  OnigCodePoint code;
6846
20.5k
  OnigEncoding enc = env->enc;
6847
20.5k
  UChar* p = from;
6848
6849
20.5k
  in_esc = 0;
6850
188k
  while (! PEND) {
6851
184k
    if (ignore_escaped && in_esc) {
6852
4.77k
      in_esc = 0;
6853
4.77k
    }
6854
179k
    else {
6855
179k
      PFETCH_S(code);
6856
179k
      if (code == c) return 1;
6857
162k
      if (code == MC_ESC(env->syntax)) in_esc = 1;
6858
162k
    }
6859
184k
  }
6860
3.94k
  return 0;
6861
20.5k
}
6862
6863
static int
6864
prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ParseEnv* env)
6865
175k
{
6866
175k
  int r, neg, len, fetched, and_start;
6867
175k
  OnigCodePoint in_code, curr_code;
6868
175k
  UChar *p;
6869
175k
  Node* node;
6870
175k
  CClassNode *cc, *prev_cc;
6871
175k
  CClassNode work_cc;
6872
175k
  int curr_raw, in_raw;
6873
175k
  CSTATE state;
6874
175k
  CVAL in_type;
6875
175k
  CVAL curr_type;
6876
6877
175k
  *np = NULL_NODE;
6878
175k
  INC_PARSE_DEPTH(env->parse_depth);
6879
6880
175k
  state = CS_START;
6881
175k
  prev_cc = (CClassNode* )NULL;
6882
175k
  r = fetch_token_cc(tok, src, end, env, state);
6883
175k
  if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
6884
2.63k
    neg = 1;
6885
2.63k
    r = fetch_token_cc(tok, src, end, env, state);
6886
2.63k
  }
6887
173k
  else {
6888
173k
    neg = 0;
6889
173k
  }
6890
6891
175k
  if (r < 0) return r;
6892
175k
  if (r == TK_CC_CLOSE) {
6893
20.5k
    if (! code_exist_check((OnigCodePoint )']',
6894
20.5k
                           *src, env->pattern_end, 1, env))
6895
3.94k
      return ONIGERR_EMPTY_CHAR_CLASS;
6896
6897
16.5k
    CC_ESC_WARN(env, (UChar* )"]");
6898
16.5k
    r = tok->type = TK_CHAR;  /* allow []...] */
6899
16.5k
  }
6900
6901
171k
  *np = node = node_new_cclass();
6902
171k
  CHECK_NULL_RETURN_MEMERR(node);
6903
171k
  cc = CCLASS_(node);
6904
6905
171k
  and_start = 0;
6906
171k
  curr_type = CV_UNDEF;
6907
6908
171k
  p = *src;
6909
5.14M
  while (r != TK_CC_CLOSE) {
6910
5.00M
    fetched = 0;
6911
5.00M
    switch (r) {
6912
4.91M
    case TK_CHAR:
6913
4.91M
    any_char_in:
6914
4.91M
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
6915
4.91M
      if (len < 0) {
6916
0
        r = len;
6917
0
        goto err;
6918
0
      }
6919
4.91M
      in_type = (len == 1) ? CV_SB : CV_MB;
6920
4.91M
      in_code = tok->u.code;
6921
4.91M
      in_raw = 0;
6922
4.91M
      goto val_entry2;
6923
0
      break;
6924
6925
1.55k
    case TK_CRUDE_BYTE:
6926
      /* tok->base_num != 0 : octal or hexadec. */
6927
1.55k
      if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base_num != 0) {
6928
1.55k
        int i, j;
6929
1.55k
        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
6930
1.55k
        UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
6931
1.55k
        UChar* psave = p;
6932
1.55k
        int base_num = tok->base_num;
6933
6934
1.55k
        buf[0] = tok->u.byte;
6935
1.55k
        for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
6936
1.55k
          r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6937
1.55k
          if (r < 0) goto err;
6938
1.55k
          if (r != TK_CRUDE_BYTE || tok->base_num != base_num) {
6939
1.55k
            fetched = 1;
6940
1.55k
            break;
6941
1.55k
          }
6942
0
          buf[i] = tok->u.byte;
6943
0
        }
6944
6945
1.55k
        if (i < ONIGENC_MBC_MINLEN(env->enc)) {
6946
0
          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6947
0
          goto err;
6948
0
        }
6949
6950
        /* clear buf tail */
6951
10.8k
        for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
6952
6953
1.55k
        len = enclen(env->enc, buf);
6954
1.55k
        if (i < len) {
6955
2
          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
6956
2
          goto err;
6957
2
        }
6958
1.54k
        else if (i > len) { /* fetch back */
6959
0
          p = psave;
6960
0
          for (i = 1; i < len; i++) {
6961
0
            r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
6962
0
            if (r < 0) goto err;
6963
0
          }
6964
0
          fetched = 0;
6965
0
        }
6966
6967
1.54k
        if (i == 1) {
6968
1.54k
          in_code = (OnigCodePoint )buf[0];
6969
1.54k
          goto crude_single;
6970
1.54k
        }
6971
0
        else {
6972
0
          if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, buf, buf + len)) {
6973
0
            r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
6974
0
            goto err;
6975
0
          }
6976
6977
0
          in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
6978
0
          in_type = CV_MB;
6979
0
        }
6980
1.54k
      }
6981
0
      else {
6982
0
        in_code = (OnigCodePoint )tok->u.byte;
6983
1.54k
      crude_single:
6984
1.54k
        in_type = CV_SB;
6985
1.54k
      }
6986
1.54k
      in_raw = 1;
6987
1.54k
      goto val_entry2;
6988
0
      break;
6989
6990
726
    case TK_CODE_POINT:
6991
726
      in_code = tok->u.code;
6992
726
      in_raw  = 1;
6993
9.60k
    val_entry:
6994
9.60k
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);
6995
9.60k
      if (len < 0) {
6996
0
        if (state != CS_RANGE ||
6997
0
            ! IS_SYNTAX_BV(env->syntax,
6998
0
                           ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
6999
0
            in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
7000
0
          r = len;
7001
0
          goto err;
7002
0
        }
7003
0
      }
7004
9.60k
      in_type = (len == 1 ? CV_SB : CV_MB);
7005
4.92M
    val_entry2:
7006
4.92M
      r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type,
7007
4.92M
                       &curr_type, &state, env);
7008
4.92M
      if (r != 0) goto err;
7009
4.92M
      break;
7010
7011
4.92M
    case TK_CC_POSIX_BRACKET_OPEN:
7012
1
      r = prs_posix_bracket(cc, &p, end, env);
7013
1
      if (r < 0) goto err;
7014
0
      if (r == 1) {  /* is not POSIX bracket */
7015
0
        CC_ESC_WARN(env, (UChar* )"[");
7016
0
        p = tok->backp;
7017
0
        in_code = tok->u.code;
7018
0
        in_raw = 0;
7019
0
        goto val_entry;
7020
0
      }
7021
0
      goto next_cprop;
7022
0
      break;
7023
7024
2.27k
    case TK_CHAR_TYPE:
7025
2.27k
      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
7026
2.27k
      if (r != 0) goto err;
7027
7028
2.27k
    next_cprop:
7029
2.27k
      r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);
7030
2.27k
      if (r != 0) goto err;
7031
2.27k
      break;
7032
7033
2.27k
    case TK_CHAR_PROPERTY:
7034
0
      {
7035
0
        int ctype = fetch_char_property_to_ctype(&p, end, env);
7036
0
        if (ctype < 0) {
7037
0
          r = ctype;
7038
0
          goto err;
7039
0
        }
7040
0
        r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
7041
0
        if (r != 0) goto err;
7042
0
        goto next_cprop;
7043
0
      }
7044
0
      break;
7045
7046
55.5k
    case TK_CC_RANGE:
7047
55.5k
      if (state == CS_VALUE) {
7048
46.5k
        r = fetch_token_cc(tok, &p, end, env, CS_RANGE);
7049
46.5k
        if (r < 0) goto err;
7050
7051
46.5k
        fetched = 1;
7052
46.5k
        if (r == TK_CC_CLOSE) { /* allow [x-] */
7053
4.35k
        range_end_val:
7054
4.35k
          in_code = (OnigCodePoint )'-';
7055
4.35k
          in_raw = 0;
7056
4.35k
          goto val_entry;
7057
1.11k
        }
7058
45.4k
        else if (r == TK_CC_AND) {
7059
0
          CC_ESC_WARN(env, (UChar* )"-");
7060
0
          goto range_end_val;
7061
0
        }
7062
7063
45.4k
        if (curr_type == CV_CPROP) {
7064
7
          r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7065
7
          goto err;
7066
7
        }
7067
7068
45.4k
        state = CS_RANGE;
7069
45.4k
      }
7070
9.01k
      else if (state == CS_START) {
7071
        /* [-xa] is allowed */
7072
4.56k
        in_code = tok->u.code;
7073
4.56k
        in_raw = 0;
7074
7075
4.56k
        r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7076
4.56k
        if (r < 0) goto err;
7077
7078
4.52k
        fetched = 1;
7079
        /* [--x] or [a&&-x] is warned. */
7080
4.52k
        if (r == TK_CC_RANGE || and_start != 0)
7081
666
          CC_ESC_WARN(env, (UChar* )"-");
7082
7083
4.52k
        goto val_entry;
7084
4.56k
      }
7085
4.44k
      else if (state == CS_RANGE) {
7086
1.20k
        CC_ESC_WARN(env, (UChar* )"-");
7087
1.20k
        goto any_char_in;  /* [!--] is allowed */
7088
1.20k
      }
7089
3.24k
      else { /* CS_COMPLETE */
7090
3.24k
        r = fetch_token_cc(tok, &p, end, env, CS_VALUE);
7091
3.24k
        if (r < 0) goto err;
7092
7093
3.24k
        fetched = 1;
7094
3.24k
        if (r == TK_CC_CLOSE)
7095
62
          goto range_end_val; /* allow [a-b-] */
7096
3.17k
        else if (r == TK_CC_AND) {
7097
0
          CC_ESC_WARN(env, (UChar* )"-");
7098
0
          goto range_end_val;
7099
0
        }
7100
7101
3.17k
        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
7102
3.17k
          CC_ESC_WARN(env, (UChar* )"-");
7103
3.17k
          goto range_end_val;   /* [0-9-a] is allowed as [0-9\-a] */
7104
3.17k
        }
7105
0
        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
7106
0
        goto err;
7107
3.17k
      }
7108
45.4k
      break;
7109
7110
45.4k
    case TK_CC_OPEN_CC: /* [ */
7111
0
      {
7112
0
        Node *anode;
7113
0
        CClassNode* acc;
7114
7115
0
        if (state == CS_VALUE) {
7116
0
          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7117
0
                           &state, env);
7118
0
          if (r != 0) goto err;
7119
0
        }
7120
0
        state = CS_COMPLETE;
7121
7122
0
        r = prs_cc(&anode, tok, &p, end, env);
7123
0
        if (r != 0) {
7124
0
          onig_node_free(anode);
7125
0
          goto cc_open_err;
7126
0
        }
7127
0
        acc = CCLASS_(anode);
7128
0
        r = or_cclass(cc, acc, env->enc);
7129
0
        onig_node_free(anode);
7130
7131
0
      cc_open_err:
7132
0
        if (r != 0) goto err;
7133
0
      }
7134
0
      break;
7135
7136
0
    case TK_CC_AND: /* && */
7137
0
      {
7138
0
        if (state == CS_VALUE) {
7139
0
          r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7140
0
                           &state, env);
7141
0
          if (r != 0) goto err;
7142
0
        }
7143
        /* initialize local variables */
7144
0
        and_start = 1;
7145
0
        state = CS_START;
7146
7147
0
        if (IS_NOT_NULL(prev_cc)) {
7148
0
          r = and_cclass(prev_cc, cc, env->enc);
7149
0
          if (r != 0) goto err;
7150
0
          bbuf_free(cc->mbuf);
7151
0
        }
7152
0
        else {
7153
0
          prev_cc = cc;
7154
0
          cc = &work_cc;
7155
0
        }
7156
0
        initialize_cclass(cc);
7157
0
      }
7158
0
      break;
7159
7160
29.4k
    case TK_EOT:
7161
29.4k
      r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
7162
29.4k
      goto err;
7163
0
      break;
7164
0
    default:
7165
0
      r = ONIGERR_PARSER_BUG;
7166
0
      goto err;
7167
0
      break;
7168
5.00M
    }
7169
7170
4.97M
    if (fetched)
7171
55.8k
      r = tok->type;
7172
4.91M
    else {
7173
4.91M
      r = fetch_token_cc(tok, &p, end, env, state);
7174
4.91M
      if (r < 0) goto err;
7175
4.91M
    }
7176
4.97M
  }
7177
7178
141k
  if (state == CS_VALUE) {
7179
135k
    r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
7180
135k
                     &state, env);
7181
135k
    if (r != 0) goto err;
7182
135k
  }
7183
7184
141k
  if (IS_NOT_NULL(prev_cc)) {
7185
0
    r = and_cclass(prev_cc, cc, env->enc);
7186
0
    if (r != 0) goto err;
7187
0
    bbuf_free(cc->mbuf);
7188
0
    cc = prev_cc;
7189
0
  }
7190
7191
141k
  if (neg != 0)
7192
141k
    NCCLASS_SET_NOT(cc);
7193
139k
  else
7194
141k
    NCCLASS_CLEAR_NOT(cc);
7195
141k
  if (IS_NCCLASS_NOT(cc) &&
7196
141k
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
7197
0
    int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
7198
0
    if (is_empty != 0)
7199
0
      BITSET_IS_EMPTY(cc->bs, is_empty);
7200
7201
0
    if (is_empty == 0) {
7202
0
      if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
7203
0
        if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
7204
0
          BITSET_SET_BIT(cc->bs, NEWLINE_CODE);
7205
0
        else
7206
0
          add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
7207
0
      }
7208
0
    }
7209
0
  }
7210
141k
  *src = p;
7211
141k
  DEC_PARSE_DEPTH(env->parse_depth);
7212
141k
  return 0;
7213
7214
30.3k
 err:
7215
30.3k
  if (cc != CCLASS_(*np))
7216
0
    bbuf_free(cc->mbuf);
7217
30.3k
  return r;
7218
141k
}
7219
7220
static int prs_alts(Node** top, PToken* tok, int term,
7221
                    UChar** src, UChar* end, ParseEnv* env, int group_head);
7222
7223
#ifdef USE_CALLOUT
7224
7225
/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
7226
static int
7227
prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end,
7228
                        ParseEnv* env)
7229
88
{
7230
88
  int r;
7231
88
  int i;
7232
88
  int in;
7233
88
  int num;
7234
88
  OnigCodePoint c;
7235
88
  UChar* code_start;
7236
88
  UChar* code_end;
7237
88
  UChar* contents;
7238
88
  UChar* tag_start;
7239
88
  UChar* tag_end;
7240
88
  int brace_nest;
7241
88
  CalloutListEntry* e;
7242
88
  RegexExt* ext;
7243
88
  OnigEncoding enc = env->enc;
7244
88
  UChar* p = *src;
7245
7246
88
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7247
7248
72
  brace_nest = 0;
7249
436
  while (PPEEK_IS('{')) {
7250
389
    brace_nest++;
7251
389
    PINC_S;
7252
389
    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7253
389
  }
7254
7255
47
  in = ONIG_CALLOUT_IN_PROGRESS;
7256
47
  code_start = p;
7257
59.5k
  while (1) {
7258
59.5k
    if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7259
7260
59.5k
    code_end = p;
7261
59.5k
    PFETCH_S(c);
7262
59.5k
    if (c == '}') {
7263
599
      i = brace_nest;
7264
754
      while (i > 0) {
7265
746
        if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7266
746
        PFETCH_S(c);
7267
746
        if (c == '}') i--;
7268
591
        else break;
7269
746
      }
7270
599
      if (i == 0) break;
7271
599
    }
7272
59.5k
  }
7273
7274
8
  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7275
7276
8
  PFETCH_S(c);
7277
8
  if (c == '[') {
7278
0
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7279
0
    tag_end = tag_start = p;
7280
0
    while (! PEND) {
7281
0
      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7282
0
      tag_end = p;
7283
0
      PFETCH_S(c);
7284
0
      if (c == ']') break;
7285
0
    }
7286
0
    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7287
0
      return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7288
7289
0
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7290
0
    PFETCH_S(c);
7291
0
  }
7292
8
  else {
7293
8
    tag_start = tag_end = 0;
7294
8
  }
7295
7296
8
  if (c == 'X') {
7297
0
    in |= ONIG_CALLOUT_IN_RETRACTION;
7298
0
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7299
0
    PFETCH_S(c);
7300
0
  }
7301
8
  else if (c == '<') {
7302
0
    in = ONIG_CALLOUT_IN_RETRACTION;
7303
0
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7304
0
    PFETCH_S(c);
7305
0
  }
7306
8
  else if (c == '>') { /* no needs (default) */
7307
0
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7308
0
    PFETCH_S(c);
7309
0
  }
7310
7311
8
  if (c != cterm)
7312
8
    return ONIGERR_INVALID_CALLOUT_PATTERN;
7313
7314
0
  r = reg_callout_list_entry(env, &num);
7315
0
  if (r != 0) return r;
7316
7317
0
  ext = onig_get_regex_ext(env->reg);
7318
0
  CHECK_NULL_RETURN_MEMERR(ext);
7319
0
  if (IS_NULL(ext->pattern)) {
7320
0
    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7321
0
    if (r != ONIG_NORMAL) return r;
7322
0
  }
7323
7324
0
  if (tag_start != tag_end) {
7325
0
    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7326
0
    if (r != ONIG_NORMAL) return r;
7327
0
  }
7328
7329
0
  contents = onigenc_strdup(enc, code_start, code_end);
7330
0
  CHECK_NULL_RETURN_MEMERR(contents);
7331
7332
0
  e = onig_reg_callout_list_at(env->reg, num);
7333
0
  if (IS_NULL(e)) {
7334
0
    xfree(contents);
7335
0
    return ONIGERR_MEMORY;
7336
0
  }
7337
7338
0
  r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
7339
0
  if (r != 0) {
7340
0
    xfree(contents);
7341
0
    return r;
7342
0
  }
7343
7344
0
  e->of      = ONIG_CALLOUT_OF_CONTENTS;
7345
0
  e->in      = in;
7346
0
  e->name_id = ONIG_NON_NAME_ID;
7347
0
  e->u.content.start = contents;
7348
0
  e->u.content.end   = contents + (code_end - code_start);
7349
7350
0
  *src = p;
7351
0
  return 0;
7352
0
}
7353
7354
static long
7355
prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
7356
0
{
7357
0
  long v;
7358
0
  long d;
7359
0
  int flag;
7360
0
  UChar* p;
7361
0
  OnigCodePoint c;
7362
7363
0
  if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG;
7364
7365
0
  flag = 1;
7366
0
  v = 0;
7367
0
  p = s;
7368
0
  while (p < end) {
7369
0
    c = ONIGENC_MBC_TO_CODE(enc, p, end);
7370
0
    p += ONIGENC_MBC_ENC_LEN(enc, p);
7371
0
    if (c >= '0' && c <= '9') {
7372
0
      d = (long )(c - '0');
7373
0
      if (v > (max - d) / 10)
7374
0
        return ONIGERR_INVALID_CALLOUT_ARG;
7375
7376
0
      v = v * 10 + d;
7377
0
    }
7378
0
    else if (sign_on != 0 && (c == '-' || c == '+')) {
7379
0
      if (c == '-') flag = -1;
7380
0
    }
7381
0
    else
7382
0
      return ONIGERR_INVALID_CALLOUT_ARG;
7383
7384
0
    sign_on = 0;
7385
0
  }
7386
7387
0
  *rl = flag * v;
7388
0
  return ONIG_NORMAL;
7389
0
}
7390
7391
static void
7392
clear_callout_args(int n, unsigned int types[], OnigValue vals[])
7393
0
{
7394
0
  int i;
7395
7396
0
  for (i = 0; i < n; i++) {
7397
0
    switch (types[i]) {
7398
0
    case ONIG_TYPE_STRING:
7399
0
      if (IS_NOT_NULL(vals[i].s.start))
7400
0
        xfree(vals[i].s.start);
7401
0
      break;
7402
0
    default:
7403
0
      break;
7404
0
    }
7405
0
  }
7406
0
}
7407
7408
static int
7409
prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
7410
                 int max_arg_num, unsigned int types[], OnigValue vals[],
7411
                 ParseEnv* env)
7412
385
{
7413
385
#define MAX_CALLOUT_ARG_BYTE_LENGTH   128
7414
7415
385
  int r;
7416
385
  int n;
7417
385
  int esc;
7418
385
  int cn;
7419
385
  UChar* s;
7420
385
  UChar* e;
7421
385
  UChar* eesc;
7422
385
  OnigCodePoint c;
7423
385
  UChar* bufend;
7424
385
  UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH];
7425
385
  OnigEncoding enc = env->enc;
7426
385
  UChar* p = *src;
7427
7428
385
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7429
7430
385
  c = 0;
7431
385
  n = 0;
7432
810
  while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
7433
768
    cn  = 0;
7434
768
    esc = 0;
7435
768
    eesc = 0;
7436
768
    bufend = buf;
7437
768
    s = e = p;
7438
1.03M
    while (1) {
7439
1.03M
      if (PEND) {
7440
327
        r = ONIGERR_INVALID_CALLOUT_PATTERN;
7441
327
        goto err_clear;
7442
327
      }
7443
7444
1.03M
      e = p;
7445
1.03M
      PFETCH_S(c);
7446
1.03M
      if (esc != 0) {
7447
8.24k
        esc = 0;
7448
8.24k
        if (c == '\\' || c == cterm || c == ',') {
7449
          /* */
7450
5.59k
        }
7451
2.65k
        else {
7452
2.65k
          e = eesc;
7453
2.65k
          cn++;
7454
2.65k
        }
7455
8.24k
        goto add_char;
7456
8.24k
      }
7457
1.02M
      else {
7458
1.02M
        if (c == '\\') {
7459
8.24k
          esc = 1;
7460
8.24k
          eesc = e;
7461
8.24k
        }
7462
1.01M
        else if (c == cterm || c == ',')
7463
441
          break;
7464
1.01M
        else {
7465
1.01M
          size_t clen;
7466
7467
1.02M
        add_char:
7468
1.02M
          if (skip_mode == FALSE) {
7469
0
            clen = p - e;
7470
0
            if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) {
7471
0
              r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
7472
0
              goto err_clear;
7473
0
            }
7474
7475
0
            xmemcpy(bufend, e, clen);
7476
0
            bufend += clen;
7477
0
          }
7478
1.02M
          cn++;
7479
1.02M
        }
7480
1.02M
      }
7481
1.03M
    }
7482
7483
441
    if (cn != 0) {
7484
352
      if (max_arg_num >= 0 && n >= max_arg_num) {
7485
0
        r = ONIGERR_INVALID_CALLOUT_ARG;
7486
0
        goto err_clear;
7487
0
      }
7488
7489
352
      if (skip_mode == FALSE) {
7490
0
        if ((types[n] & ONIG_TYPE_LONG) != 0) {
7491
0
          int fixed = 0;
7492
0
          if (cn > 0) {
7493
0
            long rl;
7494
0
            r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);
7495
0
            if (r == ONIG_NORMAL) {
7496
0
              vals[n].l = rl;
7497
0
              fixed = 1;
7498
0
              types[n] = ONIG_TYPE_LONG;
7499
0
            }
7500
0
          }
7501
7502
0
          if (fixed == 0) {
7503
0
            types[n] = (types[n] & ~ONIG_TYPE_LONG);
7504
0
            if (types[n] == ONIG_TYPE_VOID) {
7505
0
              r = ONIGERR_INVALID_CALLOUT_ARG;
7506
0
              goto err_clear;
7507
0
            }
7508
0
          }
7509
0
        }
7510
7511
0
        switch (types[n]) {
7512
0
        case ONIG_TYPE_LONG:
7513
0
          break;
7514
7515
0
        case ONIG_TYPE_CHAR:
7516
0
          if (cn != 1) {
7517
0
            r = ONIGERR_INVALID_CALLOUT_ARG;
7518
0
            goto err_clear;
7519
0
          }
7520
0
          vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
7521
0
          break;
7522
7523
0
        case ONIG_TYPE_STRING:
7524
0
          {
7525
0
            UChar* rs = onigenc_strdup(enc, buf, bufend);
7526
0
            if (IS_NULL(rs)) {
7527
0
              r = ONIGERR_MEMORY; goto err_clear;
7528
0
            }
7529
0
            vals[n].s.start = rs;
7530
0
            vals[n].s.end   = rs + (e - s);
7531
0
          }
7532
0
          break;
7533
7534
0
        case ONIG_TYPE_TAG:
7535
0
          if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) {
7536
0
            r = ONIGERR_INVALID_CALLOUT_TAG_NAME;
7537
0
            goto err_clear;
7538
0
          }
7539
7540
0
          vals[n].s.start = s;
7541
0
          vals[n].s.end   = e;
7542
0
          break;
7543
7544
0
        case ONIG_TYPE_VOID:
7545
0
        case ONIG_TYPE_POINTER:
7546
0
          r = ONIGERR_PARSER_BUG;
7547
0
          goto err_clear;
7548
0
          break;
7549
0
        }
7550
0
      }
7551
7552
352
      n++;
7553
352
    }
7554
7555
441
    if (c == cterm) break;
7556
441
  }
7557
7558
58
  if (c != cterm) {
7559
42
    r = ONIGERR_INVALID_CALLOUT_PATTERN;
7560
42
    goto err_clear;
7561
42
  }
7562
7563
16
  *src = p;
7564
16
  return n;
7565
7566
369
 err_clear:
7567
369
  if (skip_mode == FALSE)
7568
0
    clear_callout_args(n, types, vals);
7569
369
  return r;
7570
58
}
7571
7572
/* (*name[TAG]) (*name[TAG]{a,b,..}) */
7573
static int
7574
prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end,
7575
                    ParseEnv* env)
7576
903
{
7577
903
  int r;
7578
903
  int i;
7579
903
  int in;
7580
903
  int num;
7581
903
  int name_id;
7582
903
  int arg_num;
7583
903
  int max_arg_num;
7584
903
  int opt_arg_num;
7585
903
  int is_not_single;
7586
903
  OnigCodePoint c;
7587
903
  UChar* name_start;
7588
903
  UChar* name_end;
7589
903
  UChar* tag_start;
7590
903
  UChar* tag_end;
7591
903
  Node*  node;
7592
903
  CalloutListEntry* e;
7593
903
  RegexExt* ext;
7594
903
  unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM];
7595
903
  OnigValue    vals[ONIG_CALLOUT_MAX_ARGS_NUM];
7596
903
  OnigEncoding enc = env->enc;
7597
903
  UChar* p = *src;
7598
7599
  /* PFETCH_READY; */
7600
903
  if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
7601
7602
817
  node = 0;
7603
817
  name_start = p;
7604
8.63k
  while (1) {
7605
8.63k
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7606
8.40k
    name_end = p;
7607
8.40k
    PFETCH_S(c);
7608
8.40k
    if (c == cterm || c == '[' || c == '{') break;
7609
8.40k
  }
7610
7611
591
  if (! is_allowed_callout_name(enc, name_start, name_end))
7612
104
    return ONIGERR_INVALID_CALLOUT_NAME;
7613
7614
487
  if (c == '[') {
7615
27
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7616
27
    tag_end = tag_start = p;
7617
4.11k
    while (! PEND) {
7618
4.10k
      if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7619
4.10k
      tag_end = p;
7620
4.10k
      PFETCH_S(c);
7621
4.10k
      if (c == ']') break;
7622
4.10k
    }
7623
27
    if (! is_allowed_callout_tag_name(enc, tag_start, tag_end))
7624
25
      return ONIGERR_INVALID_CALLOUT_TAG_NAME;
7625
7626
2
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7627
1
    PFETCH_S(c);
7628
1
  }
7629
460
  else {
7630
460
    tag_start = tag_end = 0;
7631
460
  }
7632
7633
461
  if (c == '{') {
7634
436
    UChar* save;
7635
7636
436
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7637
7638
    /* read for single check only */
7639
385
    save = p;
7640
385
    arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
7641
385
    if (arg_num < 0) return arg_num;
7642
7643
16
    is_not_single = PPEEK_IS(cterm) ?  0 : 1;
7644
16
    p = save;
7645
16
    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7646
16
                                    &name_id);
7647
16
    if (r != ONIG_NORMAL) return r;
7648
7649
0
    max_arg_num = get_callout_arg_num_by_name_id(name_id);
7650
0
    for (i = 0; i < max_arg_num; i++) {
7651
0
      types[i] = get_callout_arg_type_by_name_id(name_id, i);
7652
0
    }
7653
7654
0
    arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
7655
0
    if (arg_num < 0) return arg_num;
7656
7657
0
    if (PEND) {
7658
0
      r = ONIGERR_END_PATTERN_IN_GROUP;
7659
0
      goto err_clear;
7660
0
    }
7661
0
    PFETCH_S(c);
7662
0
  }
7663
25
  else {
7664
25
    arg_num = 0;
7665
7666
25
    is_not_single = 0;
7667
25
    r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end,
7668
25
                                      &name_id);
7669
25
    if (r != ONIG_NORMAL) return r;
7670
7671
0
    max_arg_num = get_callout_arg_num_by_name_id(name_id);
7672
0
    for (i = 0; i < max_arg_num; i++) {
7673
0
      types[i] = get_callout_arg_type_by_name_id(name_id, i);
7674
0
    }
7675
0
  }
7676
7677
0
  in = onig_get_callout_in_by_name_id(name_id);
7678
0
  opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
7679
0
  if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
7680
0
    r = ONIGERR_INVALID_CALLOUT_ARG;
7681
0
    goto err_clear;
7682
0
  }
7683
7684
0
  if (c != cterm) {
7685
0
    r = ONIGERR_INVALID_CALLOUT_PATTERN;
7686
0
    goto err_clear;
7687
0
  }
7688
7689
0
  r = reg_callout_list_entry(env, &num);
7690
0
  if (r != 0) goto err_clear;
7691
7692
0
  ext = onig_get_regex_ext(env->reg);
7693
0
  if (IS_NULL(ext)) {
7694
0
    r = ONIGERR_MEMORY; goto err_clear;
7695
0
  }
7696
0
  if (IS_NULL(ext->pattern)) {
7697
0
    r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
7698
0
    if (r != ONIG_NORMAL) goto err_clear;
7699
0
  }
7700
7701
0
  if (tag_start != tag_end) {
7702
0
    r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
7703
0
    if (r != ONIG_NORMAL) goto err_clear;
7704
0
  }
7705
7706
0
  e = onig_reg_callout_list_at(env->reg, num);
7707
0
  if (IS_NULL(e)) {
7708
0
    r = ONIGERR_MEMORY; goto err_clear;
7709
0
  }
7710
7711
0
  r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
7712
0
  if (r != ONIG_NORMAL) goto err_clear;
7713
7714
0
  e->of         = ONIG_CALLOUT_OF_NAME;
7715
0
  e->in         = in;
7716
0
  e->name_id    = name_id;
7717
0
  e->type       = onig_get_callout_type_by_name_id(name_id);
7718
0
  e->start_func = onig_get_callout_start_func_by_name_id(name_id);
7719
0
  e->end_func   = onig_get_callout_end_func_by_name_id(name_id);
7720
0
  e->u.arg.num        = max_arg_num;
7721
0
  e->u.arg.passed_num = arg_num;
7722
0
  for (i = 0; i < max_arg_num; i++) {
7723
0
    e->u.arg.types[i] = types[i];
7724
0
    if (i < arg_num)
7725
0
      e->u.arg.vals[i] = vals[i];
7726
0
    else
7727
0
      e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i);
7728
0
  }
7729
7730
0
  *np = node;
7731
0
  *src = p;
7732
0
  return 0;
7733
7734
0
 err_clear:
7735
0
  clear_callout_args(arg_num, types, vals);
7736
0
  return r;
7737
0
}
7738
#endif
7739
7740
#ifdef USE_WHOLE_OPTIONS
7741
static int
7742
set_whole_options(OnigOptionType option, ParseEnv* env)
7743
0
{
7744
0
  if ((env->flags & PE_FLAG_HAS_WHOLE_OPTIONS) != 0)
7745
0
    return ONIGERR_INVALID_GROUP_OPTION;
7746
7747
0
  env->flags |= PE_FLAG_HAS_WHOLE_OPTIONS;
7748
7749
0
  if (OPTON_DONT_CAPTURE_GROUP(option)) {
7750
0
    env->reg->options |= ONIG_OPTION_DONT_CAPTURE_GROUP;
7751
0
    if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
7752
0
      return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
7753
0
  }
7754
7755
0
  if ((option & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) {
7756
0
    env->reg->case_fold_flag &=
7757
0
      ~(INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR |
7758
0
        ONIGENC_CASE_FOLD_TURKISH_AZERI);
7759
0
    env->reg->case_fold_flag |= ONIGENC_CASE_FOLD_ASCII_ONLY;
7760
0
    env->reg->options |= ONIG_OPTION_IGNORECASE_IS_ASCII;
7761
0
  }
7762
7763
0
  if (OPTON_FIND_LONGEST(option)) {
7764
0
    env->reg->options |= ONIG_OPTION_FIND_LONGEST;
7765
0
  }
7766
7767
0
  return 0;
7768
0
}
7769
#endif
7770
7771
static int
7772
prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
7773
        ParseEnv* env)
7774
1.78M
{
7775
1.78M
  int r, num;
7776
1.78M
  Node *target;
7777
1.78M
  OnigOptionType option;
7778
1.78M
  OnigCodePoint c;
7779
1.78M
  int list_capture;
7780
1.78M
  OnigEncoding enc;
7781
1.78M
  UChar* p;
7782
1.78M
  PFETCH_READY;
7783
7784
1.78M
  p = *src;
7785
1.78M
  enc = env->enc;
7786
1.78M
  *np = NULL;
7787
1.78M
  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7788
7789
1.76M
  option = env->options;
7790
1.76M
  c = PPEEK;
7791
1.76M
  if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
7792
62.6k
    PINC;
7793
62.6k
    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7794
7795
60.9k
    PFETCH(c);
7796
60.9k
    switch (c) {
7797
280
    case ':':   /* (?:...) grouping only */
7798
280
    group:
7799
280
      r = fetch_token(tok, &p, end, env);
7800
280
      if (r < 0) return r;
7801
280
      r = prs_alts(np, tok, term, &p, end, env, FALSE);
7802
280
      if (r < 0) return r;
7803
228
      *src = p;
7804
228
      return 1; /* group */
7805
0
      break;
7806
7807
1.65k
    case '=':
7808
1.65k
      *np = node_new_anchor(ANCR_PREC_READ);
7809
1.65k
      break;
7810
389
    case '!':  /*         preceding read */
7811
389
      *np = node_new_anchor(ANCR_PREC_READ_NOT);
7812
389
      break;
7813
64
    case '>':            /* (?>...) stop backtrack */
7814
64
      *np = node_new_bag(BAG_STOP_BACKTRACK);
7815
64
      break;
7816
7817
3.01k
    case '\'':
7818
3.01k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7819
3.01k
        goto named_group1;
7820
3.01k
      }
7821
0
      else
7822
0
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7823
0
      break;
7824
7825
203
    case '<':   /* look behind (?<=...), (?<!...) */
7826
203
      if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
7827
179
      PFETCH(c);
7828
179
      if (c == '=')
7829
6
        *np = node_new_anchor(ANCR_LOOK_BEHIND);
7830
173
      else if (c == '!')
7831
0
        *np = node_new_anchor(ANCR_LOOK_BEHIND_NOT);
7832
173
      else {
7833
173
        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
7834
173
          UChar *name;
7835
173
          UChar *name_end;
7836
173
          enum REF_NUM num_type;
7837
7838
173
          PUNFETCH;
7839
173
          c = '<';
7840
7841
3.18k
        named_group1:
7842
3.18k
          list_capture = 0;
7843
7844
3.18k
#ifdef USE_CAPTURE_HISTORY
7845
3.18k
        named_group2:
7846
3.18k
#endif
7847
3.18k
          name = p;
7848
3.18k
          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
7849
3.18k
                         &num_type, FALSE);
7850
3.18k
          if (r < 0) return r;
7851
7852
2.78k
          num = scan_env_add_mem_entry(env);
7853
2.78k
          if (num < 0) return num;
7854
2.78k
          if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
7855
0
            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
7856
7857
2.78k
          r = name_add(env->reg, name, name_end, num, env);
7858
2.78k
          if (r != 0) return r;
7859
2.78k
          *np = node_new_memory(1);
7860
2.78k
          CHECK_NULL_RETURN_MEMERR(*np);
7861
2.78k
          BAG_(*np)->m.regnum = num;
7862
2.78k
          if (list_capture != 0)
7863
0
            MEM_STATUS_ON_SIMPLE(env->cap_history, num);
7864
2.78k
          env->num_named++;
7865
2.78k
        }
7866
0
        else {
7867
0
          return ONIGERR_UNDEFINED_GROUP_OPTION;
7868
0
        }
7869
173
      }
7870
2.79k
      break;
7871
7872
9.24k
    case '~':
7873
9.24k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) {
7874
9.24k
        Node* absent;
7875
9.24k
        Node* expr;
7876
9.24k
        int head_bar;
7877
9.24k
        int is_range_cutter;
7878
7879
9.24k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7880
7881
9.17k
        if (PPEEK_IS('|')) { /* (?~|generator|absent) */
7882
44
          PINC;
7883
44
          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7884
7885
44
          head_bar = 1;
7886
44
          if (PPEEK_IS(')')) { /* (?~|)  : range clear */
7887
0
            PINC;
7888
0
            r = make_range_clear(np, env);
7889
0
            if (r != 0) return r;
7890
0
            env->flags |= PE_FLAG_HAS_ABSENT_STOPPER;
7891
0
            goto end;
7892
0
          }
7893
44
        }
7894
9.13k
        else
7895
9.13k
          head_bar = 0;
7896
7897
9.17k
        r = fetch_token(tok, &p, end, env);
7898
9.17k
        if (r < 0) return r;
7899
9.17k
        r = prs_alts(&absent, tok, term, &p, end, env, TRUE);
7900
9.17k
        if (r < 0) {
7901
6.69k
          onig_node_free(absent);
7902
6.69k
          return r;
7903
6.69k
        }
7904
7905
2.48k
        expr = NULL_NODE;
7906
2.48k
        is_range_cutter = 0;
7907
2.48k
        if (head_bar != 0) {
7908
43
          Node* top = absent;
7909
43
          if (ND_TYPE(top) != ND_ALT || IS_NULL(ND_CDR(top))) {
7910
39
            expr = NULL_NODE;
7911
39
            is_range_cutter = 1;
7912
39
            env->flags |= PE_FLAG_HAS_ABSENT_STOPPER;
7913
39
          }
7914
4
          else {
7915
4
            absent = ND_CAR(top);
7916
4
            expr   = ND_CDR(top);
7917
4
            ND_CAR(top) = NULL_NODE;
7918
4
            ND_CDR(top) = NULL_NODE;
7919
4
            onig_node_free(top);
7920
4
            if (IS_NULL(ND_CDR(expr))) {
7921
1
              top = expr;
7922
1
              expr = ND_CAR(top);
7923
1
              ND_CAR(top) = NULL_NODE;
7924
1
              onig_node_free(top);
7925
1
            }
7926
4
          }
7927
43
        }
7928
7929
2.48k
        r = make_absent_tree(np, absent, expr, is_range_cutter, env);
7930
2.48k
        if (r != 0) {
7931
0
          return r;
7932
0
        }
7933
2.48k
        goto end;
7934
2.48k
      }
7935
0
      else {
7936
0
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7937
0
      }
7938
0
      break;
7939
7940
0
#ifdef USE_CALLOUT
7941
88
    case '{':
7942
88
      if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
7943
0
        return ONIGERR_UNDEFINED_GROUP_OPTION;
7944
7945
88
      r = prs_callout_of_contents(np, ')', &p, end, env);
7946
88
      if (r != 0) return r;
7947
7948
0
      goto end;
7949
0
      break;
7950
0
#endif
7951
7952
31.3k
    case '(':
7953
      /* (?()...) */
7954
31.3k
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) {
7955
31.3k
        UChar *prev;
7956
31.3k
        Node* condition;
7957
31.3k
        int condition_is_checker;
7958
7959
31.3k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7960
30.7k
        PFETCH(c);
7961
30.7k
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
7962
7963
30.2k
        if (IS_CODE_DIGIT_ASCII(enc, c)
7964
30.2k
            || c == '-' || c == '+' || c == '<' || c == '\'') {
7965
12.3k
#ifdef USE_BACKREF_WITH_LEVEL
7966
12.3k
          int exist_level;
7967
12.3k
          int level;
7968
12.3k
#endif
7969
12.3k
          UChar* name_end;
7970
12.3k
          int back_num;
7971
12.3k
          enum REF_NUM num_type;
7972
12.3k
          int is_enclosed;
7973
7974
12.3k
          is_enclosed = (c == '<' || c == '\'') ? 1 : 0;
7975
12.3k
          if (! is_enclosed)
7976
12.3k
            PUNFETCH;
7977
12.3k
          prev = p;
7978
12.3k
#ifdef USE_BACKREF_WITH_LEVEL
7979
12.3k
          exist_level = 0;
7980
12.3k
          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
7981
12.3k
          r = fetch_name_with_level(
7982
12.3k
                    (OnigCodePoint )(is_enclosed != 0 ? c : '('),
7983
12.3k
                    &p, end, &name_end,
7984
12.3k
                    env, &back_num, &level, &num_type);
7985
12.3k
          if (r == 1) exist_level = 1;
7986
#else
7987
          r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
7988
                         &p, end, &name_end, env, &back_num, &num_type, TRUE);
7989
#endif
7990
12.3k
          if (r < 0) {
7991
9.19k
            if (is_enclosed == 0) {
7992
9.16k
              goto any_condition;
7993
9.16k
            }
7994
27
            else
7995
27
              return r;
7996
9.19k
          }
7997
7998
3.17k
          condition_is_checker = 1;
7999
3.17k
          if (num_type != IS_NOT_NUM) {
8000
3.17k
            if (num_type == IS_REL_NUM) {
8001
1
              back_num = backref_rel_to_abs(back_num, env);
8002
1
            }
8003
3.17k
            if (back_num <= 0)
8004
6
              return ONIGERR_INVALID_BACKREF;
8005
8006
3.17k
            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
8007
0
              if (back_num > env->num_mem ||
8008
0
                  IS_NULL(PARSEENV_MEMENV(env)[back_num].mem_node))
8009
0
                return ONIGERR_INVALID_BACKREF;
8010
0
            }
8011
8012
3.17k
            condition = node_new_backref_checker(1, &back_num, FALSE,
8013
3.17k
#ifdef USE_BACKREF_WITH_LEVEL
8014
3.17k
                                                 exist_level, level,
8015
3.17k
#endif
8016
3.17k
                                                 env);
8017
3.17k
          }
8018
0
          else {
8019
0
            int num;
8020
0
            int* backs;
8021
8022
0
            num = name_to_group_numbers(env, prev, name_end, &backs);
8023
0
            if (num <= 0) {
8024
0
              return ONIGERR_UNDEFINED_NAME_REFERENCE;
8025
0
            }
8026
0
            if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
8027
0
              int i;
8028
0
              for (i = 0; i < num; i++) {
8029
0
                if (backs[i] > env->num_mem ||
8030
0
                    IS_NULL(PARSEENV_MEMENV(env)[backs[i]].mem_node))
8031
0
                  return ONIGERR_INVALID_BACKREF;
8032
0
              }
8033
0
            }
8034
8035
0
            condition = node_new_backref_checker(num, backs, TRUE,
8036
0
#ifdef USE_BACKREF_WITH_LEVEL
8037
0
                                                 exist_level, level,
8038
0
#endif
8039
0
                                                 env);
8040
0
          }
8041
8042
3.17k
          if (is_enclosed != 0) {
8043
0
            if (PEND) goto err_if_else;
8044
0
            PFETCH(c);
8045
0
            if (c != ')') goto err_if_else;
8046
0
          }
8047
3.17k
        }
8048
17.8k
#ifdef USE_CALLOUT
8049
17.8k
        else if (c == '?') {
8050
10
          if (IS_SYNTAX_OP2(env->syntax,
8051
10
                            ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) {
8052
10
            if (! PEND && PPEEK_IS('{')) {
8053
              /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
8054
0
              condition_is_checker = 0;
8055
0
              PFETCH(c);
8056
0
              r = prs_callout_of_contents(&condition, ')', &p, end, env);
8057
0
              if (r != 0) return r;
8058
0
              goto end_condition;
8059
0
            }
8060
10
          }
8061
10
          goto any_condition;
8062
10
        }
8063
17.8k
        else if (c == '*' &&
8064
17.8k
                 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8065
8
          condition_is_checker = 0;
8066
8
          r = prs_callout_of_name(&condition, ')', &p, end, env);
8067
8
          if (r != 0) return r;
8068
0
          goto end_condition;
8069
8
        }
8070
17.8k
#endif
8071
17.8k
        else {
8072
27.0k
        any_condition:
8073
27.0k
          PUNFETCH;
8074
27.0k
          condition_is_checker = 0;
8075
27.0k
          r = fetch_token(tok, &p, end, env);
8076
27.0k
          if (r < 0) return r;
8077
27.0k
          r = prs_alts(&condition, tok, term, &p, end, env, FALSE);
8078
27.0k
          if (r < 0) {
8079
24.8k
            onig_node_free(condition);
8080
24.8k
            return r;
8081
24.8k
          }
8082
27.0k
        }
8083
8084
5.37k
#ifdef USE_CALLOUT
8085
5.37k
      end_condition:
8086
5.37k
#endif
8087
5.37k
        CHECK_NULL_RETURN_MEMERR(condition);
8088
8089
5.37k
        if (PEND) {
8090
66
        err_if_else:
8091
66
          onig_node_free(condition);
8092
66
          return ONIGERR_END_PATTERN_IN_GROUP;
8093
66
        }
8094
8095
5.30k
        if (PPEEK_IS(')')) { /* case: empty body: make backref checker */
8096
49
          if (condition_is_checker == 0) {
8097
3
            onig_node_free(condition);
8098
3
            return ONIGERR_INVALID_IF_ELSE_SYNTAX;
8099
3
          }
8100
46
          PFETCH(c);
8101
46
          *np = condition;
8102
46
        }
8103
5.25k
        else { /* if-else */
8104
5.25k
          int then_is_empty;
8105
5.25k
          Node *Then, *Else;
8106
8107
5.25k
          Then = 0;
8108
5.25k
          if (PPEEK_IS('|')) {
8109
48
            PFETCH(c);
8110
48
            then_is_empty = 1;
8111
48
          }
8112
5.21k
          else
8113
5.21k
            then_is_empty = 0;
8114
8115
5.25k
          r = fetch_token(tok, &p, end, env);
8116
5.25k
          if (r < 0) {
8117
0
            onig_node_free(condition);
8118
0
            return r;
8119
0
          }
8120
5.25k
          r = prs_alts(&target, tok, term, &p, end, env, TRUE);
8121
5.25k
          if (r < 0) {
8122
2.77k
            onig_node_free(condition);
8123
2.77k
            onig_node_free(target);
8124
2.77k
            return r;
8125
2.77k
          }
8126
8127
2.48k
          if (then_is_empty != 0) {
8128
19
            Else = target;
8129
19
          }
8130
2.46k
          else {
8131
2.46k
            if (ND_TYPE(target) == ND_ALT) {
8132
960
              Then = ND_CAR(target);
8133
960
              if (ND_CDR(ND_CDR(target)) == NULL_NODE) {
8134
464
                Else = ND_CAR(ND_CDR(target));
8135
464
                cons_node_free_alone(ND_CDR(target));
8136
464
              }
8137
496
              else {
8138
496
                Else = ND_CDR(target);
8139
496
              }
8140
960
              cons_node_free_alone(target);
8141
960
            }
8142
1.50k
            else {
8143
1.50k
              Then = target;
8144
1.50k
              Else = 0;
8145
1.50k
            }
8146
2.46k
          }
8147
8148
2.48k
          *np = node_new_bag_if_else(condition, Then, Else);
8149
2.48k
          if (IS_NULL(*np)) {
8150
0
            onig_node_free(condition);
8151
0
            onig_node_free(Then);
8152
0
            onig_node_free(Else);
8153
0
            return ONIGERR_MEMORY;
8154
0
          }
8155
2.48k
        }
8156
2.53k
        goto end;
8157
5.30k
      }
8158
0
      else {
8159
0
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8160
0
      }
8161
0
      break;
8162
8163
0
#ifdef USE_CAPTURE_HISTORY
8164
59
    case '@':
8165
59
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
8166
0
        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
8167
0
          PFETCH(c);
8168
0
          if (c == '<' || c == '\'') {
8169
0
            list_capture = 1;
8170
0
            goto named_group2; /* (?@<name>...) */
8171
0
          }
8172
0
          PUNFETCH;
8173
0
        }
8174
8175
0
        *np = node_new_memory(0);
8176
0
        CHECK_NULL_RETURN_MEMERR(*np);
8177
0
        num = scan_env_add_mem_entry(env);
8178
0
        if (num < 0) {
8179
0
          return num;
8180
0
        }
8181
0
        else if (num >= (int )MEM_STATUS_BITS_NUM) {
8182
0
          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
8183
0
        }
8184
0
        BAG_(*np)->m.regnum = num;
8185
0
        MEM_STATUS_ON_SIMPLE(env->cap_history, num);
8186
0
      }
8187
59
      else {
8188
59
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8189
59
      }
8190
0
      break;
8191
0
#endif
8192
8193
0
#ifdef USE_WHOLE_OPTIONS
8194
41
    case 'C':
8195
126
    case 'I':
8196
147
    case 'L':
8197
147
      if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8198
147
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8199
8200
0
      goto options_start;
8201
0
      break;
8202
0
#endif
8203
8204
112
    case 'P':
8205
112
      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAME)) {
8206
0
        if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8207
0
        PFETCH(c);
8208
0
        if (c == '<') goto named_group1;
8209
8210
0
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8211
0
      }
8212
      /* else fall */
8213
149
    case 'W': case 'D': case 'S':
8214
162
    case 'y':
8215
162
      if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8216
162
        return ONIGERR_UNDEFINED_GROUP_OPTION;
8217
      /* else fall */
8218
8219
#ifdef USE_POSIXLINE_OPTION
8220
    case 'p':
8221
#endif
8222
18
    case 'a':
8223
14.0k
    case '-': case 'i': case 'm': case 's': case 'x':
8224
14.0k
#ifdef USE_WHOLE_OPTIONS
8225
14.0k
      options_start:
8226
14.0k
#endif
8227
14.0k
      {
8228
14.0k
        int neg;
8229
14.0k
#ifdef USE_WHOLE_OPTIONS
8230
14.0k
        int whole_options;
8231
14.0k
        whole_options = FALSE;
8232
14.0k
#endif
8233
14.0k
        neg = FALSE;
8234
32.7k
        while (1) {
8235
32.7k
          switch (c) {
8236
3.75k
          case ':':
8237
13.2k
          case ')':
8238
13.2k
            break;
8239
8240
185
          case '-':  neg = TRUE; break;
8241
2.41k
          case 'x':  OPTION_NEGATE(option, ONIG_OPTION_EXTEND,     neg); break;
8242
12.6k
          case 'i':  OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, neg); break;
8243
1.33k
          case 's':
8244
1.33k
            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8245
1.33k
              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8246
1.33k
            }
8247
0
            else
8248
0
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8249
1.33k
            break;
8250
8251
2.81k
          case 'm':
8252
2.81k
            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
8253
2.81k
              OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == FALSE ? TRUE : FALSE));
8254
2.81k
            }
8255
0
            else if (IS_SYNTAX_OP2(env->syntax,
8256
0
                        ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
8257
0
              OPTION_NEGATE(option, ONIG_OPTION_MULTILINE,  neg);
8258
0
            }
8259
0
            else
8260
0
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8261
2.81k
            break;
8262
#ifdef USE_POSIXLINE_OPTION
8263
          case 'p':
8264
            OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
8265
            break;
8266
#endif
8267
2.81k
          case 'W':
8268
0
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8269
0
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8270
0
            OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg);
8271
0
            break;
8272
1
          case 'D':
8273
1
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8274
1
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8275
0
            OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg);
8276
0
            break;
8277
1
          case 'S':
8278
1
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8279
1
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8280
0
            OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg);
8281
0
            break;
8282
1
          case 'P':
8283
1
            if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8284
1
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8285
0
            OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8286
0
            break;
8287
8288
3
          case 'y': /* y{g}, y{w} */
8289
3
            {
8290
3
              if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
8291
3
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8292
8293
0
              if (neg == TRUE) return ONIGERR_UNDEFINED_GROUP_OPTION;
8294
8295
0
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8296
0
              if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
8297
0
              PFETCH(c);
8298
0
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8299
0
              PFETCH(c);
8300
0
              switch (c) {
8301
0
              case 'g':
8302
0
                if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8303
0
                  return ONIGERR_UNDEFINED_GROUP_OPTION;
8304
8305
0
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
8306
0
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
8307
0
                break;
8308
0
#ifdef USE_UNICODE_WORD_BREAK
8309
0
              case 'w':
8310
0
                if (! ONIGENC_IS_UNICODE_ENCODING(enc))
8311
0
                  return ONIGERR_UNDEFINED_GROUP_OPTION;
8312
8313
0
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
8314
0
                OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
8315
0
                break;
8316
0
#endif
8317
0
              default:
8318
0
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8319
0
                break;
8320
0
              }
8321
0
              if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8322
0
              PFETCH(c);
8323
0
              if (c != '}')
8324
0
                return ONIGERR_UNDEFINED_GROUP_OPTION;
8325
0
            } /* case 'y' */
8326
0
            break;
8327
8328
23
          case 'a':
8329
23
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_PYTHON))
8330
23
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8331
8332
0
            OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg);
8333
0
            break;
8334
8335
0
#ifdef USE_WHOLE_OPTIONS
8336
10
          case 'C':
8337
10
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8338
10
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8339
8340
0
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8341
0
            OPTION_NEGATE(option, ONIG_OPTION_DONT_CAPTURE_GROUP, neg);
8342
0
            whole_options = TRUE;
8343
0
            break;
8344
8345
1
          case 'I':
8346
1
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8347
1
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8348
8349
0
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8350
0
            OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE_IS_ASCII, neg);
8351
0
            whole_options = TRUE;
8352
0
            break;
8353
8354
1
          case 'L':
8355
1
            if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WHOLE_OPTIONS))
8356
1
              return ONIGERR_UNDEFINED_GROUP_OPTION;
8357
8358
0
            if (neg == TRUE) return ONIGERR_INVALID_GROUP_OPTION;
8359
0
            OPTION_NEGATE(option, ONIG_OPTION_FIND_LONGEST, neg);
8360
0
            whole_options = TRUE;
8361
0
            break;
8362
0
#endif
8363
8364
105
          default:
8365
105
            return ONIGERR_UNDEFINED_GROUP_OPTION;
8366
32.7k
          }
8367
8368
32.6k
          if (c == ')') {
8369
9.49k
            *np = node_new_option(option);
8370
9.49k
            CHECK_NULL_RETURN_MEMERR(*np);
8371
8372
9.49k
#ifdef USE_WHOLE_OPTIONS
8373
9.49k
            if (whole_options == TRUE) {
8374
0
              r = set_whole_options(option, env);
8375
0
              if (r != 0) return r;
8376
0
              ND_STATUS_ADD(*np, WHOLE_OPTIONS);
8377
0
            }
8378
9.49k
#endif
8379
9.49k
            *src = p;
8380
9.49k
            return 2; /* option only */
8381
9.49k
          }
8382
23.1k
          else if (c == ':') {
8383
3.75k
            OnigOptionType prev = env->options;
8384
8385
3.75k
            env->options = option;
8386
3.75k
#ifdef USE_WHOLE_OPTIONS
8387
3.75k
            if (whole_options == TRUE) {
8388
0
              r = set_whole_options(option, env);
8389
0
              if (r != 0) return r;
8390
0
            }
8391
3.75k
#endif
8392
3.75k
            r = fetch_token(tok, &p, end, env);
8393
3.75k
            if (r < 0) return r;
8394
3.75k
            r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8395
3.75k
            env->options = prev;
8396
3.75k
            if (r < 0) {
8397
1.50k
              onig_node_free(target);
8398
1.50k
              return r;
8399
1.50k
            }
8400
8401
2.25k
            *np = node_new_option(option);
8402
2.25k
            CHECK_NULL_RETURN_MEMERR(*np);
8403
2.25k
            ND_BODY(*np) = target;
8404
2.25k
            ND_STATUS_ADD(*np, WHOLE_OPTIONS);
8405
8406
2.25k
            *src = p;
8407
2.25k
            return 0;
8408
2.25k
          }
8409
8410
19.3k
          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
8411
18.7k
          PFETCH(c);
8412
18.7k
        } /* while (1) */
8413
14.0k
      }
8414
0
      break;
8415
8416
229
    default:
8417
229
      return ONIGERR_UNDEFINED_GROUP_OPTION;
8418
60.9k
    }
8419
60.9k
  }
8420
1.70M
#ifdef USE_CALLOUT
8421
1.70M
  else if (c == '*' &&
8422
1.70M
           IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
8423
895
    PINC;
8424
895
    r = prs_callout_of_name(np, ')', &p, end, env);
8425
895
    if (r != 0) return r;
8426
8427
0
    goto end;
8428
895
  }
8429
1.69M
#endif
8430
1.69M
  else {
8431
1.69M
    if (OPTON_DONT_CAPTURE_GROUP(env->options))
8432
0
      goto group;
8433
8434
1.69M
    *np = node_new_memory(0);
8435
1.69M
    CHECK_NULL_RETURN_MEMERR(*np);
8436
1.69M
    num = scan_env_add_mem_entry(env);
8437
1.69M
    if (num < 0) return num;
8438
1.69M
    BAG_(*np)->m.regnum = num;
8439
1.69M
  }
8440
8441
1.70M
  CHECK_NULL_RETURN_MEMERR(*np);
8442
1.70M
  r = fetch_token(tok, &p, end, env);
8443
1.70M
  if (r < 0) return r;
8444
1.70M
  r = prs_alts(&target, tok, term, &p, end, env, FALSE);
8445
1.70M
  if (r < 0) {
8446
1.27M
    onig_node_free(target);
8447
1.27M
    return r;
8448
1.27M
  }
8449
8450
427k
  ND_BODY(*np) = target;
8451
8452
427k
  if (ND_TYPE(*np) == ND_BAG) {
8453
425k
    if (BAG_(*np)->type == BAG_MEMORY) {
8454
      /* Don't move this to previous of prs_alts() */
8455
425k
      r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
8456
425k
      if (r != 0) return r;
8457
425k
    }
8458
425k
  }
8459
8460
432k
 end:
8461
432k
  *src = p;
8462
432k
  return 0;
8463
427k
}
8464
8465
/* Spellings of the quantifiers that have a "popular" type number.
   assign_quantifier_body() indexes this table with the value returned by
   quantifier_type_num() when it composes a nested-repeat warning. */
static const char* PopularQStr[] = {
  "?",
  "*",
  "+",
  "??",
  "*?",
  "+?"
};
8468
8469
/* Text used in the "was replaced with" part of the nested-repeat warning.
   assign_quantifier_body() indexes this table with a ReduceTypeTable entry. */
static const char* ReduceQStr[] = {
  "",
  "",
  "*",
  "*?",
  "??",
  "+ and ??",
  "+? and ?"
};
8472
8473
static int
8474
assign_quantifier_body(Node* qnode, Node* target, int group, ParseEnv* env)
8475
2.62M
{
8476
2.62M
  QuantNode* qn;
8477
8478
2.62M
  qn = QUANT_(qnode);
8479
2.62M
  if (qn->lower == 1 && qn->upper == 1)
8480
2.09k
    return 1;
8481
8482
2.61M
  switch (ND_TYPE(target)) {
8483
1.57M
  case ND_STRING:
8484
1.57M
    if (group == 0) {
8485
1.57M
      if (str_node_can_be_split(target, env->enc)) {
8486
1.45M
        Node* n = str_node_split_last_char(target, env->enc);
8487
1.45M
        if (IS_NOT_NULL(n)) {
8488
1.45M
          ND_BODY(qnode) = n;
8489
1.45M
          return 2;
8490
1.45M
        }
8491
1.45M
      }
8492
1.57M
    }
8493
121k
    break;
8494
8495
414k
  case ND_QUANT:
8496
414k
    { /* check redundant double repeat. */
8497
      /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
8498
414k
      QuantNode* qnt   = QUANT_(target);
8499
414k
      int nestq_num   = quantifier_type_num(qn);
8500
414k
      int targetq_num = quantifier_type_num(qnt);
8501
8502
414k
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
8503
414k
      if (targetq_num >= 0 && nestq_num >= 0 &&
8504
414k
          IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
8505
0
        UChar buf[WARN_BUFSIZE];
8506
8507
0
        switch(ReduceTypeTable[targetq_num][nestq_num]) {
8508
0
        case RQ_ASIS:
8509
0
          break;
8510
8511
0
        case RQ_DEL:
8512
0
          if (onig_verb_warn != onig_null_warn) {
8513
0
            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8514
0
                                  env->pattern, env->pattern_end,
8515
0
                                  (UChar* )"redundant nested repeat operator");
8516
0
            (*onig_verb_warn)((char* )buf);
8517
0
          }
8518
0
          goto warn_exit;
8519
0
          break;
8520
8521
0
        default:
8522
0
          if (onig_verb_warn != onig_null_warn) {
8523
0
            onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
8524
0
                                       env->pattern, env->pattern_end,
8525
0
            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
8526
0
            PopularQStr[targetq_num], PopularQStr[nestq_num],
8527
0
            ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
8528
0
            (*onig_verb_warn)((char* )buf);
8529
0
          }
8530
0
          goto warn_exit;
8531
0
          break;
8532
0
        }
8533
0
      }
8534
8535
414k
    warn_exit:
8536
414k
#endif
8537
414k
      if (targetq_num >= 0 && nestq_num < 0) {
8538
6.35k
        if (targetq_num == 1 || targetq_num == 2) { /* * or + */
8539
          /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
8540
3.80k
          if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
8541
1.73k
            qn->upper = (qn->lower == 0 ? 1 : qn->lower);
8542
1.73k
          }
8543
3.80k
        }
8544
6.35k
      }
8545
407k
      else {
8546
407k
        int r;
8547
8548
407k
        ND_BODY(qnode) = target;
8549
407k
        r = onig_reduce_nested_quantifier(qnode);
8550
407k
        return r;
8551
407k
      }
8552
414k
    }
8553
6.35k
    break;
8554
8555
628k
  default:
8556
628k
    break;
8557
2.61M
  }
8558
8559
756k
  ND_BODY(qnode) = target;
8560
756k
  return 0;
8561
2.61M
}
8562
8563
8564
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8565
static int
8566
clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
8567
{
8568
  BBuf *tbuf;
8569
  int r;
8570
8571
  if (IS_NCCLASS_NOT(cc)) {
8572
    bitset_invert(cc->bs);
8573
8574
    if (! ONIGENC_IS_SINGLEBYTE(enc)) {
8575
      r = not_code_range_buf(enc, cc->mbuf, &tbuf);
8576
      if (r != 0) return r;
8577
8578
      bbuf_free(cc->mbuf);
8579
      cc->mbuf = tbuf;
8580
    }
8581
8582
    NCCLASS_CLEAR_NOT(cc);
8583
  }
8584
8585
  return 0;
8586
}
8587
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8588
8589
22.4M
/*
 * ADD_CODE_INTO_CC(cc, code, enc)
 * Add the single code point `code` to the character class `cc`.
 * When the encoding's minimum character length is greater than 1, or `code`
 * does not encode to exactly one byte, the code goes into the multibyte
 * range buffer; otherwise its bit is set in the bitset.
 *
 * NOTE: `code` is expanded several times, so the argument must have no side
 * effects.  The status returned by add_code_range_to_buf() is discarded.
 */
#define ADD_CODE_INTO_CC(cc, code, enc) do {\
  if (ONIGENC_MBC_MINLEN(enc) > 1 ||\
      ONIGENC_CODE_TO_MBCLEN(enc, (code)) != 1) {\
    add_code_range_to_buf(&((cc)->mbuf), (code), (code));\
  }\
  else {\
    BITSET_SET_BIT((cc)->bs, (code));\
  }\
} while (0)
8597
8598
extern int
8599
onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
8600
                               int n, OnigCodePoint codes[])
8601
667k
{
8602
667k
  int i;
8603
667k
  Node* node;
8604
667k
  CClassNode* cc;
8605
8606
667k
  *rnode = NULL_NODE;
8607
8608
667k
  node = node_new_cclass();
8609
667k
  CHECK_NULL_RETURN_MEMERR(node);
8610
8611
667k
  cc = CCLASS_(node);
8612
8613
2.02M
  for (i = 0; i < n; i++) {
8614
1.35M
    ADD_CODE_INTO_CC(cc, codes[i], enc);
8615
1.35M
  }
8616
8617
667k
  *rnode = node;
8618
667k
  return 0;
8619
667k
}
8620
8621
typedef struct {
8622
  ParseEnv*   env;
8623
  CClassNode* cc;
8624
  Node*       alt_root;
8625
  Node**      ptail;
8626
} IApplyCaseFoldArg;
8627
8628
static int
8629
i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len,
8630
                  void* arg)
8631
105M
{
8632
105M
  IApplyCaseFoldArg* iarg;
8633
105M
  ParseEnv* env;
8634
105M
  OnigEncoding enc;
8635
105M
  CClassNode* cc;
8636
8637
105M
  iarg = (IApplyCaseFoldArg* )arg;
8638
105M
  env = iarg->env;
8639
105M
  cc  = iarg->cc;
8640
105M
  enc = env->enc;
8641
8642
105M
  if (to_len == 1) {
8643
102M
    int is_in = onig_is_code_in_cc(enc, from, cc);
8644
102M
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8645
102M
    if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
8646
102M
        (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
8647
17.4M
      ADD_CODE_INTO_CC(cc, *to, enc);
8648
17.4M
    }
8649
#else
8650
    if (is_in != 0) {
8651
      if (ONIGENC_MBC_MINLEN(enc) > 1 ||
8652
          ONIGENC_CODE_TO_MBCLEN(enc, *to) != 1) {
8653
        if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
8654
        add_code_range(&(cc->mbuf), env, *to, *to);
8655
      }
8656
      else {
8657
        if (IS_NCCLASS_NOT(cc)) {
8658
          BITSET_CLEAR_BIT(cc->bs, *to);
8659
        }
8660
        else
8661
          BITSET_SET_BIT(cc->bs, *to);
8662
      }
8663
    }
8664
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
8665
102M
  }
8666
3.57M
  else {
8667
3.57M
    int r, i, len;
8668
3.57M
    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8669
8670
3.57M
    if (onig_is_code_in_cc(enc, from, cc)
8671
3.57M
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
8672
3.57M
        && !IS_NCCLASS_NOT(cc)
8673
3.57M
#endif
8674
3.57M
        ) {
8675
723k
      int n, j, m, index;
8676
723k
      Node* list_node;
8677
723k
      Node* ns[3];
8678
8679
723k
      n = 0;
8680
2.28M
      for (i = 0; i < to_len; i++) {
8681
1.55M
        OnigCodePoint code;
8682
1.55M
        Node* csnode;
8683
1.55M
        CClassNode* cs_cc;
8684
8685
1.55M
        index = 0;
8686
1.55M
        if (ONIGENC_IS_UNICODE_ENCODING(enc) &&
8687
1.55M
            (index = onigenc_unicode_fold1_key(&to[i])) >= 0) {
8688
1.28M
          csnode = node_new_cclass();
8689
1.28M
          cs_cc = CCLASS_(csnode);
8690
1.28M
          if (IS_NULL(csnode)) {
8691
0
          err_free_ns:
8692
0
            for (j = 0; j < n; j++) onig_node_free(ns[j]);
8693
0
            return ONIGERR_MEMORY;
8694
0
          }
8695
1.28M
          m = FOLDS1_UNFOLDS_NUM(index);
8696
3.58M
          for (j = 0; j < m; j++) {
8697
2.30M
            code = FOLDS1_UNFOLDS(index)[j];
8698
2.30M
            ADD_CODE_INTO_CC(cs_cc, code, enc);
8699
2.30M
          }
8700
1.28M
          ADD_CODE_INTO_CC(cs_cc, to[i], enc);
8701
1.28M
          ns[n++] = csnode;
8702
1.28M
        }
8703
278k
        else {
8704
278k
          len = ONIGENC_CODE_TO_MBC(enc, to[i], buf);
8705
278k
          if (n == 0 || ND_TYPE(ns[n-1]) != ND_STRING) {
8706
201k
            csnode = node_new_str(buf, buf + len);
8707
201k
            if (IS_NULL(csnode)) goto err_free_ns;
8708
8709
201k
            if (index == 0)
8710
0
              ND_STATUS_ADD(csnode, IGNORECASE);
8711
201k
            else
8712
201k
              ND_STRING_SET_CASE_EXPANDED(csnode);
8713
8714
201k
            ns[n++] = csnode;
8715
201k
          }
8716
76.5k
          else {
8717
76.5k
            r = onig_node_str_cat(ns[n-1], buf, buf + len);
8718
76.5k
            if (r < 0) goto err_free_ns;
8719
76.5k
          }
8720
278k
        }
8721
1.55M
      }
8722
8723
723k
      if (n == 1)
8724
0
        list_node = ns[0];
8725
723k
      else
8726
723k
        list_node = make_list(n, ns);
8727
8728
723k
      *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE);
8729
723k
      if (IS_NULL(*(iarg->ptail))) {
8730
0
        onig_node_free(list_node);
8731
0
        return ONIGERR_MEMORY;
8732
0
      }
8733
723k
      iarg->ptail = &(ND_CDR((*(iarg->ptail))));
8734
723k
    }
8735
3.57M
  }
8736
8737
105M
  return 0;
8738
105M
}
8739
8740
static int
8741
prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
8742
        ParseEnv* env, int group_head)
8743
10.6M
{
8744
10.6M
  int r, len, group;
8745
10.6M
  Node* qn;
8746
10.6M
  Node** tp;
8747
10.6M
  unsigned int parse_depth;
8748
8749
10.6M
 retry:
8750
10.6M
  group = 0;
8751
10.6M
  *np = NULL;
8752
10.6M
  if (tok->type == (enum TokenSyms )term)
8753
30.0k
    goto end_of_token;
8754
8755
10.6M
  parse_depth = env->parse_depth;
8756
8757
10.6M
  switch (tok->type) {
8758
78.5k
  case TK_ALT:
8759
78.7k
  case TK_EOT:
8760
108k
  end_of_token:
8761
108k
    *np = node_new_empty();
8762
108k
    CHECK_NULL_RETURN_MEMERR(*np);
8763
108k
    return tok->type;
8764
0
  break;
8765
8766
1.78M
  case TK_SUBEXP_OPEN:
8767
1.78M
    r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
8768
1.78M
    if (r < 0) return r;
8769
444k
    if (r == 1) { /* group */
8770
228
      if (group_head == 0)
8771
228
        group = 1;
8772
0
      else {
8773
0
        Node* target = *np;
8774
0
        *np = node_new_group(target);
8775
0
        if (IS_NULL(*np)) {
8776
0
          onig_node_free(target);
8777
0
          return ONIGERR_MEMORY;
8778
0
        }
8779
0
        group = 2;
8780
0
      }
8781
228
    }
8782
444k
    else if (r == 2) { /* option only */
8783
9.49k
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH)) {
8784
9.49k
        env->options = BAG_(*np)->o.options;
8785
9.49k
        r = fetch_token(tok, src, end, env);
8786
9.49k
        if (r < 0) return r;
8787
9.49k
        onig_node_free(*np);
8788
9.49k
        goto retry;
8789
9.49k
      }
8790
0
      else {
8791
0
        Node* target;
8792
0
        OnigOptionType prev = env->options;
8793
8794
0
        env->options = BAG_(*np)->o.options;
8795
0
        r = fetch_token(tok, src, end, env);
8796
0
        if (r < 0) return r;
8797
0
        r = prs_alts(&target, tok, term, src, end, env, FALSE);
8798
0
        env->options = prev;
8799
0
        if (r < 0) {
8800
0
          onig_node_free(target);
8801
0
          return r;
8802
0
        }
8803
0
        ND_BODY(*np) = target;
8804
0
      }
8805
0
      return tok->type;
8806
9.49k
    }
8807
435k
    break;
8808
8809
435k
  case TK_SUBEXP_CLOSE:
8810
21.5k
    if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
8811
21.5k
      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
8812
8813
0
    if (tok->escaped) goto tk_crude_byte;
8814
0
    else goto tk_byte;
8815
0
    break;
8816
8817
5.85M
  case TK_STRING:
8818
5.85M
  tk_byte:
8819
5.85M
    {
8820
5.85M
      *np = node_new_str_with_options(tok->backp, *src, env->options);
8821
5.85M
      CHECK_NULL_RETURN_MEMERR(*np);
8822
8823
74.4M
      while (1) {
8824
74.4M
        r = fetch_token(tok, src, end, env);
8825
74.4M
        if (r < 0) return r;
8826
74.4M
        if (r != TK_STRING) break;
8827
8828
68.5M
        r = onig_node_str_cat(*np, tok->backp, *src);
8829
68.5M
        if (r < 0) return r;
8830
68.5M
      }
8831
8832
5.85M
    string_end:
8833
5.85M
      tp = np;
8834
5.85M
      goto repeat;
8835
5.85M
    }
8836
0
    break;
8837
8838
4.04k
  case TK_CRUDE_BYTE:
8839
4.04k
  tk_crude_byte:
8840
4.04k
    {
8841
4.04k
      *np = node_new_str_crude_char(tok->u.byte, env->options);
8842
4.04k
      CHECK_NULL_RETURN_MEMERR(*np);
8843
4.04k
      len = 1;
8844
4.04k
      while (1) {
8845
4.04k
        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
8846
4.04k
          if (len == enclen(env->enc, STR_(*np)->s)) {
8847
4.01k
            r = fetch_token(tok, src, end, env);
8848
4.01k
            goto tk_crude_byte_end;
8849
4.01k
          }
8850
4.04k
        }
8851
8852
33
        r = fetch_token(tok, src, end, env);
8853
33
        if (r < 0) return r;
8854
33
        if (r != TK_CRUDE_BYTE)
8855
33
          return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
8856
8857
0
        r = node_str_cat_char(*np, tok->u.byte);
8858
0
        if (r < 0) return r;
8859
8860
0
        len++;
8861
0
      }
8862
8863
4.01k
    tk_crude_byte_end:
8864
4.01k
      if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
8865
24
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
8866
8867
3.99k
      ND_STRING_CLEAR_CRUDE(*np);
8868
3.99k
      goto string_end;
8869
4.01k
    }
8870
0
    break;
8871
8872
18.1k
  case TK_CODE_POINT:
8873
18.1k
    {
8874
18.1k
      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
8875
18.1k
      len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
8876
18.1k
      if (len < 0) return len;
8877
18.1k
      len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
8878
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
8879
      *np = node_new_str_crude(buf, buf + len, env->options);
8880
#else
8881
18.1k
      *np = node_new_str_with_options(buf, buf + len, env->options);
8882
18.1k
#endif
8883
18.1k
      CHECK_NULL_RETURN_MEMERR(*np);
8884
18.1k
    }
8885
18.1k
    break;
8886
8887
18.1k
  case TK_QUOTE_OPEN:
8888
116
    {
8889
116
      OnigCodePoint end_op[2];
8890
116
      UChar *qstart, *qend, *nextp;
8891
8892
116
      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
8893
116
      end_op[1] = (OnigCodePoint )'E';
8894
116
      qstart = *src;
8895
116
      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
8896
116
      if (IS_NULL(qend)) {
8897
116
        nextp = qend = end;
8898
116
      }
8899
116
      *np = node_new_str_with_options(qstart, qend, env->options);
8900
116
      CHECK_NULL_RETURN_MEMERR(*np);
8901
116
      *src = nextp;
8902
116
    }
8903
0
    break;
8904
8905
11.9k
  case TK_CHAR_TYPE:
8906
11.9k
    {
8907
11.9k
      switch (tok->u.prop.ctype) {
8908
2.74k
      case ONIGENC_CTYPE_WORD:
8909
2.74k
        *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
8910
2.74k
        CHECK_NULL_RETURN_MEMERR(*np);
8911
2.74k
        break;
8912
8913
2.74k
      case ONIGENC_CTYPE_SPACE:
8914
9.15k
      case ONIGENC_CTYPE_DIGIT:
8915
9.15k
      case ONIGENC_CTYPE_XDIGIT:
8916
9.15k
        {
8917
9.15k
          CClassNode* cc;
8918
8919
9.15k
          *np = node_new_cclass();
8920
9.15k
          CHECK_NULL_RETURN_MEMERR(*np);
8921
9.15k
          cc = CCLASS_(*np);
8922
9.15k
          r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
8923
9.15k
          if (r != 0) {
8924
0
            onig_node_free(*np);
8925
0
            *np = NULL_NODE;
8926
0
            return r;
8927
0
          }
8928
9.15k
          if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
8929
9.15k
        }
8930
0
        break;
8931
8932
0
      default:
8933
0
        return ONIGERR_PARSER_BUG;
8934
0
        break;
8935
11.9k
      }
8936
11.9k
    }
8937
11.9k
    break;
8938
8939
11.9k
  case TK_CHAR_PROPERTY:
8940
2
    r = prs_char_property(np, tok, src, end, env);
8941
2
    if (r != 0) return r;
8942
0
    break;
8943
8944
175k
  case TK_OPEN_CC:
8945
175k
    {
8946
175k
      CClassNode* cc;
8947
8948
175k
      r = prs_cc(np, tok, src, end, env);
8949
175k
      if (r != 0) return r;
8950
8951
141k
      cc = CCLASS_(*np);
8952
141k
      if (OPTON_IGNORECASE(env->options)) {
8953
34.3k
        IApplyCaseFoldArg iarg;
8954
8955
34.3k
        iarg.env      = env;
8956
34.3k
        iarg.cc       = cc;
8957
34.3k
        iarg.alt_root = NULL_NODE;
8958
34.3k
        iarg.ptail    = &(iarg.alt_root);
8959
8960
34.3k
        r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->reg->case_fold_flag,
8961
34.3k
                                        i_apply_case_fold, &iarg);
8962
34.3k
        if (r != 0) {
8963
0
          onig_node_free(iarg.alt_root);
8964
0
          return r;
8965
0
        }
8966
34.3k
        if (IS_NOT_NULL(iarg.alt_root)) {
8967
7.05k
          Node* work = onig_node_new_alt(*np, iarg.alt_root);
8968
7.05k
          if (IS_NULL(work)) {
8969
0
            onig_node_free(iarg.alt_root);
8970
0
            return ONIGERR_MEMORY;
8971
0
          }
8972
7.05k
          *np = work;
8973
7.05k
        }
8974
34.3k
      }
8975
141k
    }
8976
141k
    break;
8977
8978
2.34M
  case TK_ANYCHAR:
8979
2.34M
    *np = node_new_anychar(env->options);
8980
2.34M
    CHECK_NULL_RETURN_MEMERR(*np);
8981
2.34M
    break;
8982
8983
2.34M
  case TK_ANYCHAR_ANYTIME:
8984
0
    *np = node_new_anychar(env->options);
8985
0
    CHECK_NULL_RETURN_MEMERR(*np);
8986
0
    qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
8987
0
    CHECK_NULL_RETURN_MEMERR(qn);
8988
0
    ND_BODY(qn) = *np;
8989
0
    *np = qn;
8990
0
    break;
8991
8992
22.9k
  case TK_BACKREF:
8993
22.9k
    len = tok->u.backref.num;
8994
22.9k
    *np = node_new_backref(len,
8995
22.9k
                  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
8996
22.9k
                  tok->u.backref.by_name,
8997
22.9k
#ifdef USE_BACKREF_WITH_LEVEL
8998
22.9k
                           tok->u.backref.exist_level,
8999
22.9k
                           tok->u.backref.level,
9000
22.9k
#endif
9001
22.9k
                           env);
9002
22.9k
    CHECK_NULL_RETURN_MEMERR(*np);
9003
22.9k
    break;
9004
9005
22.9k
#ifdef USE_CALL
9006
22.9k
  case TK_CALL:
9007
19.7k
    {
9008
19.7k
      int gnum = tok->u.call.gnum;
9009
9010
19.7k
      *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
9011
19.7k
                          gnum, tok->u.call.by_number);
9012
19.7k
      CHECK_NULL_RETURN_MEMERR(*np);
9013
19.7k
      env->num_call++;
9014
19.7k
      if (tok->u.call.by_number != 0 && gnum == 0) {
9015
4.05k
        env->flags |= PE_FLAG_HAS_CALL_ZERO;
9016
4.05k
      }
9017
19.7k
    }
9018
0
    break;
9019
0
#endif
9020
9021
266k
  case TK_ANCHOR:
9022
266k
    *np = node_new_anchor_with_options(tok->u.anchor, env->options);
9023
266k
    CHECK_NULL_RETURN_MEMERR(*np);
9024
266k
    break;
9025
9026
266k
  case TK_REPEAT:
9027
43.5k
  case TK_INTERVAL:
9028
43.5k
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
9029
43.5k
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
9030
43.5k
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
9031
0
      else {
9032
0
        *np = node_new_empty();
9033
0
        CHECK_NULL_RETURN_MEMERR(*np);
9034
0
      }
9035
43.5k
    }
9036
0
    else {
9037
0
      goto tk_byte;
9038
0
    }
9039
0
    break;
9040
9041
223
  case TK_KEEP:
9042
223
    r = node_new_keep(np, env);
9043
223
    if (r < 0) return r;
9044
223
    break;
9045
9046
641
  case TK_GENERAL_NEWLINE:
9047
641
    r = node_new_general_newline(np, env);
9048
641
    if (r < 0) return r;
9049
641
    break;
9050
9051
641
  case TK_NO_NEWLINE:
9052
10
    r = node_new_no_newline(np, env);
9053
10
    if (r < 0) return r;
9054
10
    break;
9055
9056
2.23k
  case TK_TRUE_ANYCHAR:
9057
2.23k
    r = node_new_true_anychar(np);
9058
2.23k
    if (r < 0) return r;
9059
2.23k
    break;
9060
9061
2.74k
  case TK_TEXT_SEGMENT:
9062
2.74k
    r = make_text_segment(np, env);
9063
2.74k
    if (r < 0) return r;
9064
2.74k
    break;
9065
9066
2.74k
  default:
9067
0
    return ONIGERR_PARSER_BUG;
9068
0
    break;
9069
10.6M
  }
9070
9071
3.26M
  {
9072
3.26M
    tp = np;
9073
9074
5.88M
  re_entry:
9075
5.88M
    r = fetch_token(tok, src, end, env);
9076
5.88M
    if (r < 0) return r;
9077
9078
11.7M
  repeat:
9079
11.7M
    if (r == TK_REPEAT || r == TK_INTERVAL) {
9080
2.62M
      Node* target;
9081
9082
2.62M
      if (is_invalid_quantifier_target(*tp))
9083
80
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
9084
9085
2.62M
      INC_PARSE_DEPTH(parse_depth);
9086
9087
2.62M
      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
9088
2.62M
                               r == TK_INTERVAL);
9089
2.62M
      CHECK_NULL_RETURN_MEMERR(qn);
9090
2.62M
      QUANT_(qn)->greedy = tok->u.repeat.greedy;
9091
2.62M
      if (group == 2) {
9092
0
        target = node_drop_group(*tp);
9093
0
        *tp = NULL_NODE;
9094
0
      }
9095
2.62M
      else {
9096
2.62M
        target = *tp;
9097
2.62M
      }
9098
2.62M
      r = assign_quantifier_body(qn, target, group, env);
9099
2.62M
      if (r < 0) {
9100
1
        onig_node_free(qn);
9101
1
        *tp = NULL_NODE;
9102
1
        return r;
9103
1
      }
9104
9105
2.62M
      if (tok->u.repeat.possessive != 0) {
9106
602k
        Node* en;
9107
602k
        en = node_new_bag(BAG_STOP_BACKTRACK);
9108
602k
        if (IS_NULL(en)) {
9109
0
          onig_node_free(qn);
9110
0
          return ONIGERR_MEMORY;
9111
0
        }
9112
602k
        ND_BODY(en) = qn;
9113
602k
        qn = en;
9114
602k
      }
9115
9116
2.62M
      if (r == 0) {
9117
1.16M
        *tp = qn;
9118
1.16M
      }
9119
1.45M
      else if (r == 1) { /* x{1,1} ==> x */
9120
2.09k
        onig_node_free(qn);
9121
2.09k
        *tp = target;
9122
2.09k
      }
9123
1.45M
      else if (r == 2) { /* split case: /abc+/ */
9124
1.45M
        Node *tmp;
9125
9126
1.45M
        *tp = node_new_list(*tp, NULL);
9127
1.45M
        if (IS_NULL(*tp)) {
9128
0
          onig_node_free(qn);
9129
0
          return ONIGERR_MEMORY;
9130
0
        }
9131
1.45M
        tmp = ND_CDR(*tp) = node_new_list(qn, NULL);
9132
1.45M
        if (IS_NULL(tmp)) {
9133
0
          onig_node_free(qn);
9134
0
          return ONIGERR_MEMORY;
9135
0
        }
9136
1.45M
        tp = &(ND_CAR(tmp));
9137
1.45M
      }
9138
2.62M
      group = 0;
9139
2.62M
      goto re_entry;
9140
2.62M
    }
9141
11.7M
  }
9142
9143
9.12M
  return r;
9144
11.7M
}
9145
9146
static int
9147
prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9148
           ParseEnv* env, int group_head)
9149
3.51M
{
9150
3.51M
  int r;
9151
3.51M
  Node *node, **headp;
9152
9153
3.51M
  *top = NULL;
9154
3.51M
  INC_PARSE_DEPTH(env->parse_depth);
9155
9156
3.51M
  r = prs_exp(&node, tok, term, src, end, env, group_head);
9157
3.51M
  if (r < 0) {
9158
1.10M
    onig_node_free(node);
9159
1.10M
    return r;
9160
1.10M
  }
9161
9162
2.40M
  if (r == TK_EOT || r == term || r == TK_ALT) {
9163
1.58M
    *top = node;
9164
1.58M
  }
9165
815k
  else {
9166
815k
    *top = node_new_list(node, NULL);
9167
815k
    if (IS_NULL(*top)) {
9168
0
    mem_err:
9169
0
      onig_node_free(node);
9170
0
      return ONIGERR_MEMORY;
9171
0
    }
9172
9173
815k
    headp = &(ND_CDR(*top));
9174
7.64M
    while (r != TK_EOT && r != term && r != TK_ALT) {
9175
7.16M
      r = prs_exp(&node, tok, term, src, end, env, FALSE);
9176
7.16M
      if (r < 0) {
9177
341k
        onig_node_free(node);
9178
341k
        return r;
9179
341k
      }
9180
9181
6.82M
      if (ND_TYPE(node) == ND_LIST) {
9182
1.10M
        *headp = node;
9183
2.20M
        while (IS_NOT_NULL(ND_CDR(node))) node = ND_CDR(node);
9184
1.10M
        headp = &(ND_CDR(node));
9185
1.10M
      }
9186
5.72M
      else {
9187
5.72M
        *headp = node_new_list(node, NULL);
9188
5.72M
        if (IS_NULL(*headp)) goto mem_err;
9189
5.72M
        headp = &(ND_CDR(*headp));
9190
5.72M
      }
9191
6.82M
    }
9192
815k
  }
9193
9194
2.06M
  DEC_PARSE_DEPTH(env->parse_depth);
9195
2.06M
  return r;
9196
2.40M
}
9197
9198
/* term: the terminating token type, TK_EOT or TK_SUBEXP_CLOSE */
9199
static int
9200
prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
9201
         ParseEnv* env, int group_head)
9202
3.12M
{
9203
3.12M
  int r;
9204
3.12M
  Node *node, **headp;
9205
3.12M
  OnigOptionType save_options;
9206
9207
3.12M
  *top = NULL;
9208
3.12M
  INC_PARSE_DEPTH(env->parse_depth);
9209
3.12M
  save_options = env->options;
9210
9211
3.12M
  r = prs_branch(&node, tok, term, src, end, env, group_head);
9212
3.12M
  if (r < 0) {
9213
1.43M
    onig_node_free(node);
9214
1.43M
    return r;
9215
1.43M
  }
9216
9217
1.69M
  if (r == term) {
9218
1.57M
    *top = node;
9219
1.57M
  }
9220
110k
  else if (r == TK_ALT) {
9221
94.0k
    *top  = onig_node_new_alt(node, NULL);
9222
94.0k
    if (IS_NULL(*top)) {
9223
0
      onig_node_free(node);
9224
0
      return ONIGERR_MEMORY;
9225
0
    }
9226
9227
94.0k
    headp = &(ND_CDR(*top));
9228
465k
    while (r == TK_ALT) {
9229
388k
      r = fetch_token(tok, src, end, env);
9230
388k
      if (r < 0) return r;
9231
388k
      r = prs_branch(&node, tok, term, src, end, env, FALSE);
9232
388k
      if (r < 0) {
9233
16.7k
        onig_node_free(node);
9234
16.7k
        return r;
9235
16.7k
      }
9236
371k
      *headp = onig_node_new_alt(node, NULL);
9237
371k
      if (IS_NULL(*headp)) {
9238
0
        onig_node_free(node);
9239
0
        onig_node_free(*top);
9240
0
        *top = NULL_NODE;
9241
0
        return ONIGERR_MEMORY;
9242
0
      }
9243
9244
371k
      headp = &(ND_CDR(*headp));
9245
371k
    }
9246
9247
77.2k
    if (tok->type != (enum TokenSyms )term)
9248
288
      goto err;
9249
77.2k
  }
9250
16.5k
  else {
9251
16.5k
    onig_node_free(node);
9252
16.8k
  err:
9253
16.8k
    if (term == TK_SUBEXP_CLOSE)
9254
16.8k
      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
9255
0
    else
9256
0
      return ONIGERR_PARSER_BUG;
9257
16.8k
  }
9258
9259
1.65M
  env->options = save_options;
9260
1.65M
  DEC_PARSE_DEPTH(env->parse_depth);
9261
1.65M
  return r;
9262
1.69M
}
9263
9264
static int
9265
prs_regexp(Node** top, UChar** src, UChar* end, ParseEnv* env)
9266
1.37M
{
9267
1.37M
  int r;
9268
1.37M
  PToken tok;
9269
9270
1.37M
  ptoken_init(&tok);
9271
1.37M
  r = fetch_token(&tok, src, end, env);
9272
1.37M
  if (r < 0) return r;
9273
1.37M
  r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);
9274
1.37M
  if (r < 0) return r;
9275
9276
1.21M
  return 0;
9277
1.37M
}
9278
9279
#ifdef USE_CALL
9280
static int
9281
make_call_zero_body(Node* node, ParseEnv* env, Node** rnode)
9282
1.18k
{
9283
1.18k
  int r;
9284
9285
1.18k
  Node* x = node_new_memory(0 /* 0: is not named */);
9286
1.18k
  CHECK_NULL_RETURN_MEMERR(x);
9287
9288
1.18k
  ND_BODY(x) = node;
9289
1.18k
  BAG_(x)->m.regnum = 0;
9290
1.18k
  r = scan_env_set_mem_node(env, 0, x);
9291
1.18k
  if (r != 0) {
9292
0
    onig_node_free(x);
9293
0
    return r;
9294
0
  }
9295
9296
1.18k
  *rnode = x;
9297
1.18k
  return 0;
9298
1.18k
}
9299
#endif
9300
9301
extern int
9302
onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
9303
                regex_t* reg, ParseEnv* env)
9304
1.37M
{
9305
1.37M
  int r;
9306
1.37M
  UChar* p;
9307
1.37M
#ifdef USE_CALLOUT
9308
1.37M
  RegexExt* ext;
9309
1.37M
#endif
9310
9311
1.37M
  reg->string_pool        = 0;
9312
1.37M
  reg->string_pool_end    = 0;
9313
1.37M
  reg->num_mem            = 0;
9314
1.37M
  reg->num_repeat         = 0;
9315
1.37M
  reg->num_empty_check    = 0;
9316
1.37M
  reg->repeat_range_alloc = 0;
9317
1.37M
  reg->repeat_range       = (RepeatRange* )NULL;
9318
9319
1.37M
  names_clear(reg);
9320
9321
1.37M
  scan_env_clear(env);
9322
1.37M
  env->options        = reg->options;
9323
1.37M
  env->case_fold_flag = reg->case_fold_flag;
9324
1.37M
  env->enc            = reg->enc;
9325
1.37M
  env->syntax         = reg->syntax;
9326
1.37M
  env->pattern        = (UChar* )pattern;
9327
1.37M
  env->pattern_end    = (UChar* )end;
9328
1.37M
  env->reg            = reg;
9329
9330
1.37M
  *root = NULL;
9331
9332
1.37M
  if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, pattern, end))
9333
0
    return ONIGERR_INVALID_WIDE_CHAR_VALUE;
9334
9335
1.37M
  p = (UChar* )pattern;
9336
1.37M
  r = prs_regexp(root, &p, (UChar* )end, env);
9337
1.37M
  if (r != 0) return r;
9338
9339
1.21M
#ifdef USE_CALL
9340
1.21M
  if ((env->flags & PE_FLAG_HAS_CALL_ZERO) != 0) {
9341
1.18k
    Node* zero_node;
9342
1.18k
    r = make_call_zero_body(*root, env, &zero_node);
9343
1.18k
    if (r != 0) return r;
9344
9345
1.18k
    *root = zero_node;
9346
1.18k
  }
9347
1.21M
#endif
9348
9349
1.21M
  reg->num_mem = env->num_mem;
9350
9351
1.21M
#ifdef USE_CALLOUT
9352
1.21M
  ext = reg->extp;
9353
1.21M
  if (IS_NOT_NULL(ext) && ext->callout_num > 0) {
9354
0
    r = setup_ext_callout_list_values(reg);
9355
0
  }
9356
1.21M
#endif
9357
9358
1.21M
  return r;
9359
1.21M
}
9360
9361
extern void
9362
onig_scan_env_set_error_string(ParseEnv* env, int ecode ARG_UNUSED,
9363
                               UChar* arg, UChar* arg_end)
9364
10.8k
{
9365
10.8k
  env->error     = arg;
9366
10.8k
  env->error_end = arg_end;
9367
10.8k
}