Coverage Report

Created: 2024-02-25 06:12

/src/oniguruma/src/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "regparse.h"
31
32
25.6k
#define OPS_INIT_SIZE  8
33
34
#define ND_IS_REAL_IGNORECASE(node) \
35
284k
  (ND_IS_IGNORECASE(node) && !ND_STRING_IS_CRUDE(node))
36
37
typedef struct {
38
  OnigLen min;
39
  OnigLen max;
40
} MinMaxLen;
41
42
typedef struct {
43
  OnigLen min;
44
  OnigLen max;
45
  int min_is_sure;
46
} MinMaxCharLen;
47
48
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
49
50
static OnigLen node_min_byte_len(Node* node, ParseEnv* env);
51
52
static int
53
ops_init(regex_t* reg, int init_alloc_size)
54
25.6k
{
55
25.6k
  Operation* p;
56
25.6k
  size_t size;
57
58
25.6k
  if (init_alloc_size <= 0)
59
0
    return ONIGERR_PARSER_BUG;
60
61
25.6k
  size = sizeof(Operation) * init_alloc_size;
62
25.6k
  p = (Operation* )xrealloc(reg->ops, size);
63
25.6k
  CHECK_NULL_RETURN_MEMERR(p);
64
25.6k
  reg->ops = p;
65
25.6k
#ifdef USE_DIRECT_THREADED_CODE
66
25.6k
  {
67
25.6k
    enum OpCode* cp;
68
25.6k
    size = sizeof(enum OpCode) * init_alloc_size;
69
25.6k
    cp = (enum OpCode* )xrealloc(reg->ocs, size);
70
25.6k
    CHECK_NULL_RETURN_MEMERR(cp);
71
25.6k
    reg->ocs = cp;
72
25.6k
  }
73
0
#endif
74
75
0
  reg->ops_curr  = 0; /* !!! not yet done ops_new() */
76
25.6k
  reg->ops_alloc = init_alloc_size;
77
25.6k
  reg->ops_used  = 0;
78
79
25.6k
  return ONIG_NORMAL;
80
25.6k
}
81
82
static int
83
ops_resize(regex_t* reg, int n)
84
38.2k
{
85
38.2k
#ifdef USE_DIRECT_THREADED_CODE
86
38.2k
  enum OpCode* cp;
87
38.2k
#endif
88
38.2k
  Operation* p;
89
38.2k
  size_t size;
90
91
38.2k
  if (n == reg->ops_alloc) return ONIG_NORMAL;
92
36.9k
  if (n <= 0) return ONIGERR_PARSER_BUG;
93
94
36.9k
  size = sizeof(Operation) * n;
95
36.9k
  p = (Operation* )xrealloc(reg->ops, size);
96
36.9k
  CHECK_NULL_RETURN_MEMERR(p);
97
36.9k
  reg->ops = p;
98
99
36.9k
#ifdef USE_DIRECT_THREADED_CODE
100
36.9k
  size = sizeof(enum OpCode) * n;
101
36.9k
  cp = (enum OpCode* )xrealloc(reg->ocs, size);
102
36.9k
  CHECK_NULL_RETURN_MEMERR(cp);
103
36.9k
  reg->ocs = cp;
104
36.9k
#endif
105
106
36.9k
  reg->ops_alloc = n;
107
36.9k
  if (reg->ops_used == 0)
108
0
    reg->ops_curr = 0;
109
36.9k
  else
110
36.9k
    reg->ops_curr = reg->ops + (reg->ops_used - 1);
111
112
36.9k
  return ONIG_NORMAL;
113
36.9k
}
114
115
static int
116
ops_new(regex_t* reg)
117
729k
{
118
729k
  if (reg->ops_used >= reg->ops_alloc) {
119
19.7k
    int r = ops_resize(reg, reg->ops_alloc << 1);
120
19.7k
    if (r != ONIG_NORMAL) return r;
121
19.7k
  }
122
123
729k
  reg->ops_curr = reg->ops + reg->ops_used;
124
729k
  reg->ops_used++;
125
126
729k
  xmemset(reg->ops_curr, 0, sizeof(Operation));
127
729k
  return ONIG_NORMAL;
128
729k
}
129
130
static int
131
is_in_string_pool(regex_t* reg, UChar* s)
132
7.58k
{
133
7.58k
  return (s >= reg->string_pool && s < reg->string_pool_end);
134
7.58k
}
135
136
static void
137
ops_free(regex_t* reg)
138
51.3k
{
139
51.3k
  int i;
140
141
51.3k
  if (IS_NULL(reg->ops)) return ;
142
143
755k
  for (i = 0; i < (int )reg->ops_used; i++) {
144
729k
    enum OpCode opcode;
145
729k
    Operation* op;
146
147
729k
    op = reg->ops + i;
148
149
729k
#ifdef USE_DIRECT_THREADED_CODE
150
729k
    opcode = *(reg->ocs + i);
151
#else
152
    opcode = op->opcode;
153
#endif
154
155
729k
    switch (opcode) {
156
2.00k
    case OP_STR_MBN:
157
2.00k
      if (! is_in_string_pool(reg, op->exact_len_n.s))
158
0
        xfree(op->exact_len_n.s);
159
2.00k
      break;
160
5.58k
    case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N:
161
5.58k
      if (! is_in_string_pool(reg, op->exact_n.s))
162
0
        xfree(op->exact_n.s);
163
5.58k
      break;
164
63.5k
    case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
165
93.7k
    case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
166
96.9k
    case OP_STR_MB2N3:
167
96.9k
      break;
168
169
49.9k
    case OP_CCLASS_NOT: case OP_CCLASS:
170
49.9k
      xfree(op->cclass.bsp);
171
49.9k
      break;
172
173
98.2k
    case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
174
98.2k
      xfree(op->cclass_mb.mb);
175
98.2k
      break;
176
6.55k
    case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
177
6.55k
      xfree(op->cclass_mix.mb);
178
6.55k
      xfree(op->cclass_mix.bsp);
179
6.55k
      break;
180
181
4.87k
    case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
182
4.87k
      break;
183
1.06k
    case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC:
184
1.26k
    case OP_BACKREF_CHECK:
185
1.26k
#ifdef USE_BACKREF_WITH_LEVEL
186
1.56k
    case OP_BACKREF_WITH_LEVEL:
187
1.90k
    case OP_BACKREF_WITH_LEVEL_IC:
188
2.13k
    case OP_BACKREF_CHECK_WITH_LEVEL:
189
2.13k
#endif
190
2.13k
      if (op->backref_general.num != 1)
191
1.11k
        xfree(op->backref_general.ns);
192
2.13k
      break;
193
194
463k
    default:
195
463k
      break;
196
729k
    }
197
729k
  }
198
199
25.6k
  xfree(reg->ops);
200
25.6k
#ifdef USE_DIRECT_THREADED_CODE
201
25.6k
  xfree(reg->ocs);
202
25.6k
  reg->ocs = 0;
203
25.6k
#endif
204
205
25.6k
  reg->ops = 0;
206
25.6k
  reg->ops_curr  = 0;
207
25.6k
  reg->ops_alloc = 0;
208
25.6k
  reg->ops_used  = 0;
209
25.6k
}
210
211
static int
212
ops_calc_size_of_string_pool(regex_t* reg)
213
18.5k
{
214
18.5k
  int i;
215
18.5k
  int total;
216
217
18.5k
  if (IS_NULL(reg->ops)) return 0;
218
219
18.5k
  total = 0;
220
748k
  for (i = 0; i < (int )reg->ops_used; i++) {
221
729k
    enum OpCode opcode;
222
729k
    Operation* op;
223
224
729k
    op = reg->ops + i;
225
729k
#ifdef USE_DIRECT_THREADED_CODE
226
729k
    opcode = *(reg->ocs + i);
227
#else
228
    opcode = op->opcode;
229
#endif
230
231
729k
    switch (opcode) {
232
2.00k
    case OP_STR_MBN:
233
2.00k
      total += op->exact_len_n.len * op->exact_len_n.n;
234
2.00k
      break;
235
3.07k
    case OP_STR_N:
236
3.40k
    case OP_STR_MB2N:
237
3.40k
      total += op->exact_n.n * 2;
238
3.40k
      break;
239
2.17k
    case OP_STR_MB3N:
240
2.17k
      total += op->exact_n.n * 3;
241
2.17k
      break;
242
243
722k
    default:
244
722k
      break;
245
729k
    }
246
729k
  }
247
248
18.5k
  return total;
249
18.5k
}
250
251
static int
252
ops_make_string_pool(regex_t* reg)
253
18.5k
{
254
18.5k
  int i;
255
18.5k
  int len;
256
18.5k
  int size;
257
18.5k
  UChar* pool;
258
18.5k
  UChar* curr;
259
260
18.5k
  size = ops_calc_size_of_string_pool(reg);
261
18.5k
  if (size <= 0) {
262
15.5k
    return 0;
263
15.5k
  }
264
265
2.96k
  curr = pool = (UChar* )xmalloc((size_t )size);
266
2.96k
  CHECK_NULL_RETURN_MEMERR(pool);
267
268
273k
  for (i = 0; i < (int )reg->ops_used; i++) {
269
270k
    enum OpCode opcode;
270
270k
    Operation* op;
271
272
270k
    op = reg->ops + i;
273
270k
#ifdef USE_DIRECT_THREADED_CODE
274
270k
    opcode = *(reg->ocs + i);
275
#else
276
    opcode = op->opcode;
277
#endif
278
279
270k
    switch (opcode) {
280
2.00k
    case OP_STR_MBN:
281
2.00k
      len = op->exact_len_n.len * op->exact_len_n.n;
282
2.00k
      xmemcpy(curr, op->exact_len_n.s, len);
283
2.00k
      xfree(op->exact_len_n.s);
284
2.00k
      op->exact_len_n.s = curr;
285
2.00k
      curr += len;
286
2.00k
      break;
287
3.07k
    case OP_STR_N:
288
3.07k
      len = op->exact_n.n;
289
5.58k
    copy:
290
5.58k
      xmemcpy(curr, op->exact_n.s, len);
291
5.58k
      xfree(op->exact_n.s);
292
5.58k
      op->exact_n.s = curr;
293
5.58k
      curr += len;
294
5.58k
      break;
295
331
    case OP_STR_MB2N:
296
331
      len = op->exact_n.n * 2;
297
331
      goto copy;
298
0
      break;
299
2.17k
    case OP_STR_MB3N:
300
2.17k
      len = op->exact_n.n * 3;
301
2.17k
      goto copy;
302
0
      break;
303
304
262k
    default:
305
262k
      break;
306
270k
    }
307
270k
  }
308
309
2.96k
  reg->string_pool     = pool;
310
2.96k
  reg->string_pool_end = pool + size;
311
2.96k
  return 0;
312
2.96k
}
313
314
extern OnigCaseFoldType
315
onig_get_default_case_fold_flag(void)
316
0
{
317
0
  return OnigDefaultCaseFoldFlag;
318
0
}
319
320
extern int
321
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
322
0
{
323
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
324
0
  return 0;
325
0
}
326
327
static int
328
len_multiply_cmp(OnigLen x, int y, OnigLen v)
329
14.1k
{
330
14.1k
  if (x == 0 || y == 0) return -1;
331
332
14.1k
  if (x < INFINITE_LEN / y) {
333
13.9k
    OnigLen xy = x * (OnigLen )y;
334
13.9k
    if (xy > v) return 1;
335
3.95k
    else {
336
3.95k
      if (xy == v) return 0;
337
3.16k
      else return -1;
338
3.95k
    }
339
13.9k
  }
340
264
  else
341
264
    return v == INFINITE_LEN ? 0 : 1;
342
14.1k
}
343
344
extern int
345
onig_positive_int_multiply(int x, int y)
346
10.6k
{
347
10.6k
  if (x == 0 || y == 0) return 0;
348
349
9.40k
  if (x < ONIG_INT_MAX / y)
350
9.34k
    return x * y;
351
56
  else
352
56
    return -1;
353
9.40k
}
354
355
356
static void
357
node_swap(Node* a, Node* b)
358
70.7k
{
359
70.7k
  Node c;
360
361
70.7k
  c = *a; *a = *b; *b = c;
362
363
70.7k
  if (ND_TYPE(a) == ND_STRING) {
364
30.3k
    StrNode* sn = STR_(a);
365
30.3k
    if (sn->capacity == 0) {
366
30.1k
      int len = (int )(sn->end - sn->s);
367
30.1k
      sn->s   = sn->buf;
368
30.1k
      sn->end = sn->s + len;
369
30.1k
    }
370
30.3k
  }
371
372
70.7k
  if (ND_TYPE(b) == ND_STRING) {
373
62.5k
    StrNode* sn = STR_(b);
374
62.5k
    if (sn->capacity == 0) {
375
61.5k
      int len = (int )(sn->end - sn->s);
376
61.5k
      sn->s   = sn->buf;
377
61.5k
      sn->end = sn->s + len;
378
61.5k
    }
379
62.5k
  }
380
70.7k
}
381
382
static int
383
node_list_len(Node* list)
384
62.5k
{
385
62.5k
  int len;
386
387
62.5k
  len = 1;
388
123k
  while (IS_NOT_NULL(ND_CDR(list))) {
389
60.6k
    list = ND_CDR(list);
390
60.6k
    len++;
391
60.6k
  }
392
393
62.5k
  return len;
394
62.5k
}
395
396
static Node*
397
node_list_add(Node* list, Node* x)
398
60.6k
{
399
60.6k
  Node *n;
400
401
60.6k
  n = onig_node_new_list(x, NULL);
402
60.6k
  if (IS_NULL(n)) return NULL_NODE;
403
404
60.6k
  if (IS_NOT_NULL(list)) {
405
889k
    while (IS_NOT_NULL(ND_CDR(list)))
406
828k
      list = ND_CDR(list);
407
408
60.6k
    ND_CDR(list) = n;
409
60.6k
  }
410
411
60.6k
  return n;
412
60.6k
}
413
414
static int
415
node_str_node_cat(Node* node, Node* add)
416
30.9k
{
417
30.9k
  int r;
418
419
30.9k
  if (ND_STATUS(node) != ND_STATUS(add))
420
0
    return ONIGERR_TYPE_BUG;
421
422
30.9k
  if (STR_(node)->flag != STR_(add)->flag)
423
0
    return ONIGERR_TYPE_BUG;
424
425
30.9k
  r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
426
30.9k
  if (r != 0) return r;
427
428
30.9k
  return 0;
429
30.9k
}
430
431
static void
432
node_conv_to_str_node(Node* node, Node* ref_node)
433
902
{
434
902
  xmemset(node, 0, sizeof(*node));
435
902
  ND_SET_TYPE(node, ND_STRING);
436
902
  ND_STATUS(node) = ND_STATUS(ref_node);
437
438
902
  STR_(node)->flag     = STR_(ref_node)->flag;
439
902
  STR_(node)->s        = STR_(node)->buf;
440
902
  STR_(node)->end      = STR_(node)->buf;
441
902
  STR_(node)->capacity = 0;
442
902
}
443
444
static OnigLen
445
distance_add(OnigLen d1, OnigLen d2)
446
1.55M
{
447
1.55M
  if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
448
183k
    return INFINITE_LEN;
449
1.37M
  else {
450
1.37M
    if (d1 <= INFINITE_LEN - d2) return d1 + d2;
451
938
    else return INFINITE_LEN;
452
1.37M
  }
453
1.55M
}
454
455
static OnigLen
456
distance_multiply(OnigLen d, int m)
457
89.7k
{
458
89.7k
  if (m == 0) return 0;
459
460
67.8k
  if (d < INFINITE_LEN / m)
461
63.6k
    return d * m;
462
4.14k
  else
463
4.14k
    return INFINITE_LEN;
464
67.8k
}
465
466
static int
467
bitset_is_empty(BitSetRef bs)
468
101k
{
469
101k
  int i;
470
471
866k
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {
472
772k
    if (bs[i] != 0) return 0;
473
772k
  }
474
94.4k
  return 1;
475
101k
}
476
477
#ifdef USE_CALL
478
479
static int
480
unset_addr_list_init(UnsetAddrList* list, int size)
481
3.47k
{
482
3.47k
  UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
483
3.47k
  CHECK_NULL_RETURN_MEMERR(p);
484
485
3.47k
  list->num   = 0;
486
3.47k
  list->alloc = size;
487
3.47k
  list->us    = p;
488
3.47k
  return 0;
489
3.47k
}
490
491
static void
492
unset_addr_list_end(UnsetAddrList* list)
493
3.47k
{
494
3.47k
  if (IS_NOT_NULL(list->us))
495
3.47k
    xfree(list->us);
496
3.47k
}
497
498
static int
499
unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
500
3.96k
{
501
3.96k
  UnsetAddr* p;
502
3.96k
  int size;
503
504
3.96k
  if (list->num >= list->alloc) {
505
237
    size = list->alloc * 2;
506
237
    p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size);
507
237
    CHECK_NULL_RETURN_MEMERR(p);
508
237
    list->alloc = size;
509
237
    list->us    = p;
510
237
  }
511
512
3.96k
  list->us[list->num].offset = offset;
513
3.96k
  list->us[list->num].target = node;
514
3.96k
  list->num++;
515
3.96k
  return 0;
516
3.96k
}
517
#endif /* USE_CALL */
518
519
enum CharLenReturnType {
520
  CHAR_LEN_NORMAL = 0,       /* fixed or variable */
521
  CHAR_LEN_TOP_ALT_FIXED = 1
522
};
523
524
static int
525
mmcl_fixed(MinMaxCharLen* c)
526
31.2k
{
527
31.2k
  return (c->min == c->max && c->min != INFINITE_LEN);
528
31.2k
}
529
530
static void
531
mmcl_set(MinMaxCharLen* l, OnigLen len)
532
100k
{
533
100k
  l->min = len;
534
100k
  l->max = len;
535
100k
  l->min_is_sure = TRUE;
536
100k
}
537
538
static void
539
mmcl_set_min_max(MinMaxCharLen* l, OnigLen min, OnigLen max, int min_is_sure)
540
6.49k
{
541
6.49k
  l->min = min;
542
6.49k
  l->max = max;
543
6.49k
  l->min_is_sure = min_is_sure;
544
6.49k
}
545
546
static void
547
mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)
548
63.5k
{
549
63.5k
  to->min = distance_add(to->min, add->min);
550
63.5k
  to->max = distance_add(to->max, add->max);
551
552
63.5k
  to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;
553
63.5k
}
554
555
static void
556
mmcl_multiply(MinMaxCharLen* to, int m)
557
3.85k
{
558
3.85k
  to->min = distance_multiply(to->min, m);
559
3.85k
  to->max = distance_multiply(to->max, m);
560
3.85k
}
561
562
static void
563
mmcl_repeat_range_multiply(MinMaxCharLen* to, int mlow, int mhigh)
564
10.7k
{
565
10.7k
  to->min = distance_multiply(to->min, mlow);
566
567
10.7k
  if (IS_INFINITE_REPEAT(mhigh))
568
8.52k
    to->max = INFINITE_LEN;
569
2.21k
  else
570
2.21k
    to->max = distance_multiply(to->max, mhigh);
571
10.7k
}
572
573
static void
574
mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)
575
25.9k
{
576
25.9k
  if (to->min > alt->min) {
577
3.15k
    to->min         = alt->min;
578
3.15k
    to->min_is_sure = alt->min_is_sure;
579
3.15k
  }
580
22.7k
  else if (to->min == alt->min) {
581
7.26k
    if (alt->min_is_sure != FALSE)
582
3.55k
      to->min_is_sure = TRUE;
583
7.26k
  }
584
585
25.9k
  if (to->max < alt->max) to->max = alt->max;
586
25.9k
}
587
588
#ifndef ONIG_DONT_OPTIMIZE
589
590
static int
591
mml_is_equal(MinMaxLen* a, MinMaxLen* b)
592
5.99k
{
593
5.99k
  return a->min == b->min && a->max == b->max;
594
5.99k
}
595
596
static void
597
mml_set_min_max(MinMaxLen* l, OnigLen min, OnigLen max)
598
311k
{
599
311k
  l->min = min;
600
311k
  l->max = max;
601
311k
}
602
603
static void
604
mml_clear(MinMaxLen* l)
605
2.23M
{
606
2.23M
  l->min = l->max = 0;
607
2.23M
}
608
609
static void
610
mml_copy(MinMaxLen* to, MinMaxLen* from)
611
1.44M
{
612
1.44M
  to->min = from->min;
613
1.44M
  to->max = from->max;
614
1.44M
}
615
616
static void
617
mml_add(MinMaxLen* to, MinMaxLen* add)
618
572k
{
619
572k
  to->min = distance_add(to->min, add->min);
620
572k
  to->max = distance_add(to->max, add->max);
621
572k
}
622
623
static void
624
mml_alt_merge(MinMaxLen* to, MinMaxLen* alt)
625
127k
{
626
127k
  if (to->min > alt->min) to->min = alt->min;
627
127k
  if (to->max < alt->max) to->max = alt->max;
628
127k
}
629
630
#endif
631
632
/* fixed size pattern node only */
633
static int
634
node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env,
635
               int level)
636
167k
{
637
167k
  MinMaxCharLen tci;
638
167k
  int r = CHAR_LEN_NORMAL;
639
640
167k
  level++;
641
642
167k
  switch (ND_TYPE(node)) {
643
31.5k
  case ND_LIST:
644
31.5k
    {
645
31.5k
      int first = TRUE;
646
94.4k
      do {
647
94.4k
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
648
94.4k
        if (r < 0) break;
649
94.3k
        if (first == TRUE) {
650
31.5k
          *ci = tci;
651
31.5k
          first = FALSE;
652
31.5k
        }
653
62.8k
        else
654
62.8k
          mmcl_add(ci, &tci);
655
94.3k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
656
31.5k
    }
657
0
    break;
658
659
4.57k
  case ND_ALT:
660
4.57k
    {
661
4.57k
      int fixed;
662
663
4.57k
      r = node_char_len1(ND_CAR(node), reg, ci, env, level);
664
4.57k
      if (r < 0) break;
665
666
4.53k
      fixed = TRUE;
667
27.3k
      while (IS_NOT_NULL(node = ND_CDR(node))) {
668
22.8k
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
669
22.8k
        if (r < 0) break;
670
22.8k
        if (! mmcl_fixed(&tci))
671
608
          fixed = FALSE;
672
22.8k
        mmcl_alt_merge(ci, &tci);
673
22.8k
      }
674
4.53k
      if (r < 0) break;
675
676
4.51k
      r = CHAR_LEN_NORMAL;
677
4.51k
      if (mmcl_fixed(ci)) break;
678
679
3.31k
      if (fixed == TRUE && level == 1) {
680
1.54k
        r = CHAR_LEN_TOP_ALT_FIXED;
681
1.54k
      }
682
3.31k
    }
683
0
    break;
684
685
33.8k
  case ND_STRING:
686
33.8k
    {
687
33.8k
      OnigLen clen;
688
33.8k
      StrNode* sn = STR_(node);
689
33.8k
      UChar *s = sn->s;
690
691
33.8k
      if (ND_IS_REAL_IGNORECASE(node) &&
692
33.8k
          CASE_FOLD_IS_NOT_ASCII_ONLY(env->case_fold_flag)) {
693
        /* Such a case is possible.
694
           ex. /(?i)(?<=\1)(a)/
695
           Backref node refer to capture group, but it doesn't tune yet.
696
         */
697
54
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
698
54
        break;
699
54
      }
700
701
33.7k
      clen = 0;
702
90.6k
      while (s < sn->end) {
703
56.9k
        s += enclen(reg->enc, s);
704
56.9k
        clen = distance_add(clen, 1);
705
56.9k
      }
706
33.7k
      mmcl_set(ci, clen);
707
33.7k
    }
708
0
    break;
709
710
16.4k
  case ND_QUANT:
711
16.4k
    {
712
16.4k
      QuantNode* qn = QUANT_(node);
713
714
16.4k
      if (qn->lower == qn->upper) {
715
5.64k
        if (qn->upper == 0) {
716
1.77k
          mmcl_set(ci, 0);
717
1.77k
        }
718
3.86k
        else {
719
3.86k
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
720
3.86k
          if (r < 0) break;
721
3.85k
          mmcl_multiply(ci, qn->lower);
722
3.85k
        }
723
5.64k
      }
724
10.8k
      else {
725
10.8k
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
726
10.8k
        if (r < 0) break;
727
10.7k
        mmcl_repeat_range_multiply(ci, qn->lower, qn->upper);
728
10.7k
      }
729
16.4k
    }
730
16.3k
    break;
731
732
16.3k
#ifdef USE_CALL
733
16.3k
  case ND_CALL:
734
938
    if (ND_IS_RECURSION(node))
735
418
      mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
736
520
    else
737
520
      r = node_char_len1(ND_BODY(node), reg, ci, env, level);
738
938
    break;
739
0
#endif
740
741
6.85k
  case ND_CTYPE:
742
53.0k
  case ND_CCLASS:
743
53.0k
    mmcl_set(ci, 1);
744
53.0k
    break;
745
746
13.3k
  case ND_BAG:
747
13.3k
    {
748
13.3k
      BagNode* en = BAG_(node);
749
750
13.3k
      switch (en->type) {
751
8.19k
      case BAG_MEMORY:
752
8.19k
        if (ND_IS_FIXED_CLEN(node)) {
753
5.56k
          mmcl_set_min_max(ci, en->min_char_len, en->max_char_len,
754
5.56k
                           ND_IS_FIXED_CLEN_MIN_SURE(node));
755
5.56k
        }
756
2.62k
        else {
757
2.62k
          if (ND_IS_MARK1(node)) {
758
204
            mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
759
204
          }
760
2.41k
          else {
761
2.41k
            ND_STATUS_ADD(node, MARK1);
762
2.41k
            r = node_char_len1(ND_BODY(node), reg, ci, env, level);
763
2.41k
            ND_STATUS_REMOVE(node, MARK1);
764
2.41k
            if (r < 0) break;
765
766
2.34k
            en->min_char_len = ci->min;
767
2.34k
            en->max_char_len = ci->max;
768
2.34k
            ND_STATUS_ADD(node, FIXED_CLEN);
769
2.34k
            if (ci->min_is_sure != FALSE)
770
2.06k
              ND_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);
771
2.34k
          }
772
2.62k
        }
773
        /* can't optimize look-behind if capture exists. */
774
8.11k
        ci->min_is_sure = FALSE;
775
8.11k
        break;
776
74
      case BAG_OPTION:
777
3.71k
      case BAG_STOP_BACKTRACK:
778
3.71k
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
779
3.71k
        break;
780
1.45k
      case BAG_IF_ELSE:
781
1.45k
        {
782
1.45k
          MinMaxCharLen eci;
783
784
1.45k
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
785
1.45k
          if (r < 0) break;
786
787
1.45k
          if (IS_NOT_NULL(en->te.Then)) {
788
768
            r = node_char_len1(en->te.Then, reg, &tci, env, level);
789
768
            if (r < 0) break;
790
762
            mmcl_add(ci, &tci);
791
762
          }
792
793
1.44k
          if (IS_NOT_NULL(en->te.Else)) {
794
1.00k
            r = node_char_len1(en->te.Else, reg, &eci, env, level);
795
1.00k
            if (r < 0) break;
796
1.00k
          }
797
438
          else {
798
438
            mmcl_set(&eci, 0);
799
438
          }
800
801
1.44k
          mmcl_alt_merge(ci, &eci);
802
1.44k
        }
803
0
        break;
804
0
      default: /* never come here */
805
0
        r = ONIGERR_PARSER_BUG;
806
0
        break;
807
13.3k
      }
808
13.3k
    }
809
13.3k
    break;
810
811
13.3k
  case ND_GIMMICK:
812
4.24k
    mmcl_set(ci, 0);
813
4.24k
    break;
814
815
7.22k
  case ND_ANCHOR:
816
7.38k
  zero:
817
7.38k
    mmcl_set(ci, 0);
818
    /* can't optimize look-behind if anchor exists. */
819
7.38k
    ci->min_is_sure = FALSE;
820
7.38k
    break;
821
822
2.74k
  case ND_BACKREF:
823
2.74k
    if (ND_IS_CHECKER(node))
824
154
      goto zero;
825
826
2.59k
    if (ND_IS_RECURSION(node)) {
827
304
#ifdef USE_BACKREF_WITH_LEVEL
828
304
      if (ND_IS_NEST_LEVEL(node)) {
829
62
        mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
830
62
        break;
831
62
      }
832
242
#endif
833
834
242
      mmcl_set_min_max(ci, 0, 0, FALSE);
835
242
      break;
836
304
    }
837
838
2.28k
    {
839
2.28k
      int i;
840
2.28k
      int* backs;
841
2.28k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
842
2.28k
      BackRefNode* br = BACKREF_(node);
843
844
2.28k
      backs = BACKREFS_P(br);
845
2.28k
      r = node_char_len1(mem_env[backs[0]].mem_node, reg, ci, env, level);
846
2.28k
      if (r < 0) break;
847
2.25k
      if (! mmcl_fixed(ci)) ci->min_is_sure = FALSE;
848
849
3.90k
      for (i = 1; i < br->back_num; i++) {
850
1.65k
        r = node_char_len1(mem_env[backs[i]].mem_node, reg, &tci, env, level);
851
1.65k
        if (r < 0) break;
852
1.65k
        if (! mmcl_fixed(&tci)) tci.min_is_sure = FALSE;
853
1.65k
        mmcl_alt_merge(ci, &tci);
854
1.65k
      }
855
2.25k
    }
856
0
    break;
857
858
0
  default: /* never come here */
859
0
    r = ONIGERR_PARSER_BUG;
860
0
    break;
861
167k
  }
862
863
167k
  return r;
864
167k
}
865
866
static int
867
node_char_len(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env)
868
17.5k
{
869
17.5k
  return node_char_len1(node, reg, ci, env, 0);
870
17.5k
}
871
872
873
static int
874
add_op(regex_t* reg, int opcode)
875
729k
{
876
729k
  int r;
877
878
729k
  r = ops_new(reg);
879
729k
  if (r != ONIG_NORMAL) return r;
880
881
729k
#ifdef USE_DIRECT_THREADED_CODE
882
729k
  *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
883
#else
884
  reg->ops_curr->opcode = opcode;
885
#endif
886
887
729k
  return 0;
888
729k
}
889
890
static int compile_length_tree(Node* node, regex_t* reg, ParseEnv* env);
891
static int compile_tree(Node* node, regex_t* reg, ParseEnv* env);
892
893
894
#define IS_NEED_STR_LEN_OP(op) \
895
102k
   ((op) == OP_STR_N    || (op) == OP_STR_MB2N ||\
896
102k
    (op) == OP_STR_MB3N || (op) == OP_STR_MBN)
897
898
static int
899
select_str_opcode(int mb_len, int str_len)
900
104k
{
901
104k
  int op;
902
903
104k
  switch (mb_len) {
904
67.8k
  case 1:
905
67.8k
    switch (str_len) {
906
37.9k
    case 1:  op = OP_STR_1; break;
907
20.1k
    case 2:  op = OP_STR_2; break;
908
3.80k
    case 3:  op = OP_STR_3; break;
909
1.70k
    case 4:  op = OP_STR_4; break;
910
1.21k
    case 5:  op = OP_STR_5; break;
911
3.07k
    default: op = OP_STR_N; break;
912
67.8k
    }
913
67.8k
    break;
914
915
67.8k
  case 2:
916
32.5k
    switch (str_len) {
917
18.3k
    case 1:  op = OP_STR_MB2N1; break;
918
10.6k
    case 2:  op = OP_STR_MB2N2; break;
919
3.25k
    case 3:  op = OP_STR_MB2N3; break;
920
331
    default: op = OP_STR_MB2N;  break;
921
32.5k
    }
922
32.5k
    break;
923
924
32.5k
  case 3:
925
2.17k
    op = OP_STR_MB3N;
926
2.17k
    break;
927
928
2.00k
  default:
929
2.00k
    op = OP_STR_MBN;
930
2.00k
    break;
931
104k
  }
932
933
104k
  return op;
934
104k
}
935
936
static int
937
is_strict_real_node(Node* node)
938
35.1k
{
939
35.1k
  switch (ND_TYPE(node)) {
940
9.49k
  case ND_STRING:
941
9.49k
    {
942
9.49k
      StrNode* sn = STR_(node);
943
9.49k
      return (sn->end != sn->s);
944
0
    }
945
0
    break;
946
947
6.33k
  case ND_CCLASS:
948
12.9k
  case ND_CTYPE:
949
12.9k
    return 1;
950
0
    break;
951
952
12.7k
  default:
953
12.7k
    return 0;
954
0
    break;
955
35.1k
  }
956
35.1k
}
957
958
static int
959
compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ParseEnv* env)
960
25.5k
{
961
25.5k
  int r;
962
25.5k
  int saved_num_empty_check;
963
25.5k
  int emptiness;
964
25.5k
  Node* body;
965
966
25.5k
  body = ND_BODY((Node* )qn);
967
25.5k
  emptiness = qn->emptiness;
968
25.5k
  saved_num_empty_check = reg->num_empty_check;
969
970
25.5k
  if (emptiness != BODY_IS_NOT_EMPTY) {
971
5.55k
    r = add_op(reg, OP_EMPTY_CHECK_START);
972
5.55k
    if (r != 0) return r;
973
5.55k
    COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
974
5.55k
    reg->num_empty_check++;
975
5.55k
  }
976
977
25.5k
  r = compile_tree(body, reg, env);
978
25.5k
  if (r != 0) return r;
979
980
25.5k
  if (emptiness != BODY_IS_NOT_EMPTY) {
981
5.55k
    if (emptiness == BODY_MAY_BE_EMPTY)
982
2.62k
      r = add_op(reg, OP_EMPTY_CHECK_END);
983
2.93k
    else if (emptiness == BODY_MAY_BE_EMPTY_MEM) {
984
2.34k
      if (ND_IS_EMPTY_STATUS_CHECK(qn) != 0 && qn->empty_status_mem != 0) {
985
495
        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
986
495
        if (r != 0) return r;
987
495
        COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
988
495
      }
989
1.85k
      else
990
1.85k
        r = add_op(reg, OP_EMPTY_CHECK_END);
991
2.34k
    }
992
583
#ifdef USE_CALL
993
583
    else if (emptiness == BODY_MAY_BE_EMPTY_REC) {
994
583
      r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
995
583
      if (r != 0) return r;
996
583
      COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
997
583
    }
998
5.55k
#endif
999
1000
5.55k
    if (r != 0) return r;
1001
5.55k
    COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
1002
5.55k
  }
1003
25.5k
  return r;
1004
25.5k
}
1005
1006
#ifdef USE_CALL
1007
static int
1008
compile_call(CallNode* node, regex_t* reg, ParseEnv* env)
1009
3.96k
{
1010
3.96k
  int r;
1011
3.96k
  int offset;
1012
1013
3.96k
  r = add_op(reg, OP_CALL);
1014
3.96k
  if (r != 0) return r;
1015
1016
3.96k
  COP(reg)->call.addr = 0; /* dummy addr. */
1017
#ifdef ONIG_DEBUG_MATCH_COUNTER
1018
  COP(reg)->call.called_mem = node->called_gnum;
1019
#endif
1020
1021
3.96k
  offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
1022
3.96k
  r = unset_addr_list_add(env->unset_addr_list, offset, ND_CALL_BODY(node));
1023
3.96k
  return r;
1024
3.96k
}
1025
#endif
1026
1027
static int
1028
compile_tree_n_times(Node* node, int n, regex_t* reg, ParseEnv* env)
1029
34.7k
{
1030
34.7k
  int i, r;
1031
1032
56.8k
  for (i = 0; i < n; i++) {
1033
22.0k
    r = compile_tree(node, reg, env);
1034
22.0k
    if (r != 0) return r;
1035
22.0k
  }
1036
34.7k
  return 0;
1037
34.7k
}
1038
1039
static int
1040
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
1041
                          regex_t* reg ARG_UNUSED)
1042
240k
{
1043
240k
  return 1;
1044
240k
}
1045
1046
static int
1047
add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
1048
104k
{
1049
104k
  int op;
1050
104k
  int r;
1051
104k
  int byte_len;
1052
104k
  UChar* p;
1053
104k
  UChar* end;
1054
1055
104k
  op = select_str_opcode(mb_len, str_len);
1056
104k
  r = add_op(reg, op);
1057
104k
  if (r != 0) return r;
1058
1059
104k
  byte_len = mb_len * str_len;
1060
104k
  end = s + byte_len;
1061
1062
104k
  if (op == OP_STR_MBN) {
1063
2.00k
    p = onigenc_strdup(reg->enc, s, end);
1064
2.00k
    CHECK_NULL_RETURN_MEMERR(p);
1065
1066
2.00k
    COP(reg)->exact_len_n.len = mb_len;
1067
2.00k
    COP(reg)->exact_len_n.n   = str_len;
1068
2.00k
    COP(reg)->exact_len_n.s   = p;
1069
2.00k
  }
1070
102k
  else if (IS_NEED_STR_LEN_OP(op)) {
1071
5.58k
    p = onigenc_strdup(reg->enc, s, end);
1072
5.58k
    CHECK_NULL_RETURN_MEMERR(p);
1073
5.58k
    COP(reg)->exact_n.n = str_len;
1074
5.58k
    COP(reg)->exact_n.s = p;
1075
5.58k
  }
1076
96.9k
  else {
1077
96.9k
    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
1078
96.9k
    xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
1079
96.9k
  }
1080
1081
104k
  return 0;
1082
104k
}
1083
1084
static int
1085
compile_length_string_node(Node* node, regex_t* reg)
1086
256k
{
1087
256k
  int rlen, r, len, prev_len, slen;
1088
256k
  UChar *p, *prev;
1089
256k
  StrNode* sn;
1090
256k
  OnigEncoding enc = reg->enc;
1091
1092
256k
  sn = STR_(node);
1093
256k
  if (sn->end <= sn->s)
1094
33.9k
    return 0;
1095
1096
222k
  p = prev = sn->s;
1097
222k
  prev_len = enclen(enc, p);
1098
222k
  p += prev_len;
1099
222k
  slen = 1;
1100
222k
  rlen = 0;
1101
1102
412k
  for (; p < sn->end; ) {
1103
189k
    len = enclen(enc, p);
1104
189k
    if (len == prev_len) {
1105
179k
      slen++;
1106
179k
    }
1107
10.0k
    else {
1108
10.0k
      r = add_compile_string_length(prev, prev_len, slen, reg);
1109
10.0k
      rlen += r;
1110
10.0k
      prev = p;
1111
10.0k
      slen = 1;
1112
10.0k
      prev_len = len;
1113
10.0k
    }
1114
189k
    p += len;
1115
189k
  }
1116
1117
222k
  r = add_compile_string_length(prev, prev_len, slen, reg);
1118
222k
  rlen += r;
1119
222k
  return rlen;
1120
256k
}
1121
1122
static int
1123
compile_length_string_crude_node(StrNode* sn, regex_t* reg)
1124
7.76k
{
1125
7.76k
  if (sn->end <= sn->s)
1126
0
    return 0;
1127
1128
7.76k
  return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
1129
7.76k
                                   reg);
1130
7.76k
}
1131
1132
static int
1133
compile_string_node(Node* node, regex_t* reg)
1134
113k
{
1135
113k
  int r, len, prev_len, slen;
1136
113k
  UChar *p, *prev, *end;
1137
113k
  StrNode* sn;
1138
113k
  OnigEncoding enc = reg->enc;
1139
1140
113k
  sn = STR_(node);
1141
113k
  if (sn->end <= sn->s)
1142
15.2k
    return 0;
1143
1144
97.8k
  end = sn->end;
1145
1146
97.8k
  p = prev = sn->s;
1147
97.8k
  prev_len = enclen(enc, p);
1148
97.8k
  p += prev_len;
1149
97.8k
  slen = 1;
1150
1151
213k
  for (; p < end; ) {
1152
115k
    len = enclen(enc, p);
1153
115k
    if (len == prev_len) {
1154
110k
      slen++;
1155
110k
    }
1156
4.94k
    else {
1157
4.94k
      r = add_compile_string(prev, prev_len, slen, reg);
1158
4.94k
      if (r != 0) return r;
1159
1160
4.94k
      prev  = p;
1161
4.94k
      slen  = 1;
1162
4.94k
      prev_len = len;
1163
4.94k
    }
1164
1165
115k
    p += len;
1166
115k
  }
1167
1168
97.8k
  return add_compile_string(prev, prev_len, slen, reg);
1169
97.8k
}
1170
1171
static int
1172
compile_string_crude_node(StrNode* sn, regex_t* reg)
1173
1.78k
{
1174
1.78k
  if (sn->end <= sn->s)
1175
0
    return 0;
1176
1177
1.78k
  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
1178
1.78k
}
1179
1180
static void*
1181
set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
1182
104k
{
1183
104k
  size_t len;
1184
104k
  void* p;
1185
1186
104k
  len = (size_t )mbuf->used;
1187
104k
  p = xmalloc(len);
1188
104k
  if (IS_NULL(p)) return NULL;
1189
1190
104k
  xmemcpy(p, mbuf->p, len);
1191
104k
  return p;
1192
104k
}
1193
1194
static int
1195
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
1196
411k
{
1197
411k
  return 1;
1198
411k
}
1199
1200
static int
1201
compile_cclass_node(CClassNode* cc, regex_t* reg)
1202
154k
{
1203
154k
  int r;
1204
1205
154k
  if (IS_NULL(cc->mbuf)) {
1206
49.9k
    r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
1207
49.9k
    if (r != 0) return r;
1208
1209
49.9k
    COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
1210
49.9k
    CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
1211
49.9k
    xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
1212
49.9k
  }
1213
104k
  else {
1214
104k
    void* p;
1215
1216
104k
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
1217
98.2k
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
1218
98.2k
      if (r != 0) return r;
1219
1220
98.2k
      p = set_multi_byte_cclass(cc->mbuf, reg);
1221
98.2k
      CHECK_NULL_RETURN_MEMERR(p);
1222
98.2k
      COP(reg)->cclass_mb.mb = p;
1223
98.2k
    }
1224
6.55k
    else {
1225
6.55k
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
1226
6.55k
      if (r != 0) return r;
1227
1228
6.55k
      COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
1229
6.55k
      CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
1230
6.55k
      xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
1231
1232
6.55k
      p = set_multi_byte_cclass(cc->mbuf, reg);
1233
6.55k
      CHECK_NULL_RETURN_MEMERR(p);
1234
6.55k
      COP(reg)->cclass_mix.mb = p;
1235
6.55k
    }
1236
104k
  }
1237
1238
154k
  return 0;
1239
154k
}
1240
1241
static void
1242
set_addr_in_repeat_range(regex_t* reg)
1243
18.5k
{
1244
18.5k
  int i;
1245
1246
23.8k
  for (i = 0; i < reg->num_repeat; i++) {
1247
5.27k
    RepeatRange* p = reg->repeat_range + i;
1248
5.27k
    int offset = p->u.offset;
1249
5.27k
    p->u.pcode = reg->ops + offset;
1250
5.27k
  }
1251
18.5k
}
1252
1253
static int
1254
entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
1255
5.27k
{
1256
5.27k
#define REPEAT_RANGE_ALLOC  4
1257
1258
5.27k
  RepeatRange* p;
1259
1260
5.27k
  if (reg->repeat_range_alloc == 0) {
1261
2.45k
    p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
1262
2.45k
    CHECK_NULL_RETURN_MEMERR(p);
1263
2.45k
    reg->repeat_range = p;
1264
2.45k
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
1265
2.45k
  }
1266
2.82k
  else if (reg->repeat_range_alloc <= id) {
1267
342
    int n;
1268
342
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
1269
342
    p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
1270
342
    CHECK_NULL_RETURN_MEMERR(p);
1271
342
    reg->repeat_range = p;
1272
342
    reg->repeat_range_alloc = n;
1273
342
  }
1274
2.47k
  else {
1275
2.47k
    p = reg->repeat_range;
1276
2.47k
  }
1277
1278
5.27k
  p[id].lower    = lower;
1279
5.27k
  p[id].upper    = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
1280
5.27k
  p[id].u.offset = ops_index;
1281
5.27k
  return 0;
1282
5.27k
}
1283
1284
static int
1285
compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
1286
                          regex_t* reg, ParseEnv* env)
1287
5.27k
{
1288
5.27k
  int r;
1289
5.27k
  int num_repeat = reg->num_repeat++;
1290
1291
5.27k
  r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
1292
5.27k
  if (r != 0) return r;
1293
1294
5.27k
  COP(reg)->repeat.id   = num_repeat;
1295
5.27k
  COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
1296
1297
5.27k
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
1298
5.27k
                         COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
1299
5.27k
  if (r != 0) return r;
1300
1301
5.27k
  r = compile_quant_body_with_empty_check(qn, reg, env);
1302
5.27k
  if (r != 0) return r;
1303
1304
5.27k
  r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
1305
5.27k
  if (r != 0) return r;
1306
1307
5.27k
  COP(reg)->repeat_inc.id = num_repeat;
1308
5.27k
  return r;
1309
5.27k
}
1310
1311
static int
1312
is_anychar_infinite_greedy(QuantNode* qn)
1313
96.9k
{
1314
96.9k
  if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
1315
96.9k
      ND_IS_ANYCHAR(ND_QUANT_BODY(qn)))
1316
10.7k
    return 1;
1317
86.1k
  else
1318
86.1k
    return 0;
1319
96.9k
}
1320
1321
40.1k
#define QUANTIFIER_EXPAND_LIMIT_SIZE   10
1322
#define CKN_ON   (ckn > 0)
1323
1324
static int
1325
compile_length_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1326
58.7k
{
1327
58.7k
  int len, mod_tlen;
1328
58.7k
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1329
58.7k
  enum BodyEmptyType emptiness = qn->emptiness;
1330
58.7k
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1331
1332
58.7k
  if (tlen < 0) return tlen;
1333
58.7k
  if (tlen == 0) return 0;
1334
1335
  /* anychar repeat */
1336
57.8k
  if (is_anychar_infinite_greedy(qn)) {
1337
5.00k
    if (qn->lower <= 1 ||
1338
5.00k
        len_multiply_cmp((OnigLen )tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
1339
4.82k
      if (IS_NOT_NULL(qn->next_head_exact))
1340
423
        return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
1341
4.40k
      else
1342
4.40k
        return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
1343
4.82k
    }
1344
5.00k
  }
1345
1346
53.0k
  mod_tlen = tlen;
1347
53.0k
  if (emptiness != BODY_IS_NOT_EMPTY)
1348
10.5k
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1349
1350
53.0k
  if (infinite &&
1351
53.0k
      (qn->lower <= 1 ||
1352
33.1k
       len_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1353
32.5k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1354
3.32k
      len = OPSIZE_JUMP;
1355
3.32k
    }
1356
29.2k
    else {
1357
29.2k
      len = tlen * qn->lower;
1358
29.2k
    }
1359
1360
32.5k
    if (qn->greedy) {
1361
30.7k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1362
30.7k
      if (IS_NOT_NULL(qn->head_exact))
1363
8.07k
        len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
1364
22.6k
      else
1365
22.6k
#endif
1366
22.6k
      if (IS_NOT_NULL(qn->next_head_exact))
1367
1.77k
        len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
1368
20.8k
      else
1369
20.8k
        len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
1370
30.7k
    }
1371
1.86k
    else
1372
1.86k
      len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
1373
32.5k
  }
1374
20.4k
  else if (qn->upper == 0) {
1375
2.02k
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1376
169
      len = OPSIZE_JUMP + tlen;
1377
169
    }
1378
1.85k
    else
1379
1.85k
      len = 0;
1380
2.02k
  }
1381
18.4k
  else if (!infinite && qn->greedy &&
1382
18.4k
           (qn->upper == 1 ||
1383
15.5k
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1384
10.7k
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1385
10.7k
    len = tlen * qn->lower;
1386
10.7k
    len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
1387
10.7k
  }
1388
7.67k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1389
899
    len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
1390
899
  }
1391
6.78k
  else {
1392
6.78k
    len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
1393
6.78k
  }
1394
1395
53.0k
  return len;
1396
57.8k
}
1397
1398
static int
1399
compile_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1400
39.5k
{
1401
39.5k
  int i, r, mod_tlen;
1402
39.5k
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1403
39.5k
  enum BodyEmptyType emptiness = qn->emptiness;
1404
39.5k
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1405
1406
39.5k
  if (tlen < 0) return tlen;
1407
39.5k
  if (tlen == 0) return 0;
1408
1409
39.0k
  if (is_anychar_infinite_greedy(qn) &&
1410
39.0k
      (qn->lower <= 1 ||
1411
5.78k
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1412
5.65k
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1413
5.65k
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1414
5.65k
    if (r != 0) return r;
1415
5.65k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1416
409
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1417
242
                 OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
1418
409
      if (r != 0) return r;
1419
1420
409
      COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
1421
409
      return 0;
1422
409
    }
1423
5.24k
    else {
1424
5.24k
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1425
2.85k
                 OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
1426
5.24k
      return r;
1427
5.24k
    }
1428
5.65k
  }
1429
1430
33.4k
  mod_tlen = tlen;
1431
33.4k
  if (emptiness != BODY_IS_NOT_EMPTY)
1432
7.39k
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1433
1434
33.4k
  if (infinite &&
1435
33.4k
      (qn->lower <= 1 ||
1436
20.7k
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1437
20.3k
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1438
20.3k
    int addr;
1439
1440
20.3k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1441
1.75k
      r = add_op(reg, OP_JUMP);
1442
1.75k
      if (r != 0) return r;
1443
1.75k
      if (qn->greedy) {
1444
1.72k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1445
1.72k
        if (IS_NOT_NULL(qn->head_exact))
1446
249
          COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
1447
1.47k
        else
1448
1.47k
#endif
1449
1.47k
        if (IS_NOT_NULL(qn->next_head_exact))
1450
223
          COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
1451
1.25k
        else
1452
1.25k
          COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
1453
1.72k
      }
1454
30
      else {
1455
30
        COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
1456
30
      }
1457
1.75k
    }
1458
18.5k
    else {
1459
18.5k
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1460
18.5k
      if (r != 0) return r;
1461
18.5k
    }
1462
1463
20.3k
    if (qn->greedy) {
1464
19.3k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1465
19.3k
      if (IS_NOT_NULL(qn->head_exact)) {
1466
4.02k
        r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
1467
4.02k
        if (r != 0) return r;
1468
4.02k
        COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1469
4.02k
        COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0];
1470
1471
4.02k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1472
4.02k
        if (r != 0) return r;
1473
1474
4.02k
        addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
1475
4.02k
      }
1476
15.3k
      else
1477
15.3k
#endif
1478
15.3k
      if (IS_NOT_NULL(qn->next_head_exact)) {
1479
1.35k
        r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
1480
1.35k
        if (r != 0) return r;
1481
1.35k
        COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1482
1.35k
        COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0];
1483
1484
1.35k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1485
1.35k
        if (r != 0) return r;
1486
1487
1.35k
        addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
1488
1.35k
      }
1489
13.9k
      else {
1490
13.9k
        r = add_op(reg, OP_PUSH);
1491
13.9k
        if (r != 0) return r;
1492
13.9k
        COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1493
1494
13.9k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1495
13.9k
        if (r != 0) return r;
1496
1497
13.9k
        addr = -(mod_tlen + (int )OPSIZE_PUSH);
1498
13.9k
      }
1499
1500
19.3k
      r = add_op(reg, OP_JUMP);
1501
19.3k
      if (r != 0) return r;
1502
19.3k
      COP(reg)->jump.addr = addr;
1503
19.3k
    }
1504
954
    else {
1505
954
      r = add_op(reg, OP_JUMP);
1506
954
      if (r != 0) return r;
1507
954
      COP(reg)->jump.addr = mod_tlen + SIZE_INC;
1508
1509
954
      r = compile_quant_body_with_empty_check(qn, reg, env);
1510
954
      if (r != 0) return r;
1511
1512
954
      r = add_op(reg, OP_PUSH);
1513
954
      if (r != 0) return r;
1514
954
      COP(reg)->push.addr = -mod_tlen;
1515
954
    }
1516
20.3k
  }
1517
13.1k
  else if (qn->upper == 0) {
1518
812
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1519
101
      r = add_op(reg, OP_JUMP);
1520
101
      if (r != 0) return r;
1521
101
      COP(reg)->jump.addr = tlen + SIZE_INC;
1522
1523
101
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1524
101
    }
1525
711
    else {
1526
      /* Nothing output */
1527
711
      r = 0;
1528
711
    }
1529
812
  }
1530
12.2k
  else if (! infinite && qn->greedy &&
1531
12.2k
           (qn->upper == 1 ||
1532
10.6k
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1533
6.48k
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1534
6.48k
    int n = qn->upper - qn->lower;
1535
1536
6.48k
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1537
6.48k
    if (r != 0) return r;
1538
1539
11.9k
    for (i = 0; i < n; i++) {
1540
5.48k
      int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
1541
5.48k
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1542
1543
5.48k
      r = add_op(reg, OP_PUSH);
1544
5.48k
      if (r != 0) return r;
1545
5.48k
      COP(reg)->push.addr = v;
1546
1547
5.48k
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1548
5.48k
      if (r != 0) return r;
1549
5.48k
    }
1550
6.48k
  }
1551
5.81k
  else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1552
532
    r = add_op(reg, OP_PUSH);
1553
532
    if (r != 0) return r;
1554
532
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
1555
1556
532
    r = add_op(reg, OP_JUMP);
1557
532
    if (r != 0) return r;
1558
532
    COP(reg)->jump.addr = tlen + SIZE_INC;
1559
1560
532
    r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1561
532
  }
1562
5.27k
  else {
1563
5.27k
    r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
1564
5.27k
  }
1565
33.4k
  return r;
1566
33.4k
}
1567
1568
static int
1569
compile_length_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1570
523
{
1571
523
  int tlen;
1572
1573
523
  tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1574
1575
523
  return tlen;
1576
523
}
1577
1578
static int
1579
compile_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1580
271
{
1581
271
  int r;
1582
1583
271
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1584
1585
271
  return r;
1586
271
}
1587
1588
static int
1589
compile_length_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1590
51.4k
{
1591
51.4k
  int len;
1592
51.4k
  int tlen;
1593
1594
51.4k
  if (node->type == BAG_OPTION)
1595
523
    return compile_length_option_node(node, reg, env);
1596
1597
50.8k
  if (ND_BAG_BODY(node)) {
1598
50.8k
    tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1599
50.8k
    if (tlen < 0) return tlen;
1600
50.8k
  }
1601
0
  else
1602
0
    tlen = 0;
1603
1604
50.8k
  switch (node->type) {
1605
28.6k
  case BAG_MEMORY:
1606
28.6k
#ifdef USE_CALL
1607
1608
28.6k
    if (node->m.regnum == 0 && ND_IS_CALLED(node)) {
1609
0
      len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1610
0
      return len;
1611
0
    }
1612
1613
28.6k
    if (ND_IS_CALLED(node)) {
1614
3.28k
      len = OPSIZE_MEM_START_PUSH + tlen
1615
3.28k
        + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1616
3.28k
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1617
478
        len += (ND_IS_RECURSION(node)
1618
478
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1619
2.80k
      else
1620
2.80k
        len += (ND_IS_RECURSION(node)
1621
2.80k
                ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1622
3.28k
    }
1623
25.4k
    else if (ND_IS_RECURSION(node)) {
1624
2.39k
      len = OPSIZE_MEM_START_PUSH;
1625
2.39k
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1626
2.39k
                     ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
1627
2.39k
    }
1628
23.0k
    else
1629
23.0k
#endif
1630
23.0k
    {
1631
23.0k
      if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1632
22.9k
        len = OPSIZE_MEM_START_PUSH;
1633
75
      else
1634
75
        len = OPSIZE_MEM_START;
1635
1636
23.0k
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1637
23.0k
                     ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
1638
23.0k
    }
1639
28.6k
    break;
1640
1641
17.0k
  case BAG_STOP_BACKTRACK:
1642
17.0k
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1643
4.03k
      int v;
1644
4.03k
      QuantNode* qn;
1645
1646
4.03k
      qn = QUANT_(ND_BAG_BODY(node));
1647
4.03k
      tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1648
4.03k
      if (tlen < 0) return tlen;
1649
1650
4.03k
      v = onig_positive_int_multiply(qn->lower, tlen);
1651
4.03k
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1652
4.03k
      len = v + OPSIZE_PUSH + tlen + OPSIZE_POP + OPSIZE_JUMP;
1653
4.03k
    }
1654
13.0k
    else {
1655
13.0k
      len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1656
13.0k
    }
1657
17.0k
    break;
1658
1659
17.0k
  case BAG_IF_ELSE:
1660
5.15k
    {
1661
5.15k
      Node* cond = ND_BAG_BODY(node);
1662
5.15k
      Node* Then = node->te.Then;
1663
5.15k
      Node* Else = node->te.Else;
1664
1665
5.15k
      len = compile_length_tree(cond, reg, env);
1666
5.15k
      if (len < 0) return len;
1667
5.15k
      len += OPSIZE_PUSH + OPSIZE_MARK + OPSIZE_CUT_TO_MARK;
1668
1669
5.15k
      if (IS_NOT_NULL(Then)) {
1670
1.97k
        tlen = compile_length_tree(Then, reg, env);
1671
1.97k
        if (tlen < 0) return tlen;
1672
1.97k
        len += tlen;
1673
1.97k
      }
1674
1675
5.15k
      len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK;
1676
1677
5.15k
      if (IS_NOT_NULL(Else)) {
1678
3.75k
        tlen = compile_length_tree(Else, reg, env);
1679
3.75k
        if (tlen < 0) return tlen;
1680
3.75k
        len += tlen;
1681
3.75k
      }
1682
5.15k
    }
1683
5.15k
    break;
1684
1685
5.15k
  case BAG_OPTION:
1686
    /* never come here, but set for escape warning */
1687
0
    len = 0;
1688
0
    break;
1689
1690
0
  default:
1691
0
    return ONIGERR_TYPE_BUG;
1692
0
    break;
1693
50.8k
  }
1694
1695
50.8k
  return len;
1696
50.8k
}
1697
1698
static int
1699
compile_bag_memory_node(BagNode* node, regex_t* reg, ParseEnv* env)
1700
17.9k
{
1701
17.9k
  int r;
1702
1703
17.9k
#ifdef USE_CALL
1704
17.9k
  if (ND_IS_CALLED(node)) {
1705
2.21k
    int len;
1706
1707
2.21k
    r = add_op(reg, OP_CALL);
1708
2.21k
    if (r != 0) return r;
1709
1710
2.21k
    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
1711
2.21k
    ND_STATUS_ADD(node, FIXED_ADDR);
1712
2.21k
    COP(reg)->call.addr = (int )node->m.called_addr;
1713
1714
2.21k
    if (node->m.regnum == 0) {
1715
847
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1716
847
      len += OPSIZE_RETURN;
1717
1718
847
      r = add_op(reg, OP_JUMP);
1719
847
      if (r != 0) return r;
1720
847
      COP(reg)->jump.addr = len + SIZE_INC;
1721
1722
847
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1723
847
      if (r != 0) return r;
1724
1725
847
      r = add_op(reg, OP_RETURN);
1726
847
      return r;
1727
847
    }
1728
1.36k
    else {
1729
1.36k
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1730
1.36k
      len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
1731
1.36k
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1732
224
        len += (ND_IS_RECURSION(node)
1733
224
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1734
1.14k
      else
1735
1.14k
        len += (ND_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1736
1737
1.36k
      r = add_op(reg, OP_JUMP);
1738
1.36k
      if (r != 0) return r;
1739
1.36k
      COP(reg)->jump.addr = len + SIZE_INC;
1740
1.36k
    }
1741
2.21k
  }
1742
17.0k
#endif
1743
1744
17.0k
  if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1745
13.8k
    r = add_op(reg, OP_MEM_START_PUSH);
1746
3.20k
  else
1747
3.20k
    r = add_op(reg, OP_MEM_START);
1748
17.0k
  if (r != 0) return r;
1749
17.0k
  COP(reg)->memory_start.num = node->m.regnum;
1750
1751
17.0k
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1752
17.0k
  if (r != 0) return r;
1753
1754
17.0k
#ifdef USE_CALL
1755
17.0k
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1756
6.24k
    r = add_op(reg, (ND_IS_RECURSION(node)
1757
6.24k
                     ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
1758
10.8k
  else
1759
10.8k
    r = add_op(reg, (ND_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
1760
17.0k
  if (r != 0) return r;
1761
17.0k
  COP(reg)->memory_end.num = node->m.regnum;
1762
1763
17.0k
  if (ND_IS_CALLED(node)) {
1764
1.36k
    r = add_op(reg, OP_RETURN);
1765
1.36k
  }
1766
#else
1767
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1768
    r = add_op(reg, OP_MEM_END_PUSH);
1769
  else
1770
    r = add_op(reg, OP_MEM_END);
1771
  if (r != 0) return r;
1772
  COP(reg)->memory_end.num = node->m.regnum;
1773
#endif
1774
1775
17.0k
  return r;
1776
17.0k
}
1777
1778
static int
1779
compile_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1780
33.1k
{
1781
33.1k
  int r, len;
1782
1783
33.1k
  switch (node->type) {
1784
17.9k
  case BAG_MEMORY:
1785
17.9k
    r = compile_bag_memory_node(node, reg, env);
1786
17.9k
    break;
1787
1788
271
  case BAG_OPTION:
1789
271
    r = compile_option_node(node, reg, env);
1790
271
    break;
1791
1792
11.7k
  case BAG_STOP_BACKTRACK:
1793
11.7k
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1794
4.09k
      QuantNode* qn = QUANT_(ND_BAG_BODY(node));
1795
4.09k
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1796
4.09k
      if (r != 0) return r;
1797
1798
4.09k
      len = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1799
4.09k
      if (len < 0) return len;
1800
1801
4.09k
      r = add_op(reg, OP_PUSH);
1802
4.09k
      if (r != 0) return r;
1803
4.09k
      COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP + OPSIZE_JUMP;
1804
1805
4.09k
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1806
4.09k
      if (r != 0) return r;
1807
4.09k
      r = add_op(reg, OP_POP);
1808
4.09k
      if (r != 0) return r;
1809
1810
4.09k
      r = add_op(reg, OP_JUMP);
1811
4.09k
      if (r != 0) return r;
1812
4.09k
      COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP);
1813
4.09k
    }
1814
7.62k
    else {
1815
7.62k
      MemNumType mid;
1816
1817
7.62k
      ID_ENTRY(env, mid);
1818
7.62k
      r = add_op(reg, OP_MARK);
1819
7.62k
      if (r != 0) return r;
1820
7.62k
      COP(reg)->mark.id = mid;
1821
7.62k
      COP(reg)->mark.save_pos = 0;
1822
1823
7.62k
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1824
7.62k
      if (r != 0) return r;
1825
7.62k
      r = add_op(reg, OP_CUT_TO_MARK);
1826
7.62k
      if (r != 0) return r;
1827
7.62k
      COP(reg)->cut_to_mark.id = mid;
1828
7.62k
      COP(reg)->cut_to_mark.restore_pos = 0;
1829
7.62k
    }
1830
11.7k
    break;
1831
1832
11.7k
  case BAG_IF_ELSE:
1833
3.19k
    {
1834
3.19k
      int cond_len, then_len, else_len, jump_len;
1835
3.19k
      MemNumType mid;
1836
3.19k
      Node* cond = ND_BAG_BODY(node);
1837
3.19k
      Node* Then = node->te.Then;
1838
3.19k
      Node* Else = node->te.Else;
1839
1840
3.19k
      ID_ENTRY(env, mid);
1841
1842
3.19k
      r = add_op(reg, OP_MARK);
1843
3.19k
      if (r != 0) return r;
1844
3.19k
      COP(reg)->mark.id = mid;
1845
3.19k
      COP(reg)->mark.save_pos = 0;
1846
1847
3.19k
      cond_len = compile_length_tree(cond, reg, env);
1848
3.19k
      if (cond_len < 0) return cond_len;
1849
3.19k
      if (IS_NOT_NULL(Then)) {
1850
1.32k
        then_len = compile_length_tree(Then, reg, env);
1851
1.32k
        if (then_len < 0) return then_len;
1852
1.32k
      }
1853
1.87k
      else
1854
1.87k
        then_len = 0;
1855
1856
3.19k
      jump_len = cond_len + then_len + OPSIZE_CUT_TO_MARK + OPSIZE_JUMP;
1857
1858
3.19k
      r = add_op(reg, OP_PUSH);
1859
3.19k
      if (r != 0) return r;
1860
3.19k
      COP(reg)->push.addr = SIZE_INC + jump_len;
1861
1862
3.19k
      r = compile_tree(cond, reg, env);
1863
3.19k
      if (r != 0) return r;
1864
3.19k
      r = add_op(reg, OP_CUT_TO_MARK);
1865
3.19k
      if (r != 0) return r;
1866
3.19k
      COP(reg)->cut_to_mark.id = mid;
1867
3.19k
      COP(reg)->cut_to_mark.restore_pos = 0;
1868
1869
3.19k
      if (IS_NOT_NULL(Then)) {
1870
1.32k
        r = compile_tree(Then, reg, env);
1871
1.32k
        if (r != 0) return r;
1872
1.32k
      }
1873
1874
3.19k
      if (IS_NOT_NULL(Else)) {
1875
2.12k
        else_len = compile_length_tree(Else, reg, env);
1876
2.12k
        if (else_len < 0) return else_len;
1877
2.12k
      }
1878
1.07k
      else
1879
1.07k
        else_len = 0;
1880
1881
3.19k
      r = add_op(reg, OP_JUMP);
1882
3.19k
      if (r != 0) return r;
1883
3.19k
      COP(reg)->jump.addr = OPSIZE_CUT_TO_MARK + else_len + SIZE_INC;
1884
1885
3.19k
      r = add_op(reg, OP_CUT_TO_MARK);
1886
3.19k
      if (r != 0) return r;
1887
3.19k
      COP(reg)->cut_to_mark.id = mid;
1888
3.19k
      COP(reg)->cut_to_mark.restore_pos = 0;
1889
1890
3.19k
      if (IS_NOT_NULL(Else)) {
1891
2.12k
        r = compile_tree(Else, reg, env);
1892
2.12k
      }
1893
3.19k
    }
1894
0
    break;
1895
1896
0
  default:
1897
0
    return ONIGERR_TYPE_BUG;
1898
0
    break;
1899
33.1k
  }
1900
1901
33.1k
  return r;
1902
33.1k
}
1903
1904
static int
1905
compile_length_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1906
29.1k
{
1907
29.1k
  int len;
1908
29.1k
  int tlen = 0;
1909
1910
29.1k
  if (IS_NOT_NULL(ND_ANCHOR_BODY(node))) {
1911
15.0k
    tlen = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
1912
15.0k
    if (tlen < 0) return tlen;
1913
15.0k
  }
1914
1915
29.1k
  switch (node->type) {
1916
2.96k
  case ANCR_PREC_READ:
1917
2.96k
    len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1918
2.96k
    break;
1919
706
  case ANCR_PREC_READ_NOT:
1920
706
    len = OPSIZE_PUSH + OPSIZE_MARK + tlen + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
1921
706
    break;
1922
9.24k
  case ANCR_LOOK_BEHIND:
1923
9.24k
    if (node->char_min_len == node->char_max_len)
1924
7.80k
      len = OPSIZE_MARK + OPSIZE_STEP_BACK_START + tlen + OPSIZE_CUT_TO_MARK;
1925
1.43k
    else {
1926
1.43k
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_UPDATE_VAR + OPSIZE_FAIL + OPSIZE_JUMP + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_CUT_TO_MARK + OPSIZE_UPDATE_VAR;
1927
1928
1.43k
      if (IS_NOT_NULL(node->lead_node)) {
1929
823
        int llen = compile_length_tree(node->lead_node, reg, env);
1930
823
        if (llen < 0) return llen;
1931
1932
823
        len += OPSIZE_MOVE + llen;
1933
823
      }
1934
1935
1.43k
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1936
398
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1937
1.43k
    }
1938
9.24k
    break;
1939
9.24k
  case ANCR_LOOK_BEHIND_NOT:
1940
2.16k
    if (node->char_min_len == node->char_max_len)
1941
1.56k
      len = OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + tlen + OPSIZE_POP_TO_MARK + OPSIZE_FAIL + OPSIZE_POP;
1942
599
    else {
1943
599
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_POP;
1944
1945
599
      if (IS_NOT_NULL(node->lead_node)) {
1946
433
        int llen = compile_length_tree(node->lead_node, reg, env);
1947
433
        if (llen < 0) return llen;
1948
1949
433
        len += OPSIZE_MOVE + llen;
1950
433
      }
1951
1952
599
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1953
214
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1954
599
    }
1955
2.16k
    break;
1956
1957
2.16k
  case ANCR_WORD_BOUNDARY:
1958
298
  case ANCR_NO_WORD_BOUNDARY:
1959
298
#ifdef USE_WORD_BEGIN_END
1960
369
  case ANCR_WORD_BEGIN:
1961
370
  case ANCR_WORD_END:
1962
370
#endif
1963
370
    len = OPSIZE_WORD_BOUNDARY;
1964
370
    break;
1965
1966
415
  case ANCR_TEXT_SEGMENT_BOUNDARY:
1967
8.39k
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1968
8.39k
    len = SIZE_OPCODE;
1969
8.39k
    break;
1970
1971
5.26k
  default:
1972
5.26k
    len = SIZE_OPCODE;
1973
5.26k
    break;
1974
29.1k
  }
1975
1976
29.1k
  return len;
1977
29.1k
}
1978
1979
static int
1980
compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1981
3.19k
{
1982
3.19k
  int r;
1983
1984
3.19k
  if (node->char_min_len == node->char_max_len) {
1985
2.63k
    MemNumType mid;
1986
1987
2.63k
    ID_ENTRY(env, mid);
1988
2.63k
    r = add_op(reg, OP_MARK);
1989
2.63k
    if (r != 0) return r;
1990
2.63k
    COP(reg)->mark.id = mid;
1991
2.63k
    COP(reg)->mark.save_pos = FALSE;
1992
1993
2.63k
    r = add_op(reg, OP_STEP_BACK_START);
1994
2.63k
    if (r != 0) return r;
1995
2.63k
    COP(reg)->step_back_start.initial   = node->char_min_len;
1996
2.63k
    COP(reg)->step_back_start.remaining = 0;
1997
2.63k
    COP(reg)->step_back_start.addr      = 1;
1998
1999
2.63k
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2000
2.63k
    if (r != 0) return r;
2001
2002
2.63k
    r = add_op(reg, OP_CUT_TO_MARK);
2003
2.63k
    if (r != 0) return r;
2004
2.63k
    COP(reg)->cut_to_mark.id = mid;
2005
2.63k
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2006
2.63k
  }
2007
564
  else {
2008
564
    OnigLen diff;
2009
564
    MemNumType mid1, mid2;
2010
564
    MemNumType mid3 = 0; /* ignore uninitialized warning */
2011
2012
564
    if (IS_NOT_NULL(node->lead_node)) {
2013
270
      MinMaxCharLen ci;
2014
2015
270
      r = node_char_len(node->lead_node, reg, &ci, env);
2016
270
      if (r < 0) return r;
2017
270
      r = add_op(reg, OP_MOVE);
2018
270
      if (r != 0) return r;
2019
270
      COP(reg)->move.n = -((RelPositionType )ci.min);
2020
270
      r = compile_tree(node->lead_node, reg, env);
2021
270
      if (r != 0) return r;
2022
270
    }
2023
2024
564
    ID_ENTRY(env, mid1);
2025
564
    r = add_op(reg, OP_SAVE_VAL);
2026
564
    if (r != 0) return r;
2027
564
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2028
564
    COP(reg)->save_val.id   = mid1;
2029
2030
564
    r = add_op(reg, OP_UPDATE_VAR);
2031
564
    if (r != 0) return r;
2032
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2033
2034
564
    ID_ENTRY(env, mid2);
2035
564
    r = add_op(reg, OP_MARK);
2036
564
    if (r != 0) return r;
2037
564
    COP(reg)->mark.id = mid2;
2038
564
    COP(reg)->mark.save_pos = FALSE;
2039
2040
564
    r = add_op(reg, OP_PUSH);
2041
564
    if (r != 0) return r;
2042
564
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
2043
2044
564
    r = add_op(reg, OP_JUMP);
2045
564
    if (r != 0) return r;
2046
564
    COP(reg)->jump.addr = SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL;
2047
2048
564
    r = add_op(reg, OP_UPDATE_VAR);
2049
564
    if (r != 0) return r;
2050
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2051
564
    COP(reg)->update_var.id    = mid1;
2052
564
    COP(reg)->update_var.clear = FALSE;
2053
564
    r = add_op(reg, OP_FAIL);
2054
564
    if (r != 0) return r;
2055
2056
564
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2057
62
      ID_ENTRY(env, mid3);
2058
62
      r = add_op(reg, OP_SAVE_VAL);
2059
62
      if (r != 0) return r;
2060
62
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2061
62
      COP(reg)->save_val.id   = mid3;
2062
62
    }
2063
2064
564
    r = add_op(reg, OP_STEP_BACK_START);
2065
564
    if (r != 0) return r;
2066
2067
564
    if (node->char_max_len != INFINITE_LEN)
2068
190
      diff = node->char_max_len - node->char_min_len;
2069
374
    else
2070
374
      diff = INFINITE_LEN;
2071
2072
564
    COP(reg)->step_back_start.initial   = node->char_min_len;
2073
564
    COP(reg)->step_back_start.remaining = diff;
2074
564
    COP(reg)->step_back_start.addr      = 2;
2075
2076
564
    r = add_op(reg, OP_STEP_BACK_NEXT);
2077
564
    if (r != 0) return r;
2078
2079
564
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2080
564
    if (r != 0) return r;
2081
2082
564
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2083
62
      r = add_op(reg, OP_UPDATE_VAR);
2084
62
      if (r != 0) return r;
2085
62
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2086
62
      COP(reg)->update_var.id    = mid3;
2087
62
      COP(reg)->update_var.clear = FALSE;
2088
62
    }
2089
2090
564
    r = add_op(reg, OP_CHECK_POSITION);
2091
564
    if (r != 0) return r;
2092
564
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2093
2094
564
    r = add_op(reg, OP_CUT_TO_MARK);
2095
564
    if (r != 0) return r;
2096
564
    COP(reg)->cut_to_mark.id = mid2;
2097
564
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2098
2099
564
    r = add_op(reg, OP_UPDATE_VAR);
2100
564
    if (r != 0) return r;
2101
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2102
564
    COP(reg)->update_var.id    = mid1;
2103
564
    COP(reg)->update_var.clear = TRUE;
2104
564
  }
2105
2106
3.19k
  return r;
2107
3.19k
}
2108
2109
static int
2110
compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg,
2111
                                    ParseEnv* env)
2112
2.68k
{
2113
2.68k
  int r;
2114
2.68k
  int len;
2115
2116
2.68k
  len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2117
2118
2.68k
  if (node->char_min_len == node->char_max_len) {
2119
2.38k
    MemNumType mid;
2120
2121
2.38k
    ID_ENTRY(env, mid);
2122
2.38k
    r = add_op(reg, OP_MARK);
2123
2.38k
    if (r != 0) return r;
2124
2.38k
    COP(reg)->mark.id = mid;
2125
2.38k
    COP(reg)->mark.save_pos = FALSE;
2126
2127
2.38k
    r = add_op(reg, OP_PUSH);
2128
2.38k
    if (r != 0) return r;
2129
2.38k
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL;
2130
2131
2.38k
    r = add_op(reg, OP_STEP_BACK_START);
2132
2.38k
    if (r != 0) return r;
2133
2.38k
    COP(reg)->step_back_start.initial   = node->char_min_len;
2134
2.38k
    COP(reg)->step_back_start.remaining = 0;
2135
2.38k
    COP(reg)->step_back_start.addr      = 1;
2136
2137
2.38k
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2138
2.38k
    if (r != 0) return r;
2139
2140
2.38k
    r = add_op(reg, OP_POP_TO_MARK);
2141
2.38k
    if (r != 0) return r;
2142
2.38k
    COP(reg)->pop_to_mark.id = mid;
2143
2.38k
    r = add_op(reg, OP_FAIL);
2144
2.38k
    if (r != 0) return r;
2145
2.38k
    r = add_op(reg, OP_POP);
2146
2.38k
  }
2147
301
  else {
2148
301
    OnigLen diff;
2149
301
    MemNumType mid1, mid2;
2150
301
    MemNumType mid3 = 0; /* ignore uninitialized warning */
2151
2152
301
    ID_ENTRY(env, mid1);
2153
301
    r = add_op(reg, OP_SAVE_VAL);
2154
301
    if (r != 0) return r;
2155
301
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2156
301
    COP(reg)->save_val.id   = mid1;
2157
2158
301
    r = add_op(reg, OP_UPDATE_VAR);
2159
301
    if (r != 0) return r;
2160
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2161
2162
301
    ID_ENTRY(env, mid2);
2163
301
    r = add_op(reg, OP_MARK);
2164
301
    if (r != 0) return r;
2165
301
    COP(reg)->mark.id = mid2;
2166
301
    COP(reg)->mark.save_pos = FALSE;
2167
2168
301
    r = add_op(reg, OP_PUSH);
2169
301
    if (r != 0) return r;
2170
2171
301
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + len + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL;
2172
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
2173
58
      COP(reg)->push.addr += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
2174
2175
301
    if (IS_NOT_NULL(node->lead_node)) {
2176
182
      int clen;
2177
182
      MinMaxCharLen ci;
2178
2179
182
      clen = compile_length_tree(node->lead_node, reg, env);
2180
182
      COP(reg)->push.addr += OPSIZE_MOVE + clen;
2181
2182
182
      r = node_char_len(node->lead_node, reg, &ci, env);
2183
182
      if (r < 0) return r;
2184
182
      r = add_op(reg, OP_MOVE);
2185
182
      if (r != 0) return r;
2186
182
      COP(reg)->move.n = -((RelPositionType )ci.min);
2187
2188
182
      r = compile_tree(node->lead_node, reg, env);
2189
182
      if (r != 0) return r;
2190
182
    }
2191
2192
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2193
58
      ID_ENTRY(env, mid3);
2194
58
      r = add_op(reg, OP_SAVE_VAL);
2195
58
      if (r != 0) return r;
2196
58
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2197
58
      COP(reg)->save_val.id   = mid3;
2198
58
    }
2199
2200
301
    r = add_op(reg, OP_STEP_BACK_START);
2201
301
    if (r != 0) return r;
2202
2203
301
    if (node->char_max_len != INFINITE_LEN)
2204
121
      diff = node->char_max_len - node->char_min_len;
2205
180
    else
2206
180
      diff = INFINITE_LEN;
2207
2208
301
    COP(reg)->step_back_start.initial   = node->char_min_len;
2209
301
    COP(reg)->step_back_start.remaining = diff;
2210
301
    COP(reg)->step_back_start.addr      = 2;
2211
2212
301
    r = add_op(reg, OP_STEP_BACK_NEXT);
2213
301
    if (r != 0) return r;
2214
2215
301
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2216
301
    if (r != 0) return r;
2217
2218
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2219
58
      r = add_op(reg, OP_UPDATE_VAR);
2220
58
      if (r != 0) return r;
2221
58
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2222
58
      COP(reg)->update_var.id    = mid3;
2223
58
      COP(reg)->update_var.clear = FALSE;
2224
58
    }
2225
2226
301
    r = add_op(reg, OP_CHECK_POSITION);
2227
301
    if (r != 0) return r;
2228
301
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2229
2230
301
    r = add_op(reg, OP_POP_TO_MARK);
2231
301
    if (r != 0) return r;
2232
301
    COP(reg)->pop_to_mark.id = mid2;
2233
2234
301
    r = add_op(reg, OP_UPDATE_VAR);
2235
301
    if (r != 0) return r;
2236
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2237
301
    COP(reg)->update_var.id   = mid1;
2238
301
    COP(reg)->update_var.clear = FALSE;
2239
2240
301
    r = add_op(reg, OP_POP); /* pop save val */
2241
301
    if (r != 0) return r;
2242
301
    r = add_op(reg, OP_FAIL);
2243
301
    if (r != 0) return r;
2244
2245
301
    r = add_op(reg, OP_UPDATE_VAR);
2246
301
    if (r != 0) return r;
2247
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2248
301
    COP(reg)->update_var.id   = mid1;
2249
301
    COP(reg)->update_var.clear = FALSE;
2250
2251
301
    r = add_op(reg, OP_POP); /* pop mark */
2252
301
    if (r != 0) return r;
2253
301
    r = add_op(reg, OP_POP); /* pop save val */
2254
301
  }
2255
2256
2.68k
  return r;
2257
2.68k
}
2258
2259
static int
2260
compile_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
2261
19.1k
{
2262
19.1k
  int r, len;
2263
19.1k
  enum OpCode op;
2264
19.1k
  MemNumType mid;
2265
2266
19.1k
  switch (node->type) {
2267
1.30k
  case ANCR_BEGIN_BUF:      r = add_op(reg, OP_BEGIN_BUF);      break;
2268
394
  case ANCR_END_BUF:        r = add_op(reg, OP_END_BUF);        break;
2269
1.27k
  case ANCR_BEGIN_LINE:     r = add_op(reg, OP_BEGIN_LINE);     break;
2270
1.07k
  case ANCR_END_LINE:       r = add_op(reg, OP_END_LINE);       break;
2271
1.56k
  case ANCR_SEMI_END_BUF:   r = add_op(reg, OP_SEMI_END_BUF);   break;
2272
258
  case ANCR_BEGIN_POSITION:
2273
258
    r = add_op(reg, OP_CHECK_POSITION);
2274
258
    if (r != 0) return r;
2275
258
    COP(reg)->check_position.type = CHECK_POSITION_SEARCH_START;
2276
258
    break;
2277
2278
511
  case ANCR_WORD_BOUNDARY:
2279
511
    op = OP_WORD_BOUNDARY;
2280
1.33k
  word:
2281
1.33k
    r = add_op(reg, op);
2282
1.33k
    if (r != 0) return r;
2283
1.33k
    COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
2284
1.33k
    break;
2285
2286
378
  case ANCR_NO_WORD_BOUNDARY:
2287
378
    op = OP_NO_WORD_BOUNDARY; goto word;
2288
0
    break;
2289
0
#ifdef USE_WORD_BEGIN_END
2290
192
  case ANCR_WORD_BEGIN:
2291
192
    op = OP_WORD_BEGIN; goto word;
2292
0
    break;
2293
258
  case ANCR_WORD_END:
2294
258
    op = OP_WORD_END; goto word;
2295
0
    break;
2296
0
#endif
2297
2298
532
  case ANCR_TEXT_SEGMENT_BOUNDARY:
2299
4.64k
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
2300
4.64k
    {
2301
4.64k
      enum TextSegmentBoundaryType type;
2302
2303
4.64k
      r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
2304
4.64k
      if (r != 0) return r;
2305
2306
4.64k
      type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
2307
4.64k
#ifdef USE_UNICODE_WORD_BREAK
2308
4.64k
      if (ND_IS_TEXT_SEGMENT_WORD(node))
2309
1.96k
        type = WORD_BOUNDARY;
2310
4.64k
#endif
2311
2312
4.64k
      COP(reg)->text_segment_boundary.type = type;
2313
4.64k
      COP(reg)->text_segment_boundary.not =
2314
4.64k
        (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
2315
4.64k
    }
2316
0
    break;
2317
2318
1.09k
  case ANCR_PREC_READ:
2319
1.09k
    {
2320
1.09k
      ID_ENTRY(env, mid);
2321
1.09k
      r = add_op(reg, OP_MARK);
2322
1.09k
      if (r != 0) return r;
2323
1.09k
      COP(reg)->mark.id = mid;
2324
1.09k
      COP(reg)->mark.save_pos = TRUE;
2325
2326
1.09k
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2327
1.09k
      if (r != 0) return r;
2328
2329
1.09k
      r = add_op(reg, OP_CUT_TO_MARK);
2330
1.09k
      if (r != 0) return r;
2331
1.09k
      COP(reg)->cut_to_mark.id = mid;
2332
1.09k
      COP(reg)->cut_to_mark.restore_pos = TRUE;
2333
1.09k
    }
2334
0
    break;
2335
2336
309
  case ANCR_PREC_READ_NOT:
2337
309
    {
2338
309
      len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2339
309
      if (len < 0) return len;
2340
2341
309
      ID_ENTRY(env, mid);
2342
309
      r = add_op(reg, OP_PUSH);
2343
309
      if (r != 0) return r;
2344
309
      COP(reg)->push.addr = SIZE_INC + OPSIZE_MARK + len +
2345
309
                            OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
2346
2347
309
      r = add_op(reg, OP_MARK);
2348
309
      if (r != 0) return r;
2349
309
      COP(reg)->mark.id = mid;
2350
309
      COP(reg)->mark.save_pos = FALSE;
2351
2352
309
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2353
309
      if (r != 0) return r;
2354
2355
309
      r = add_op(reg, OP_POP_TO_MARK);
2356
309
      if (r != 0) return r;
2357
309
      COP(reg)->pop_to_mark.id = mid;
2358
2359
309
      r = add_op(reg, OP_POP);
2360
309
      if (r != 0) return r;
2361
309
      r = add_op(reg, OP_FAIL);
2362
309
    }
2363
0
    break;
2364
2365
3.19k
  case ANCR_LOOK_BEHIND:
2366
3.19k
    r = compile_anchor_look_behind_node(node, reg, env);
2367
3.19k
    break;
2368
2369
2.68k
  case ANCR_LOOK_BEHIND_NOT:
2370
2.68k
    r = compile_anchor_look_behind_not_node(node, reg, env);
2371
2.68k
    break;
2372
2373
0
  default:
2374
0
    return ONIGERR_TYPE_BUG;
2375
0
    break;
2376
19.1k
  }
2377
2378
19.1k
  return r;
2379
19.1k
}
2380
2381
static int
2382
compile_gimmick_node(GimmickNode* node, regex_t* reg)
2383
11.9k
{
2384
11.9k
  int r = 0;
2385
2386
11.9k
  switch (node->type) {
2387
2.68k
  case GIMMICK_FAIL:
2388
2.68k
    r = add_op(reg, OP_FAIL);
2389
2.68k
    break;
2390
2391
4.17k
  case GIMMICK_SAVE:
2392
4.17k
    r = add_op(reg, OP_SAVE_VAL);
2393
4.17k
    if (r != 0) return r;
2394
4.17k
    COP(reg)->save_val.type = node->detail_type;
2395
4.17k
    COP(reg)->save_val.id   = node->id;
2396
4.17k
    break;
2397
2398
3.61k
  case GIMMICK_UPDATE_VAR:
2399
3.61k
    r = add_op(reg, OP_UPDATE_VAR);
2400
3.61k
    if (r != 0) return r;
2401
3.61k
    COP(reg)->update_var.type = node->detail_type;
2402
3.61k
    COP(reg)->update_var.id   = node->id;
2403
3.61k
    COP(reg)->update_var.clear = FALSE;
2404
3.61k
    break;
2405
2406
0
#ifdef USE_CALLOUT
2407
1.47k
  case GIMMICK_CALLOUT:
2408
1.47k
    switch (node->detail_type) {
2409
273
    case ONIG_CALLOUT_OF_CONTENTS:
2410
1.47k
    case ONIG_CALLOUT_OF_NAME:
2411
1.47k
      {
2412
1.47k
        if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
2413
1.20k
          r = add_op(reg, OP_CALLOUT_NAME);
2414
1.20k
          if (r != 0) return r;
2415
1.20k
          COP(reg)->callout_name.id  = node->id;
2416
1.20k
          COP(reg)->callout_name.num = node->num;
2417
1.20k
        }
2418
273
        else {
2419
273
          r = add_op(reg, OP_CALLOUT_CONTENTS);
2420
273
          if (r != 0) return r;
2421
273
          COP(reg)->callout_contents.num = node->num;
2422
273
        }
2423
1.47k
      }
2424
1.47k
      break;
2425
2426
1.47k
    default:
2427
0
      r = ONIGERR_TYPE_BUG;
2428
0
      break;
2429
1.47k
    }
2430
11.9k
#endif
2431
11.9k
  }
2432
2433
11.9k
  return r;
2434
11.9k
}
2435
2436
static int
2437
compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
2438
53.3k
{
2439
53.3k
  int len;
2440
2441
53.3k
  switch (node->type) {
2442
13.6k
  case GIMMICK_FAIL:
2443
13.6k
    len = OPSIZE_FAIL;
2444
13.6k
    break;
2445
2446
20.9k
  case GIMMICK_SAVE:
2447
20.9k
    len = OPSIZE_SAVE_VAL;
2448
20.9k
    break;
2449
2450
17.9k
  case GIMMICK_UPDATE_VAR:
2451
17.9k
    len = OPSIZE_UPDATE_VAR;
2452
17.9k
    break;
2453
2454
0
#ifdef USE_CALLOUT
2455
776
  case GIMMICK_CALLOUT:
2456
776
    switch (node->detail_type) {
2457
519
    case ONIG_CALLOUT_OF_CONTENTS:
2458
519
      len = OPSIZE_CALLOUT_CONTENTS;
2459
519
      break;
2460
257
    case ONIG_CALLOUT_OF_NAME:
2461
257
      len = OPSIZE_CALLOUT_NAME;
2462
257
      break;
2463
2464
0
    default:
2465
0
      len = ONIGERR_TYPE_BUG;
2466
0
      break;
2467
776
    }
2468
776
    break;
2469
776
#endif
2470
2471
776
  default:
2472
0
    return ONIGERR_TYPE_BUG;
2473
0
    break;
2474
53.3k
  }
2475
2476
53.3k
  return len;
2477
53.3k
}
2478
2479
static int
2480
compile_length_tree(Node* node, regex_t* reg, ParseEnv* env)
2481
1.23M
{
2482
1.23M
  int len, r;
2483
2484
1.23M
  switch (ND_TYPE(node)) {
2485
274k
  case ND_LIST:
2486
274k
    len = 0;
2487
684k
    do {
2488
684k
      r = compile_length_tree(ND_CAR(node), reg, env);
2489
684k
      if (r < 0) return r;
2490
684k
      len += r;
2491
684k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
2492
274k
    r = len;
2493
274k
    break;
2494
2495
19.9k
  case ND_ALT:
2496
19.9k
    {
2497
19.9k
      int n;
2498
2499
19.9k
      n = r = 0;
2500
133k
      do {
2501
133k
        r += compile_length_tree(ND_CAR(node), reg, env);
2502
133k
        n++;
2503
133k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2504
19.9k
      r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
2505
19.9k
    }
2506
19.9k
    break;
2507
2508
264k
  case ND_STRING:
2509
264k
    if (ND_STRING_IS_CRUDE(node))
2510
7.76k
      r = compile_length_string_crude_node(STR_(node), reg);
2511
256k
    else
2512
256k
      r = compile_length_string_node(node, reg);
2513
264k
    break;
2514
2515
411k
  case ND_CCLASS:
2516
411k
    r = compile_length_cclass_node(CCLASS_(node), reg);
2517
411k
    break;
2518
2519
49.2k
  case ND_CTYPE:
2520
49.2k
    r = SIZE_OPCODE;
2521
49.2k
    break;
2522
2523
8.02k
  case ND_BACKREF:
2524
8.02k
    r = OPSIZE_BACKREF;
2525
8.02k
    break;
2526
2527
0
#ifdef USE_CALL
2528
16.3k
  case ND_CALL:
2529
16.3k
    r = OPSIZE_CALL;
2530
16.3k
    break;
2531
0
#endif
2532
2533
58.7k
  case ND_QUANT:
2534
58.7k
    r = compile_length_quantifier_node(QUANT_(node), reg, env);
2535
58.7k
    break;
2536
2537
51.4k
  case ND_BAG:
2538
51.4k
    r = compile_length_bag_node(BAG_(node), reg, env);
2539
51.4k
    break;
2540
2541
29.1k
  case ND_ANCHOR:
2542
29.1k
    r = compile_length_anchor_node(ANCHOR_(node), reg, env);
2543
29.1k
    break;
2544
2545
53.3k
  case ND_GIMMICK:
2546
53.3k
    r = compile_length_gimmick_node(GIMMICK_(node), reg);
2547
53.3k
    break;
2548
2549
0
  default:
2550
0
    return ONIGERR_TYPE_BUG;
2551
0
    break;
2552
1.23M
  }
2553
2554
1.23M
  return r;
2555
1.23M
}
2556
2557
static int
2558
compile_tree(Node* node, regex_t* reg, ParseEnv* env)
2559
517k
{
2560
517k
  int n, len, pos, r = 0;
2561
2562
517k
  switch (ND_TYPE(node)) {
2563
100k
  case ND_LIST:
2564
290k
    do {
2565
290k
      r = compile_tree(ND_CAR(node), reg, env);
2566
290k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2567
100k
    break;
2568
2569
11.7k
  case ND_ALT:
2570
11.7k
    {
2571
11.7k
      Node* x = node;
2572
11.7k
      len = 0;
2573
110k
      do {
2574
110k
        len += compile_length_tree(ND_CAR(x), reg, env);
2575
110k
        if (IS_NOT_NULL(ND_CDR(x))) {
2576
98.8k
          len += OPSIZE_PUSH + OPSIZE_JUMP;
2577
98.8k
        }
2578
110k
      } while (IS_NOT_NULL(x = ND_CDR(x)));
2579
11.7k
      pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */
2580
2581
110k
      do {
2582
110k
        len = compile_length_tree(ND_CAR(node), reg, env);
2583
110k
        if (IS_NOT_NULL(ND_CDR(node))) {
2584
98.8k
          enum OpCode push = ND_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
2585
98.8k
          r = add_op(reg, push);
2586
98.8k
          if (r != 0) break;
2587
98.8k
          COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
2588
98.8k
        }
2589
110k
        r = compile_tree(ND_CAR(node), reg, env);
2590
110k
        if (r != 0) break;
2591
110k
        if (IS_NOT_NULL(ND_CDR(node))) {
2592
98.8k
          len = pos - (COP_CURR_OFFSET(reg) + 1);
2593
98.8k
          r = add_op(reg, OP_JUMP);
2594
98.8k
          if (r != 0) break;
2595
98.8k
          COP(reg)->jump.addr = len;
2596
98.8k
        }
2597
110k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2598
11.7k
    }
2599
0
    break;
2600
2601
114k
  case ND_STRING:
2602
114k
    if (ND_STRING_IS_CRUDE(node))
2603
1.78k
      r = compile_string_crude_node(STR_(node), reg);
2604
113k
    else
2605
113k
      r = compile_string_node(node, reg);
2606
114k
    break;
2607
2608
154k
  case ND_CCLASS:
2609
154k
    r = compile_cclass_node(CCLASS_(node), reg);
2610
154k
    break;
2611
2612
20.9k
  case ND_CTYPE:
2613
20.9k
    {
2614
20.9k
      int op;
2615
2616
20.9k
      switch (CTYPE_(node)->ctype) {
2617
15.6k
      case CTYPE_ANYCHAR:
2618
15.6k
        r = add_op(reg, ND_IS_MULTILINE(node) ? OP_ANYCHAR_ML : OP_ANYCHAR);
2619
15.6k
        break;
2620
2621
5.34k
      case ONIGENC_CTYPE_WORD:
2622
5.34k
        if (CTYPE_(node)->ascii_mode == 0) {
2623
3.06k
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
2624
3.06k
        }
2625
2.27k
        else {
2626
2.27k
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
2627
2.27k
        }
2628
5.34k
        r = add_op(reg, op);
2629
5.34k
        break;
2630
2631
0
      default:
2632
0
        return ONIGERR_TYPE_BUG;
2633
0
        break;
2634
20.9k
      }
2635
20.9k
    }
2636
20.9k
    break;
2637
2638
20.9k
  case ND_BACKREF:
2639
7.01k
    {
2640
7.01k
      BackRefNode* br = BACKREF_(node);
2641
2642
7.01k
      if (ND_IS_CHECKER(node)) {
2643
426
#ifdef USE_BACKREF_WITH_LEVEL
2644
426
        if (ND_IS_NEST_LEVEL(node)) {
2645
222
          r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
2646
222
          if (r != 0) return r;
2647
222
          COP(reg)->backref_general.nest_level = br->nest_level;
2648
222
        }
2649
204
        else
2650
204
#endif
2651
204
          {
2652
204
            r = add_op(reg, OP_BACKREF_CHECK);
2653
204
            if (r != 0) return r;
2654
204
          }
2655
426
        goto add_bacref_mems;
2656
426
      }
2657
6.58k
      else {
2658
6.58k
#ifdef USE_BACKREF_WITH_LEVEL
2659
6.58k
        if (ND_IS_NEST_LEVEL(node)) {
2660
644
          if (ND_IS_IGNORECASE(node))
2661
344
            r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
2662
300
          else
2663
300
            r = add_op(reg, OP_BACKREF_WITH_LEVEL);
2664
2665
644
          if (r != 0) return r;
2666
644
          COP(reg)->backref_general.nest_level = br->nest_level;
2667
644
          goto add_bacref_mems;
2668
644
        }
2669
5.94k
        else
2670
5.94k
#endif
2671
5.94k
        if (br->back_num == 1) {
2672
4.87k
          n = br->back_static[0];
2673
4.87k
          if (ND_IS_IGNORECASE(node)) {
2674
2.63k
            r = add_op(reg, OP_BACKREF_N_IC);
2675
2.63k
            if (r != 0) return r;
2676
2.63k
            COP(reg)->backref_n.n1 = n;
2677
2.63k
          }
2678
2.24k
          else {
2679
2.24k
            switch (n) {
2680
1.11k
            case 1:  r = add_op(reg, OP_BACKREF1); break;
2681
632
            case 2:  r = add_op(reg, OP_BACKREF2); break;
2682
503
            default:
2683
503
              r = add_op(reg, OP_BACKREF_N);
2684
503
              if (r != 0) return r;
2685
503
              COP(reg)->backref_n.n1 = n;
2686
503
              break;
2687
2.24k
            }
2688
2.24k
          }
2689
4.87k
        }
2690
1.06k
        else {
2691
1.06k
          int num;
2692
1.06k
          int* p;
2693
2694
1.06k
          r = add_op(reg, ND_IS_IGNORECASE(node) ?
2695
657
                     OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
2696
1.06k
          if (r != 0) return r;
2697
2698
2.13k
        add_bacref_mems:
2699
2.13k
          num = br->back_num;
2700
2.13k
          COP(reg)->backref_general.num = num;
2701
2.13k
          if (num == 1) {
2702
1.01k
            COP(reg)->backref_general.n1 = br->back_static[0];
2703
1.01k
          }
2704
1.11k
          else {
2705
1.11k
            int i, j;
2706
1.11k
            MemNumType* ns;
2707
2708
1.11k
            ns = xmalloc(sizeof(MemNumType) * num);
2709
1.11k
            CHECK_NULL_RETURN_MEMERR(ns);
2710
1.11k
            COP(reg)->backref_general.ns = ns;
2711
1.11k
            p = BACKREFS_P(br);
2712
5.86k
            for (i = num - 1, j = 0; i >= 0; i--, j++) {
2713
4.74k
              ns[j] = p[i];
2714
4.74k
            }
2715
1.11k
          }
2716
2.13k
        }
2717
6.58k
      }
2718
7.01k
    }
2719
7.01k
    break;
2720
2721
7.01k
#ifdef USE_CALL
2722
7.01k
  case ND_CALL:
2723
3.96k
    r = compile_call(CALL_(node), reg, env);
2724
3.96k
    break;
2725
0
#endif
2726
2727
39.5k
  case ND_QUANT:
2728
39.5k
    r = compile_quantifier_node(QUANT_(node), reg, env);
2729
39.5k
    break;
2730
2731
33.1k
  case ND_BAG:
2732
33.1k
    r = compile_bag_node(BAG_(node), reg, env);
2733
33.1k
    break;
2734
2735
19.1k
  case ND_ANCHOR:
2736
19.1k
    r = compile_anchor_node(ANCHOR_(node), reg, env);
2737
19.1k
    break;
2738
2739
11.9k
  case ND_GIMMICK:
2740
11.9k
    r = compile_gimmick_node(GIMMICK_(node), reg);
2741
11.9k
    break;
2742
2743
0
  default:
2744
#ifdef ONIG_DEBUG
2745
    fprintf(DBGFP, "compile_tree: undefined node type %d\n", ND_TYPE(node));
2746
#endif
2747
0
    break;
2748
517k
  }
2749
2750
517k
  return r;
2751
517k
}
2752
2753
static int
2754
make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
2755
17.1k
{
2756
17.1k
  int r;
2757
17.1k
  Node* node = *plink;
2758
2759
17.1k
  switch (ND_TYPE(node)) {
2760
2.37k
  case ND_LIST:
2761
2.77k
  case ND_ALT:
2762
10.8k
    do {
2763
10.8k
      r = make_named_capture_number_map(&(ND_CAR(node)), map, counter);
2764
10.8k
    } while (r >= 0 && IS_NOT_NULL(node = ND_CDR(node)));
2765
2.77k
    if (r < 0) return r;
2766
2.71k
    break;
2767
2768
2.71k
  case ND_QUANT:
2769
2.17k
    {
2770
2.17k
      Node** ptarget = &(ND_BODY(node));
2771
2.17k
      r = make_named_capture_number_map(ptarget, map, counter);
2772
2.17k
      if (r < 0) return r;
2773
2.15k
      if (r == 1 && ND_TYPE(*ptarget) == ND_QUANT) {
2774
92
        return onig_reduce_nested_quantifier(node);
2775
92
      }
2776
2.15k
    }
2777
2.06k
    break;
2778
2779
3.20k
  case ND_BAG:
2780
3.20k
    {
2781
3.20k
      BagNode* en = BAG_(node);
2782
3.20k
      if (en->type == BAG_MEMORY) {
2783
2.20k
        if (ND_IS_NAMED_GROUP(node)) {
2784
1.16k
          (*counter)++;
2785
1.16k
          map[en->m.regnum].new_val = *counter;
2786
1.16k
          en->m.regnum = *counter;
2787
1.16k
          r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2788
1.16k
          if (r < 0) return r;
2789
1.16k
        }
2790
1.04k
        else {
2791
1.04k
          *plink = ND_BODY(node);
2792
1.04k
          ND_BODY(node) = NULL_NODE;
2793
1.04k
          onig_node_free(node);
2794
1.04k
          r = make_named_capture_number_map(plink, map, counter);
2795
1.04k
          if (r < 0) return r;
2796
1.03k
          return 1;
2797
1.04k
        }
2798
2.20k
      }
2799
1.00k
      else if (en->type == BAG_IF_ELSE) {
2800
278
        r = make_named_capture_number_map(&(ND_BAG_BODY(en)), map, counter);
2801
278
        if (r < 0) return r;
2802
276
        if (IS_NOT_NULL(en->te.Then)) {
2803
82
          r = make_named_capture_number_map(&(en->te.Then), map, counter);
2804
82
          if (r < 0) return r;
2805
82
        }
2806
274
        if (IS_NOT_NULL(en->te.Else)) {
2807
204
          r = make_named_capture_number_map(&(en->te.Else), map, counter);
2808
204
          if (r < 0) return r;
2809
204
        }
2810
274
      }
2811
726
      else {
2812
726
        r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2813
726
        if (r < 0) return r;
2814
726
      }
2815
3.20k
    }
2816
2.14k
    break;
2817
2818
2.14k
  case ND_ANCHOR:
2819
890
    if (IS_NOT_NULL(ND_BODY(node))) {
2820
248
      r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2821
248
      if (r < 0) return r;
2822
248
    }
2823
888
    break;
2824
2825
8.10k
  default:
2826
8.10k
    break;
2827
17.1k
  }
2828
2829
15.9k
  return 0;
2830
17.1k
}
2831
2832
static int
2833
renumber_backref_node(Node* node, GroupNumMap* map)
2834
568
{
2835
568
  int i, pos, n, old_num;
2836
568
  int *backs;
2837
568
  BackRefNode* bn = BACKREF_(node);
2838
2839
568
  if (! ND_IS_BY_NAME(node))
2840
26
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2841
2842
542
  old_num = bn->back_num;
2843
542
  if (IS_NULL(bn->back_dynamic))
2844
414
    backs = bn->back_static;
2845
128
  else
2846
128
    backs = bn->back_dynamic;
2847
2848
2.42k
  for (i = 0, pos = 0; i < old_num; i++) {
2849
1.88k
    n = map[backs[i]].new_val;
2850
1.88k
    if (n > 0) {
2851
1.88k
      backs[pos] = n;
2852
1.88k
      pos++;
2853
1.88k
    }
2854
1.88k
  }
2855
2856
542
  bn->back_num = pos;
2857
542
  return 0;
2858
568
}
2859
2860
static int
2861
renumber_backref_traverse(Node* node, GroupNumMap* map)
2862
15.5k
{
2863
15.5k
  int r = 0;
2864
2865
15.5k
  switch (ND_TYPE(node)) {
2866
2.31k
  case ND_LIST:
2867
2.68k
  case ND_ALT:
2868
10.5k
    do {
2869
10.5k
      r = renumber_backref_traverse(ND_CAR(node), map);
2870
10.5k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2871
2.68k
    break;
2872
2873
2.00k
  case ND_QUANT:
2874
2.00k
    r = renumber_backref_traverse(ND_BODY(node), map);
2875
2.00k
    break;
2876
2877
2.11k
  case ND_BAG:
2878
2.11k
    {
2879
2.11k
      BagNode* en = BAG_(node);
2880
2881
2.11k
      r = renumber_backref_traverse(ND_BODY(node), map);
2882
2.11k
      if (r != 0) return r;
2883
2884
2.08k
      if (en->type == BAG_IF_ELSE) {
2885
272
        if (IS_NOT_NULL(en->te.Then)) {
2886
78
          r = renumber_backref_traverse(en->te.Then, map);
2887
78
          if (r != 0) return r;
2888
78
        }
2889
264
        if (IS_NOT_NULL(en->te.Else)) {
2890
202
          r = renumber_backref_traverse(en->te.Else, map);
2891
202
          if (r != 0) return r;
2892
202
        }
2893
264
      }
2894
2.08k
    }
2895
2.06k
    break;
2896
2897
2.06k
  case ND_BACKREF:
2898
568
    r = renumber_backref_node(node, map);
2899
568
    break;
2900
2901
880
  case ND_ANCHOR:
2902
880
    if (IS_NOT_NULL(ND_BODY(node)))
2903
246
      r = renumber_backref_traverse(ND_BODY(node), map);
2904
880
    break;
2905
2906
7.27k
  default:
2907
7.27k
    break;
2908
15.5k
  }
2909
2910
15.4k
  return r;
2911
15.5k
}
2912
2913
static int
2914
numbered_ref_check(Node* node)
2915
14.7k
{
2916
14.7k
  int r = 0;
2917
2918
14.7k
  switch (ND_TYPE(node)) {
2919
2.56k
  case ND_LIST:
2920
3.01k
  case ND_ALT:
2921
9.90k
    do {
2922
9.90k
      r = numbered_ref_check(ND_CAR(node));
2923
9.90k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2924
3.01k
    break;
2925
2926
1.01k
  case ND_ANCHOR:
2927
1.01k
    if (IS_NULL(ND_BODY(node)))
2928
760
      break;
2929
    /* fall */
2930
1.93k
  case ND_QUANT:
2931
1.93k
    r = numbered_ref_check(ND_BODY(node));
2932
1.93k
    break;
2933
2934
2.07k
  case ND_BAG:
2935
2.07k
    {
2936
2.07k
      BagNode* en = BAG_(node);
2937
2938
2.07k
      r = numbered_ref_check(ND_BODY(node));
2939
2.07k
      if (r != 0) return r;
2940
2941
2.03k
      if (en->type == BAG_IF_ELSE) {
2942
272
        if (IS_NOT_NULL(en->te.Then)) {
2943
76
          r = numbered_ref_check(en->te.Then);
2944
76
          if (r != 0) return r;
2945
76
        }
2946
264
        if (IS_NOT_NULL(en->te.Else)) {
2947
202
          r = numbered_ref_check(en->te.Else);
2948
202
          if (r != 0) return r;
2949
202
        }
2950
264
      }
2951
2.03k
    }
2952
2953
2.02k
    break;
2954
2955
2.02k
  case ND_BACKREF:
2956
752
    if (! ND_IS_BY_NAME(node))
2957
24
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2958
728
    break;
2959
2960
6.23k
  default:
2961
6.23k
    break;
2962
14.7k
  }
2963
2964
14.6k
  return r;
2965
14.7k
}
2966
2967
static int
2968
disable_noname_group_capture(Node** root, regex_t* reg, ParseEnv* env)
2969
372
{
2970
372
  int r, i, pos, counter;
2971
372
  MemStatusType loc;
2972
372
  GroupNumMap* map;
2973
2974
372
  map = (GroupNumMap* )xalloca(sizeof(GroupNumMap) * (env->num_mem + 1));
2975
372
  CHECK_NULL_RETURN_MEMERR(map);
2976
2.57k
  for (i = 1; i <= env->num_mem; i++) {
2977
2.19k
    map[i].new_val = 0;
2978
2.19k
  }
2979
372
  counter = 0;
2980
372
  r = make_named_capture_number_map(root, map, &counter);
2981
372
  if (r < 0) return r;
2982
2983
346
  r = renumber_backref_traverse(*root, map);
2984
346
  if (r != 0) return r;
2985
2986
2.33k
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2987
2.01k
    if (map[i].new_val > 0) {
2988
1.04k
      PARSEENV_MEMENV(env)[pos] = PARSEENV_MEMENV(env)[i];
2989
1.04k
      pos++;
2990
1.04k
    }
2991
2.01k
  }
2992
2993
320
  loc = env->cap_history;
2994
320
  MEM_STATUS_CLEAR(env->cap_history);
2995
10.2k
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2996
9.92k
    if (MEM_STATUS_AT(loc, i)) {
2997
0
      MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
2998
0
    }
2999
9.92k
  }
3000
3001
320
  env->num_mem = env->num_named;
3002
320
  reg->num_mem = env->num_named;
3003
3004
320
  return onig_renumber_name_table(reg, map);
3005
346
}
3006
3007
#ifdef USE_CALL
3008
static int
3009
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
3010
1.39k
{
3011
1.39k
  int i, offset;
3012
1.39k
  BagNode* en;
3013
1.39k
  AbsAddrType addr;
3014
1.39k
  AbsAddrType* paddr;
3015
3016
5.36k
  for (i = 0; i < uslist->num; i++) {
3017
3.96k
    if (! ND_IS_FIXED_ADDR(uslist->us[i].target)) {
3018
37
      if (ND_IS_CALLED(uslist->us[i].target))
3019
0
        return ONIGERR_PARSER_BUG;
3020
37
      else {
3021
        /* CASE: called node doesn't have called address.
3022
           ex. /((|a\g<1>)(.){0}){0}\g<3>/
3023
           group-1 doesn't called, but compiled into bytecodes,
3024
           because group-3 is referred from outside.
3025
        */
3026
37
        continue;
3027
37
      }
3028
37
    }
3029
3030
3.93k
    en = BAG_(uslist->us[i].target);
3031
3.93k
    addr   = en->m.called_addr;
3032
3.93k
    offset = uslist->us[i].offset;
3033
3034
3.93k
    paddr = (AbsAddrType* )((char* )reg->ops + offset);
3035
3.93k
    *paddr = addr;
3036
3.93k
  }
3037
1.39k
  return 0;
3038
1.39k
}
3039
#endif
3040
3041
/* x is not included y ==>  1 : 0 */
3042
static int
3043
is_exclusive(Node* x, Node* y, regex_t* reg)
3044
13.6k
{
3045
13.6k
  int i, len;
3046
13.6k
  OnigCodePoint code;
3047
13.6k
  UChar *p;
3048
13.6k
  NodeType ytype;
3049
3050
17.1k
 retry:
3051
17.1k
  ytype = ND_TYPE(y);
3052
17.1k
  switch (ND_TYPE(x)) {
3053
2.85k
  case ND_CTYPE:
3054
2.85k
    {
3055
2.85k
      if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
3056
2.85k
          CTYPE_(y)->ctype == CTYPE_ANYCHAR)
3057
0
        break;
3058
3059
2.85k
      switch (ytype) {
3060
778
      case ND_CTYPE:
3061
778
        if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
3062
778
            CTYPE_(y)->not   != CTYPE_(x)->not &&
3063
778
            CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
3064
140
          return 1;
3065
638
        else
3066
638
          return 0;
3067
0
        break;
3068
3069
1.29k
      case ND_CCLASS:
3070
3.45k
      swap:
3071
3.45k
        {
3072
3.45k
          Node* tmp;
3073
3.45k
          tmp = x; x = y; y = tmp;
3074
3.45k
          goto retry;
3075
1.29k
        }
3076
0
        break;
3077
3078
784
      case ND_STRING:
3079
784
        goto swap;
3080
0
        break;
3081
3082
0
      default:
3083
0
        break;
3084
2.85k
      }
3085
2.85k
    }
3086
0
    break;
3087
3088
6.55k
  case ND_CCLASS:
3089
6.55k
    {
3090
6.55k
      int range;
3091
6.55k
      CClassNode* xc = CCLASS_(x);
3092
3093
6.55k
      switch (ytype) {
3094
2.53k
      case ND_CTYPE:
3095
2.53k
        switch (CTYPE_(y)->ctype) {
3096
0
        case CTYPE_ANYCHAR:
3097
0
          return 0;
3098
0
          break;
3099
3100
2.53k
        case ONIGENC_CTYPE_WORD:
3101
2.53k
          if (CTYPE_(y)->not == 0) {
3102
846
            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
3103
584
              range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3104
86.6k
              for (i = 0; i < range; i++) {
3105
86.4k
                if (BITSET_AT(xc->bs, i)) {
3106
972
                  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
3107
972
                }
3108
86.4k
              }
3109
208
              return 1;
3110
584
            }
3111
262
            return 0;
3112
846
          }
3113
1.68k
          else {
3114
1.68k
            if (IS_NOT_NULL(xc->mbuf)) return 0;
3115
1.48k
            if (IS_NCCLASS_NOT(xc)) return 0;
3116
3117
1.27k
            range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3118
204k
            for (i = 0; i < range; i++) {
3119
203k
              if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
3120
107k
                if (BITSET_AT(xc->bs, i))
3121
122
                  return 0;
3122
107k
              }
3123
203k
            }
3124
74.2k
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
3125
73.5k
              if (BITSET_AT(xc->bs, i)) return 0;
3126
73.5k
            }
3127
624
            return 1;
3128
1.15k
          }
3129
0
          break;
3130
3131
0
        default:
3132
0
          break;
3133
2.53k
        }
3134
0
        break;
3135
3136
2.64k
      case ND_CCLASS:
3137
2.64k
        {
3138
2.64k
          int v;
3139
2.64k
          CClassNode* yc = CCLASS_(y);
3140
3141
515k
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
3142
513k
            v = BITSET_AT(xc->bs, i);
3143
513k
            if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
3144
66.9k
              v = BITSET_AT(yc->bs, i);
3145
66.9k
              if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
3146
66.9k
                  (v == 0 && IS_NCCLASS_NOT(yc)))
3147
1.05k
                return 0;
3148
66.9k
            }
3149
513k
          }
3150
1.58k
          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
3151
1.58k
              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
3152
1.06k
            return 1;
3153
516
          return 0;
3154
1.58k
        }
3155
0
        break;
3156
3157
1.38k
      case ND_STRING:
3158
1.38k
        goto swap;
3159
0
        break;
3160
3161
0
      default:
3162
0
        break;
3163
6.55k
      }
3164
6.55k
    }
3165
0
    break;
3166
3167
7.74k
  case ND_STRING:
3168
7.74k
    {
3169
7.74k
      StrNode* xs = STR_(x);
3170
3171
7.74k
      if (ND_STRING_LEN(x) == 0)
3172
0
        break;
3173
3174
7.74k
      switch (ytype) {
3175
1.44k
      case ND_CTYPE:
3176
1.44k
        switch (CTYPE_(y)->ctype) {
3177
0
        case CTYPE_ANYCHAR:
3178
0
          break;
3179
3180
1.44k
        case ONIGENC_CTYPE_WORD:
3181
1.44k
          if (CTYPE_(y)->ascii_mode == 0) {
3182
792
            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
3183
486
              return CTYPE_(y)->not;
3184
306
            else
3185
306
              return !(CTYPE_(y)->not);
3186
792
          }
3187
656
          else {
3188
656
            if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
3189
244
              return CTYPE_(y)->not;
3190
412
            else
3191
412
              return !(CTYPE_(y)->not);
3192
656
          }
3193
0
          break;
3194
0
        default:
3195
0
          break;
3196
1.44k
        }
3197
0
        break;
3198
3199
2.33k
      case ND_CCLASS:
3200
2.33k
        {
3201
2.33k
          CClassNode* cc = CCLASS_(y);
3202
3203
2.33k
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
3204
2.33k
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
3205
2.33k
          return onig_is_code_in_cc(reg->enc, code, cc) == 0;
3206
1.44k
        }
3207
0
        break;
3208
3209
3.96k
      case ND_STRING:
3210
3.96k
        {
3211
3.96k
          UChar *q;
3212
3.96k
          StrNode* ys = STR_(y);
3213
3214
3.96k
          len = ND_STRING_LEN(x);
3215
3.96k
          if (len > ND_STRING_LEN(y)) len = ND_STRING_LEN(y);
3216
3217
6.14k
          for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
3218
4.60k
            if (*p != *q) return 1;
3219
4.60k
          }
3220
3.96k
        }
3221
1.53k
        break;
3222
3223
1.53k
      default:
3224
0
        break;
3225
7.74k
      }
3226
7.74k
    }
3227
1.53k
    break;
3228
3229
1.53k
  default:
3230
0
    break;
3231
17.1k
  }
3232
3233
1.53k
  return 0;
3234
17.1k
}
3235
3236
static Node*
3237
get_tree_head_literal(Node* node, int exact, regex_t* reg)
3238
174k
{
3239
174k
  Node* n = NULL_NODE;
3240
3241
174k
  switch (ND_TYPE(node)) {
3242
2.87k
  case ND_BACKREF:
3243
6.17k
  case ND_ALT:
3244
6.17k
#ifdef USE_CALL
3245
7.15k
  case ND_CALL:
3246
7.15k
#endif
3247
7.15k
    break;
3248
3249
28.8k
  case ND_CTYPE:
3250
28.8k
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
3251
16.7k
      break;
3252
    /* fall */
3253
40.2k
  case ND_CCLASS:
3254
40.2k
    if (exact == 0) {
3255
16.2k
      n = node;
3256
16.2k
    }
3257
40.2k
    break;
3258
3259
15.1k
  case ND_LIST:
3260
15.1k
    n = get_tree_head_literal(ND_CAR(node), exact, reg);
3261
15.1k
    break;
3262
3263
42.9k
  case ND_STRING:
3264
42.9k
    {
3265
42.9k
      StrNode* sn = STR_(node);
3266
3267
42.9k
      if (sn->end <= sn->s)
3268
1.34k
        break;
3269
3270
41.5k
      if (exact == 0 || !ND_IS_REAL_IGNORECASE(node)) {
3271
41.5k
        n = node;
3272
41.5k
      }
3273
41.5k
    }
3274
0
    break;
3275
3276
28.0k
  case ND_QUANT:
3277
28.0k
    {
3278
28.0k
      QuantNode* qn = QUANT_(node);
3279
28.0k
      if (qn->lower > 0) {
3280
22.6k
        if (IS_NOT_NULL(qn->head_exact))
3281
5.76k
          n = qn->head_exact;
3282
16.9k
        else
3283
16.9k
          n = get_tree_head_literal(ND_BODY(node), exact, reg);
3284
22.6k
      }
3285
28.0k
    }
3286
28.0k
    break;
3287
3288
14.5k
  case ND_BAG:
3289
14.5k
    {
3290
14.5k
      BagNode* en = BAG_(node);
3291
14.5k
      switch (en->type) {
3292
62
      case BAG_OPTION:
3293
4.60k
      case BAG_MEMORY:
3294
13.8k
      case BAG_STOP_BACKTRACK:
3295
13.8k
        n = get_tree_head_literal(ND_BODY(node), exact, reg);
3296
13.8k
        break;
3297
758
      default:
3298
758
        break;
3299
14.5k
      }
3300
14.5k
    }
3301
14.5k
    break;
3302
3303
14.5k
  case ND_ANCHOR:
3304
8.73k
    if (ANCHOR_(node)->type == ANCR_PREC_READ)
3305
980
      n = get_tree_head_literal(ND_BODY(node), exact, reg);
3306
8.73k
    break;
3307
3308
628
  case ND_GIMMICK:
3309
628
  default:
3310
628
    break;
3311
174k
  }
3312
3313
174k
  return n;
3314
174k
}
3315
3316
enum GetValue {
3317
  GET_VALUE_NONE   = -1,
3318
  GET_VALUE_IGNORE =  0,
3319
  GET_VALUE_FOUND  =  1
3320
};
3321
3322
44.2k
#define MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL  16
3323
3324
static int
3325
get_tree_tail_literal(Node* node, Node** rnode, regex_t* reg, int nest_level)
3326
44.2k
{
3327
44.2k
  int r;
3328
3329
44.2k
  nest_level++;
3330
44.2k
  if (nest_level >= MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL) {
3331
368
    return GET_VALUE_NONE;
3332
368
  }
3333
3334
43.8k
  switch (ND_TYPE(node)) {
3335
28.4k
  case ND_LIST:
3336
28.4k
    if (IS_NULL(ND_CDR(node))) {
3337
9.65k
      r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3338
9.65k
    }
3339
18.7k
    else {
3340
18.7k
      r = get_tree_tail_literal(ND_CDR(node), rnode, reg, nest_level);
3341
18.7k
      if (r == GET_VALUE_IGNORE) {
3342
714
        r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3343
714
      }
3344
18.7k
    }
3345
28.4k
    break;
3346
3347
0
#ifdef USE_CALL
3348
274
  case ND_CALL:
3349
274
    r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3350
274
    break;
3351
0
#endif
3352
3353
460
  case ND_CTYPE:
3354
460
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) {
3355
354
      r = GET_VALUE_NONE;
3356
354
      break;
3357
354
    }
3358
    /* fall */
3359
5.62k
  case ND_CCLASS:
3360
5.62k
    *rnode = node;
3361
5.62k
    r = GET_VALUE_FOUND;
3362
5.62k
    break;
3363
3364
4.67k
  case ND_STRING:
3365
4.67k
    {
3366
4.67k
      StrNode* sn = STR_(node);
3367
3368
4.67k
      if (sn->end <= sn->s) {
3369
350
        r = GET_VALUE_IGNORE;
3370
350
        break;
3371
350
      }
3372
3373
4.32k
      if (ND_IS_REAL_IGNORECASE(node)) {
3374
82
        r = GET_VALUE_NONE;
3375
82
        break;
3376
82
      }
3377
3378
4.24k
      *rnode = node;
3379
4.24k
      r = GET_VALUE_FOUND;
3380
4.24k
    }
3381
0
    break;
3382
3383
1.77k
  case ND_QUANT:
3384
1.77k
    {
3385
1.77k
      QuantNode* qn = QUANT_(node);
3386
1.77k
      if (qn->lower != 0) {
3387
976
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3388
976
      }
3389
794
      else
3390
794
        r = GET_VALUE_NONE;
3391
1.77k
    }
3392
1.77k
    break;
3393
3394
1.21k
  case ND_BAG:
3395
1.21k
    {
3396
1.21k
      BagNode* en = BAG_(node);
3397
3398
1.21k
      if (en->type == BAG_MEMORY) {
3399
698
        if (ND_IS_MARK1(node))
3400
14
          r = GET_VALUE_NONE;
3401
684
        else {
3402
684
          ND_STATUS_ADD(node, MARK1);
3403
684
          r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3404
684
          ND_STATUS_REMOVE(node, MARK1);
3405
684
        }
3406
698
      }
3407
514
      else {
3408
514
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3409
514
      }
3410
1.21k
    }
3411
1.21k
    break;
3412
3413
682
  case ND_ANCHOR:
3414
888
  case ND_GIMMICK:
3415
888
    r = GET_VALUE_IGNORE;
3416
888
    break;
3417
3418
454
  case ND_ALT:
3419
644
  case ND_BACKREF:
3420
644
  default:
3421
644
    r = GET_VALUE_NONE;
3422
644
    break;
3423
43.8k
  }
3424
3425
43.8k
  return r;
3426
43.8k
}
3427
3428
static int
3429
check_called_node_in_look_behind(Node* node, int not)
3430
20.9k
{
3431
20.9k
  int r;
3432
3433
20.9k
  r = 0;
3434
3435
20.9k
  switch (ND_TYPE(node)) {
3436
2.33k
  case ND_LIST:
3437
3.54k
  case ND_ALT:
3438
14.0k
    do {
3439
14.0k
      r = check_called_node_in_look_behind(ND_CAR(node), not);
3440
14.0k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3441
3.54k
    break;
3442
3443
2.25k
  case ND_QUANT:
3444
2.25k
    r = check_called_node_in_look_behind(ND_BODY(node), not);
3445
2.25k
    break;
3446
3447
2.32k
  case ND_BAG:
3448
2.32k
    {
3449
2.32k
      BagNode* en = BAG_(node);
3450
3451
2.32k
      if (en->type == BAG_MEMORY) {
3452
1.06k
        if (ND_IS_MARK1(node))
3453
0
          return 0;
3454
1.06k
        else {
3455
1.06k
          ND_STATUS_ADD(node, MARK1);
3456
1.06k
          r = check_called_node_in_look_behind(ND_BODY(node), not);
3457
1.06k
          ND_STATUS_REMOVE(node, MARK1);
3458
1.06k
        }
3459
1.06k
      }
3460
1.25k
      else {
3461
1.25k
        r = check_called_node_in_look_behind(ND_BODY(node), not);
3462
1.25k
        if (r == 0 && en->type == BAG_IF_ELSE) {
3463
454
          if (IS_NOT_NULL(en->te.Then)) {
3464
230
            r = check_called_node_in_look_behind(en->te.Then, not);
3465
230
            if (r != 0) break;
3466
230
          }
3467
448
          if (IS_NOT_NULL(en->te.Else)) {
3468
224
            r = check_called_node_in_look_behind(en->te.Else, not);
3469
224
          }
3470
448
        }
3471
1.25k
      }
3472
2.32k
    }
3473
2.31k
    break;
3474
3475
2.31k
  case ND_ANCHOR:
3476
1.97k
    if (IS_NOT_NULL(ND_BODY(node)))
3477
1.11k
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3478
1.97k
    break;
3479
3480
472
  case ND_GIMMICK:
3481
472
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3482
14
      return 1;
3483
458
    break;
3484
3485
10.3k
  default:
3486
10.3k
    break;
3487
20.9k
  }
3488
3489
20.9k
  return r;
3490
20.9k
}
3491
3492
/* allowed node types in look-behind */
3493
#define ALLOWED_TYPE_IN_LB \
3494
184k
  ( ND_BIT_LIST | ND_BIT_ALT | ND_BIT_STRING | ND_BIT_CCLASS \
3495
184k
  | ND_BIT_CTYPE | ND_BIT_ANCHOR | ND_BIT_BAG | ND_BIT_QUANT \
3496
184k
  | ND_BIT_CALL | ND_BIT_BACKREF | ND_BIT_GIMMICK)
3497
3498
184k
#define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3499
184k
#define ALLOWED_BAG_IN_LB_NOT   ( 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3500
3501
#define ALLOWED_ANCHOR_IN_LB \
3502
184k
  ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
3503
184k
  | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
3504
184k
  | ANCR_WORD_BEGIN | ANCR_WORD_END \
3505
184k
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3506
3507
#define ALLOWED_ANCHOR_IN_LB_NOT \
3508
184k
  ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
3509
184k
  | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
3510
184k
  | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
3511
184k
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3512
3513
3514
static int
3515
check_node_in_look_behind(Node* node, int not, int* used)
3516
184k
{
3517
184k
  static unsigned int
3518
184k
    bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT };
3519
3520
184k
  static unsigned int
3521
184k
    anchor_mask[2] = { ALLOWED_ANCHOR_IN_LB, ALLOWED_ANCHOR_IN_LB_NOT };
3522
3523
184k
  NodeType type;
3524
184k
  int r = 0;
3525
3526
184k
  type = ND_TYPE(node);
3527
184k
  if ((ND_TYPE2BIT(type) & ALLOWED_TYPE_IN_LB) == 0)
3528
0
    return 1;
3529
3530
184k
  switch (type) {
3531
34.0k
  case ND_LIST:
3532
38.3k
  case ND_ALT:
3533
128k
    do {
3534
128k
      r = check_node_in_look_behind(ND_CAR(node), not, used);
3535
128k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3536
38.3k
    break;
3537
3538
22.4k
  case ND_QUANT:
3539
22.4k
    r = check_node_in_look_behind(ND_BODY(node), not, used);
3540
22.4k
    break;
3541
3542
10.7k
  case ND_BAG:
3543
10.7k
    {
3544
10.7k
      BagNode* en = BAG_(node);
3545
10.7k
      if (((1<<en->type) & bag_mask[not]) == 0)
3546
6
        return 1;
3547
3548
10.7k
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3549
10.7k
      if (r != 0) break;
3550
3551
10.7k
      if (en->type == BAG_MEMORY) {
3552
4.70k
        if (ND_IS_BACKREF(node) || ND_IS_CALLED(node)
3553
4.70k
         || ND_IS_REFERENCED(node))
3554
1.29k
          *used = TRUE;
3555
4.70k
      }
3556
6.05k
      else if (en->type == BAG_IF_ELSE) {
3557
1.83k
        if (IS_NOT_NULL(en->te.Then)) {
3558
928
          r = check_node_in_look_behind(en->te.Then, not, used);
3559
928
          if (r != 0) break;
3560
928
        }
3561
1.83k
        if (IS_NOT_NULL(en->te.Else)) {
3562
1.31k
          r = check_node_in_look_behind(en->te.Else, not, used);
3563
1.31k
        }
3564
1.83k
      }
3565
10.7k
    }
3566
10.7k
    break;
3567
3568
10.7k
  case ND_ANCHOR:
3569
7.41k
    type = ANCHOR_(node)->type;
3570
7.41k
    if ((type & anchor_mask[not]) == 0)
3571
22
      return 1;
3572
3573
7.39k
    if (IS_NOT_NULL(ND_BODY(node)))
3574
2.79k
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3575
7.39k
    break;
3576
3577
4.38k
  case ND_GIMMICK:
3578
4.38k
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3579
8
      return 1;
3580
3581
4.37k
    {
3582
4.37k
      GimmickNode* g = GIMMICK_(node);
3583
4.37k
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
3584
810
        *used = TRUE;
3585
4.37k
    }
3586
4.37k
    break;
3587
3588
1.39k
  case ND_CALL:
3589
1.39k
    if (ND_IS_RECURSION(node)) {
3590
      /* fix: Issue 38040 in oss-fuzz */
3591
      /* This node should be removed before recursive call check. */
3592
660
      *used = TRUE;
3593
660
    }
3594
732
    else
3595
732
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3596
1.39k
    break;
3597
3598
99.1k
  default:
3599
99.1k
    break;
3600
184k
  }
3601
183k
  return r;
3602
184k
}
3603
3604
static OnigLen
3605
node_min_byte_len(Node* node, ParseEnv* env)
3606
476k
{
3607
476k
  OnigLen len;
3608
476k
  OnigLen tmin;
3609
3610
476k
  len = 0;
3611
476k
  switch (ND_TYPE(node)) {
3612
10.8k
  case ND_BACKREF:
3613
10.8k
    if (! ND_IS_CHECKER(node)) {
3614
10.1k
      int i;
3615
10.1k
      int* backs;
3616
10.1k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3617
10.1k
      BackRefNode* br = BACKREF_(node);
3618
10.1k
      if (ND_IS_RECURSION(node)) break;
3619
3620
8.03k
      backs = BACKREFS_P(br);
3621
8.03k
      len = node_min_byte_len(mem_env[backs[0]].mem_node, env);
3622
14.8k
      for (i = 1; i < br->back_num; i++) {
3623
6.82k
        tmin = node_min_byte_len(mem_env[backs[i]].mem_node, env);
3624
6.82k
        if (len > tmin) len = tmin;
3625
6.82k
      }
3626
8.03k
    }
3627
8.71k
    break;
3628
3629
8.71k
#ifdef USE_CALL
3630
8.71k
  case ND_CALL:
3631
7.61k
    {
3632
7.61k
      Node* t = ND_BODY(node);
3633
7.61k
      if (ND_IS_FIXED_MIN(t))
3634
4.02k
        len = BAG_(t)->min_len;
3635
3.58k
      else
3636
3.58k
        len = node_min_byte_len(t, env);
3637
7.61k
    }
3638
7.61k
    break;
3639
0
#endif
3640
3641
84.0k
  case ND_LIST:
3642
210k
    do {
3643
210k
      tmin = node_min_byte_len(ND_CAR(node), env);
3644
210k
      len = distance_add(len, tmin);
3645
210k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3646
84.0k
    break;
3647
3648
8.86k
  case ND_ALT:
3649
8.86k
    {
3650
8.86k
      Node *x, *y;
3651
8.86k
      y = node;
3652
77.4k
      do {
3653
77.4k
        x = ND_CAR(y);
3654
77.4k
        tmin = node_min_byte_len(x, env);
3655
77.4k
        if (y == node) len = tmin;
3656
68.5k
        else if (len > tmin) len = tmin;
3657
77.4k
      } while (IS_NOT_NULL(y = ND_CDR(y)));
3658
8.86k
    }
3659
8.86k
    break;
3660
3661
98.8k
  case ND_STRING:
3662
98.8k
    {
3663
98.8k
      StrNode* sn = STR_(node);
3664
98.8k
      len = (int )(sn->end - sn->s);
3665
98.8k
    }
3666
98.8k
    break;
3667
3668
29.5k
  case ND_CTYPE:
3669
137k
  case ND_CCLASS:
3670
137k
    len = ONIGENC_MBC_MINLEN(env->enc);
3671
137k
    break;
3672
3673
40.6k
  case ND_QUANT:
3674
40.6k
    {
3675
40.6k
      QuantNode* qn = QUANT_(node);
3676
3677
40.6k
      if (qn->lower > 0) {
3678
22.9k
        len = node_min_byte_len(ND_BODY(node), env);
3679
22.9k
        len = distance_multiply(len, qn->lower);
3680
22.9k
      }
3681
40.6k
    }
3682
40.6k
    break;
3683
3684
57.2k
  case ND_BAG:
3685
57.2k
    {
3686
57.2k
      BagNode* en = BAG_(node);
3687
57.2k
      switch (en->type) {
3688
39.0k
      case BAG_MEMORY:
3689
39.0k
        if (ND_IS_FIXED_MIN(node))
3690
18.2k
          len = en->min_len;
3691
20.8k
        else {
3692
20.8k
          if (ND_IS_MARK1(node))
3693
2.53k
            len = 0;  /* recursive */
3694
18.2k
          else {
3695
18.2k
            ND_STATUS_ADD(node, MARK1);
3696
18.2k
            len = node_min_byte_len(ND_BODY(node), env);
3697
18.2k
            ND_STATUS_REMOVE(node, MARK1);
3698
3699
18.2k
            en->min_len = len;
3700
18.2k
            ND_STATUS_ADD(node, FIXED_MIN);
3701
18.2k
          }
3702
20.8k
        }
3703
39.0k
        break;
3704
3705
515
      case BAG_OPTION:
3706
13.9k
      case BAG_STOP_BACKTRACK:
3707
13.9k
        len = node_min_byte_len(ND_BODY(node), env);
3708
13.9k
        break;
3709
4.23k
      case BAG_IF_ELSE:
3710
4.23k
        {
3711
4.23k
          OnigLen elen;
3712
3713
4.23k
          len = node_min_byte_len(ND_BODY(node), env);
3714
4.23k
          if (IS_NOT_NULL(en->te.Then))
3715
2.88k
            len += node_min_byte_len(en->te.Then, env);
3716
4.23k
          if (IS_NOT_NULL(en->te.Else))
3717
1.92k
            elen = node_min_byte_len(en->te.Else, env);
3718
2.31k
          else elen = 0;
3719
3720
4.23k
          if (elen < len) len = elen;
3721
4.23k
        }
3722
4.23k
        break;
3723
57.2k
      }
3724
57.2k
    }
3725
57.2k
    break;
3726
3727
57.2k
  case ND_GIMMICK:
3728
18.8k
    {
3729
18.8k
      GimmickNode* g = GIMMICK_(node);
3730
18.8k
      if (g->type == GIMMICK_FAIL) {
3731
4.53k
        len = INFINITE_LEN;
3732
4.53k
        break;
3733
4.53k
      }
3734
18.8k
    }
3735
    /* fall */
3736
26.1k
  case ND_ANCHOR:
3737
26.1k
  default:
3738
26.1k
    break;
3739
476k
  }
3740
3741
476k
  return len;
3742
476k
}
3743
3744
static int
3745
check_backrefs(Node* node, ParseEnv* env)
3746
865k
{
3747
865k
  int r;
3748
3749
865k
  switch (ND_TYPE(node)) {
3750
188k
  case ND_LIST:
3751
204k
  case ND_ALT:
3752
679k
    do {
3753
679k
      r = check_backrefs(ND_CAR(node), env);
3754
679k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3755
204k
    break;
3756
3757
29.7k
  case ND_ANCHOR:
3758
29.7k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3759
22.8k
      r = 0;
3760
22.8k
      break;
3761
22.8k
    }
3762
    /* fall */
3763
85.0k
  case ND_QUANT:
3764
85.0k
    r = check_backrefs(ND_BODY(node), env);
3765
85.0k
    break;
3766
3767
55.0k
  case ND_BAG:
3768
55.0k
    r = check_backrefs(ND_BODY(node), env);
3769
55.0k
    {
3770
55.0k
      BagNode* en = BAG_(node);
3771
3772
55.0k
      if (en->type == BAG_IF_ELSE) {
3773
6.46k
        if (r != 0) return r;
3774
6.45k
        if (IS_NOT_NULL(en->te.Then)) {
3775
2.74k
          r = check_backrefs(en->te.Then, env);
3776
2.74k
          if (r != 0) return r;
3777
2.74k
        }
3778
6.44k
        if (IS_NOT_NULL(en->te.Else)) {
3779
4.20k
          r = check_backrefs(en->te.Else, env);
3780
4.20k
        }
3781
6.44k
      }
3782
55.0k
    }
3783
55.0k
    break;
3784
3785
55.0k
  case ND_BACKREF:
3786
10.4k
    {
3787
10.4k
      int i;
3788
10.4k
      BackRefNode* br = BACKREF_(node);
3789
10.4k
      int* backs = BACKREFS_P(br);
3790
10.4k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3791
3792
25.9k
      for (i = 0; i < br->back_num; i++) {
3793
15.8k
        if (backs[i] > env->num_mem)
3794
314
          return ONIGERR_INVALID_BACKREF;
3795
3796
15.5k
        ND_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
3797
15.5k
      }
3798
10.1k
      r = 0;
3799
10.1k
    }
3800
0
    break;
3801
3802
488k
  default:
3803
488k
    r = 0;
3804
488k
    break;
3805
865k
  }
3806
3807
865k
  return r;
3808
865k
}
3809
3810
static int
3811
set_empty_repeat_node_trav(Node* node, Node* empty, ParseEnv* env)
3812
160k
{
3813
160k
  int r;
3814
3815
160k
  switch (ND_TYPE(node)) {
3816
24.3k
  case ND_LIST:
3817
28.9k
  case ND_ALT:
3818
110k
    do {
3819
110k
      r = set_empty_repeat_node_trav(ND_CAR(node), empty, env);
3820
110k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3821
28.9k
    break;
3822
3823
6.13k
  case ND_ANCHOR:
3824
6.13k
    {
3825
6.13k
      AnchorNode* an = ANCHOR_(node);
3826
3827
6.13k
      if (! ANCHOR_HAS_BODY(an)) {
3828
2.51k
        r = 0;
3829
2.51k
        break;
3830
2.51k
      }
3831
3832
3.62k
      switch (an->type) {
3833
896
      case ANCR_PREC_READ:
3834
2.06k
      case ANCR_LOOK_BEHIND:
3835
2.06k
        empty = NULL_NODE;
3836
2.06k
        break;
3837
1.55k
      default:
3838
1.55k
        break;
3839
3.62k
      }
3840
3.62k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3841
3.62k
    }
3842
0
    break;
3843
3844
14.8k
  case ND_QUANT:
3845
14.8k
    {
3846
14.8k
      QuantNode* qn = QUANT_(node);
3847
3848
14.8k
      if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
3849
14.8k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3850
14.8k
    }
3851
14.8k
    break;
3852
3853
24.3k
  case ND_BAG:
3854
24.3k
    if (IS_NOT_NULL(ND_BODY(node))) {
3855
24.3k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3856
24.3k
      if (r != 0) return r;
3857
24.3k
    }
3858
24.3k
    {
3859
24.3k
      BagNode* en = BAG_(node);
3860
3861
24.3k
      r = 0;
3862
24.3k
      if (en->type == BAG_MEMORY) {
3863
19.3k
        if (ND_IS_BACKREF(node)) {
3864
9.62k
          if (IS_NOT_NULL(empty))
3865
4.19k
            PARSEENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
3866
9.62k
        }
3867
19.3k
      }
3868
4.96k
      else if (en->type == BAG_IF_ELSE) {
3869
1.63k
        if (IS_NOT_NULL(en->te.Then)) {
3870
842
          r = set_empty_repeat_node_trav(en->te.Then, empty, env);
3871
842
          if (r != 0) return r;
3872
842
        }
3873
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3874
930
          r = set_empty_repeat_node_trav(en->te.Else, empty, env);
3875
930
        }
3876
1.63k
      }
3877
24.3k
    }
3878
24.3k
    break;
3879
3880
86.0k
  default:
3881
86.0k
    r = 0;
3882
86.0k
    break;
3883
160k
  }
3884
3885
160k
  return r;
3886
160k
}
3887
3888
static int
3889
is_ancestor_node(Node* node, Node* me)
3890
5.01k
{
3891
5.01k
  Node* parent;
3892
3893
11.9k
  while ((parent = ND_PARENT(me)) != NULL_NODE) {
3894
6.95k
    if (parent == node) return 1;
3895
6.89k
    me = parent;
3896
6.89k
  }
3897
4.94k
  return 0;
3898
5.01k
}
3899
3900
static void
3901
set_empty_status_check_trav(Node* node, ParseEnv* env)
3902
160k
{
3903
160k
  switch (ND_TYPE(node)) {
3904
24.3k
  case ND_LIST:
3905
28.9k
  case ND_ALT:
3906
110k
    do {
3907
110k
      set_empty_status_check_trav(ND_CAR(node), env);
3908
110k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3909
28.9k
    break;
3910
3911
6.13k
  case ND_ANCHOR:
3912
6.13k
    {
3913
6.13k
      AnchorNode* an = ANCHOR_(node);
3914
3915
6.13k
      if (! ANCHOR_HAS_BODY(an)) break;
3916
3.62k
      set_empty_status_check_trav(ND_BODY(node), env);
3917
3.62k
    }
3918
0
    break;
3919
3920
14.8k
  case ND_QUANT:
3921
14.8k
    set_empty_status_check_trav(ND_BODY(node), env);
3922
14.8k
    break;
3923
3924
24.3k
  case ND_BAG:
3925
24.3k
    if (IS_NOT_NULL(ND_BODY(node)))
3926
24.3k
      set_empty_status_check_trav(ND_BODY(node), env);
3927
24.3k
    {
3928
24.3k
      BagNode* en = BAG_(node);
3929
3930
24.3k
      if (en->type == BAG_IF_ELSE) {
3931
1.63k
        if (IS_NOT_NULL(en->te.Then)) {
3932
842
          set_empty_status_check_trav(en->te.Then, env);
3933
842
        }
3934
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3935
930
          set_empty_status_check_trav(en->te.Else, env);
3936
930
        }
3937
1.63k
      }
3938
24.3k
    }
3939
24.3k
    break;
3940
3941
9.84k
  case ND_BACKREF:
3942
9.84k
    {
3943
9.84k
      int i;
3944
9.84k
      int* backs;
3945
9.84k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3946
9.84k
      BackRefNode* br = BACKREF_(node);
3947
9.84k
      backs = BACKREFS_P(br);
3948
24.9k
      for (i = 0; i < br->back_num; i++) {
3949
15.1k
        Node* ernode = mem_env[backs[i]].empty_repeat_node;
3950
15.1k
        if (IS_NOT_NULL(ernode)) {
3951
5.01k
          if (! is_ancestor_node(ernode, node)) {
3952
4.94k
            MEM_STATUS_LIMIT_ON(QUANT_(ernode)->empty_status_mem, backs[i]);
3953
4.94k
            ND_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
3954
4.94k
            ND_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
3955
4.94k
          }
3956
5.01k
        }
3957
15.1k
      }
3958
9.84k
    }
3959
9.84k
    break;
3960
3961
76.1k
  default:
3962
76.1k
    break;
3963
160k
  }
3964
160k
}
3965
3966
static void
3967
set_parent_node_trav(Node* node, Node* parent)
3968
160k
{
3969
160k
  ND_PARENT(node) = parent;
3970
3971
160k
  switch (ND_TYPE(node)) {
3972
24.3k
  case ND_LIST:
3973
28.9k
  case ND_ALT:
3974
110k
    do {
3975
110k
      set_parent_node_trav(ND_CAR(node), node);
3976
110k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3977
28.9k
    break;
3978
3979
6.13k
  case ND_ANCHOR:
3980
6.13k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
3981
3.62k
    set_parent_node_trav(ND_BODY(node), node);
3982
3.62k
    break;
3983
3984
14.8k
  case ND_QUANT:
3985
14.8k
    set_parent_node_trav(ND_BODY(node), node);
3986
14.8k
    break;
3987
3988
24.3k
  case ND_BAG:
3989
24.3k
    if (IS_NOT_NULL(ND_BODY(node)))
3990
24.3k
      set_parent_node_trav(ND_BODY(node), node);
3991
24.3k
    {
3992
24.3k
      BagNode* en = BAG_(node);
3993
3994
24.3k
      if (en->type == BAG_IF_ELSE) {
3995
1.63k
        if (IS_NOT_NULL(en->te.Then))
3996
842
          set_parent_node_trav(en->te.Then, node);
3997
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3998
930
          set_parent_node_trav(en->te.Else, node);
3999
930
        }
4000
1.63k
      }
4001
24.3k
    }
4002
24.3k
    break;
4003
4004
86.0k
  default:
4005
86.0k
    break;
4006
160k
  }
4007
160k
}
4008
4009
4010
#ifdef USE_CALL
4011
4012
54.5k
#define RECURSION_EXIST        (1<<0)
4013
64.0k
#define RECURSION_MUST         (1<<1)
4014
215k
#define RECURSION_INFINITE     (1<<2)
4015
4016
static int
4017
infinite_recursive_call_check(Node* node, ParseEnv* env, int head)
4018
329k
{
4019
329k
  int ret;
4020
329k
  int r = 0;
4021
4022
329k
  switch (ND_TYPE(node)) {
4023
46.1k
  case ND_LIST:
4024
46.1k
    {
4025
46.1k
      Node *x;
4026
46.1k
      OnigLen min;
4027
4028
46.1k
      x = node;
4029
172k
      do {
4030
172k
        ret = infinite_recursive_call_check(ND_CAR(x), env, head);
4031
172k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4032
172k
        r |= ret;
4033
172k
        if (head != 0) {
4034
16.7k
          min = node_min_byte_len(ND_CAR(x), env);
4035
16.7k
          if (min != 0) head = 0;
4036
16.7k
        }
4037
172k
      } while (IS_NOT_NULL(x = ND_CDR(x)));
4038
46.1k
    }
4039
46.0k
    break;
4040
4041
46.0k
  case ND_ALT:
4042
11.5k
    {
4043
11.5k
      int must;
4044
4045
11.5k
      must = RECURSION_MUST;
4046
31.7k
      do {
4047
31.7k
        ret = infinite_recursive_call_check(ND_CAR(node), env, head);
4048
31.7k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4049
4050
31.7k
        r    |= (ret & RECURSION_EXIST);
4051
31.7k
        must &= ret;
4052
31.7k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4053
11.4k
      r |= must;
4054
11.4k
    }
4055
0
    break;
4056
4057
21.3k
  case ND_QUANT:
4058
21.3k
    if (QUANT_(node)->upper == 0) break;
4059
4060
20.8k
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4061
20.8k
    if (r < 0) return r;
4062
20.8k
    if ((r & RECURSION_MUST) != 0) {
4063
5.14k
      if (QUANT_(node)->lower == 0)
4064
2.41k
        r &= ~RECURSION_MUST;
4065
5.14k
    }
4066
20.8k
    break;
4067
4068
10.7k
  case ND_ANCHOR:
4069
10.7k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
4070
5.10k
      break;
4071
    /* fall */
4072
50.3k
  case ND_CALL:
4073
50.3k
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4074
50.3k
    break;
4075
4076
84.8k
  case ND_BAG:
4077
84.8k
    {
4078
84.8k
      BagNode* en = BAG_(node);
4079
4080
84.8k
      if (en->type == BAG_MEMORY) {
4081
78.0k
        if (ND_IS_MARK2(node))
4082
17.2k
          return 0;
4083
60.8k
        else if (ND_IS_MARK1(node))
4084
21.0k
          return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
4085
21.0k
                  : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
4086
39.7k
        else {
4087
39.7k
          ND_STATUS_ADD(node, MARK2);
4088
39.7k
          r = infinite_recursive_call_check(ND_BODY(node), env, head);
4089
39.7k
          ND_STATUS_REMOVE(node, MARK2);
4090
39.7k
        }
4091
78.0k
      }
4092
6.80k
      else if (en->type == BAG_IF_ELSE) {
4093
3.46k
        int eret;
4094
4095
3.46k
        ret = infinite_recursive_call_check(ND_BODY(node), env, head);
4096
3.46k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4097
3.45k
        r |= ret;
4098
3.45k
        if (IS_NOT_NULL(en->te.Then)) {
4099
2.11k
          OnigLen min;
4100
2.11k
          if (head != 0) {
4101
1.01k
            min = node_min_byte_len(ND_BODY(node), env);
4102
1.01k
          }
4103
1.09k
          else min = 0;
4104
4105
2.11k
          ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
4106
2.11k
          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4107
2.10k
          r |= ret;
4108
2.10k
        }
4109
3.44k
        if (IS_NOT_NULL(en->te.Else)) {
4110
1.79k
          eret = infinite_recursive_call_check(en->te.Else, env, head);
4111
1.79k
          if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
4112
1.79k
          r |= (eret & RECURSION_EXIST);
4113
1.79k
          if ((eret & RECURSION_MUST) == 0)
4114
1.44k
            r &= ~RECURSION_MUST;
4115
1.79k
        }
4116
1.64k
        else {
4117
1.64k
          r &= ~RECURSION_MUST;
4118
1.64k
        }
4119
3.44k
      }
4120
3.34k
      else {
4121
3.34k
        r = infinite_recursive_call_check(ND_BODY(node), env, head);
4122
3.34k
      }
4123
84.8k
    }
4124
46.5k
    break;
4125
4126
110k
  default:
4127
110k
    break;
4128
329k
  }
4129
4130
291k
  return r;
4131
329k
}
4132
4133
static int
4134
infinite_recursive_call_check_trav(Node* node, ParseEnv* env)
4135
125k
{
4136
125k
  int r;
4137
4138
125k
  switch (ND_TYPE(node)) {
4139
18.8k
  case ND_LIST:
4140
23.4k
  case ND_ALT:
4141
86.8k
    do {
4142
86.8k
      r = infinite_recursive_call_check_trav(ND_CAR(node), env);
4143
86.8k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4144
23.4k
    break;
4145
4146
5.19k
  case ND_ANCHOR:
4147
5.19k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4148
2.72k
      r = 0;
4149
2.72k
      break;
4150
2.72k
    }
4151
    /* fall */
4152
15.9k
  case ND_QUANT:
4153
15.9k
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4154
15.9k
    break;
4155
4156
18.2k
  case ND_BAG:
4157
18.2k
    {
4158
18.2k
      BagNode* en = BAG_(node);
4159
4160
18.2k
      if (en->type == BAG_MEMORY) {
4161
13.8k
        if (ND_IS_RECURSION(node) && ND_IS_CALLED(node)) {
4162
3.28k
          int ret;
4163
4164
3.28k
          ND_STATUS_ADD(node, MARK1);
4165
4166
3.28k
          ret = infinite_recursive_call_check(ND_BODY(node), env, 1);
4167
3.28k
          if (ret < 0) return ret;
4168
3.28k
          else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
4169
228
            return ONIGERR_NEVER_ENDING_RECURSION;
4170
4171
3.05k
          ND_STATUS_REMOVE(node, MARK1);
4172
3.05k
        }
4173
13.8k
      }
4174
4.43k
      else if (en->type == BAG_IF_ELSE) {
4175
1.38k
        if (IS_NOT_NULL(en->te.Then)) {
4176
616
          r = infinite_recursive_call_check_trav(en->te.Then, env);
4177
616
          if (r != 0) return r;
4178
616
        }
4179
1.36k
        if (IS_NOT_NULL(en->te.Else)) {
4180
864
          r = infinite_recursive_call_check_trav(en->te.Else, env);
4181
864
          if (r != 0) return r;
4182
864
        }
4183
1.36k
      }
4184
18.2k
    }
4185
4186
18.0k
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4187
18.0k
    break;
4188
4189
64.9k
  default:
4190
64.9k
    r = 0;
4191
64.9k
    break;
4192
125k
  }
4193
4194
125k
  return r;
4195
125k
}
4196
4197
static int
4198
recursive_call_check(Node* node)
4199
477k
{
4200
477k
  int r;
4201
4202
477k
  switch (ND_TYPE(node)) {
4203
63.6k
  case ND_LIST:
4204
79.2k
  case ND_ALT:
4205
79.2k
    r = 0;
4206
291k
    do {
4207
291k
      r |= recursive_call_check(ND_CAR(node));
4208
291k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4209
79.2k
    break;
4210
4211
16.5k
  case ND_ANCHOR:
4212
16.5k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4213
7.66k
      r = 0;
4214
7.66k
      break;
4215
7.66k
    }
4216
    /* fall */
4217
47.2k
  case ND_QUANT:
4218
47.2k
    r = recursive_call_check(ND_BODY(node));
4219
47.2k
    break;
4220
4221
55.2k
  case ND_CALL:
4222
55.2k
    r = recursive_call_check(ND_BODY(node));
4223
55.2k
    if (r != 0) {
4224
32.2k
      if (ND_IS_MARK1(ND_BODY(node)))
4225
13.4k
        ND_STATUS_ADD(node, RECURSION);
4226
32.2k
    }
4227
55.2k
    break;
4228
4229
115k
  case ND_BAG:
4230
115k
    {
4231
115k
      BagNode* en = BAG_(node);
4232
4233
115k
      if (en->type == BAG_MEMORY) {
4234
105k
        if (ND_IS_MARK2(node))
4235
22.7k
          return 0;
4236
82.5k
        else if (ND_IS_MARK1(node))
4237
24.4k
          return 1; /* recursion */
4238
58.0k
        else {
4239
58.0k
          ND_STATUS_ADD(node, MARK2);
4240
58.0k
          r = recursive_call_check(ND_BODY(node));
4241
58.0k
          ND_STATUS_REMOVE(node, MARK2);
4242
58.0k
        }
4243
105k
      }
4244
10.4k
      else if (en->type == BAG_IF_ELSE) {
4245
4.81k
        r = 0;
4246
4.81k
        if (IS_NOT_NULL(en->te.Then)) {
4247
2.71k
          r |= recursive_call_check(en->te.Then);
4248
2.71k
        }
4249
4.81k
        if (IS_NOT_NULL(en->te.Else)) {
4250
2.65k
          r |= recursive_call_check(en->te.Else);
4251
2.65k
        }
4252
4.81k
        r |= recursive_call_check(ND_BODY(node));
4253
4.81k
      }
4254
5.59k
      else {
4255
5.59k
        r = recursive_call_check(ND_BODY(node));
4256
5.59k
      }
4257
115k
    }
4258
68.4k
    break;
4259
4260
172k
  default:
4261
172k
    r = 0;
4262
172k
    break;
4263
477k
  }
4264
4265
429k
  return r;
4266
477k
}
4267
4268
13.9k
#define IN_RECURSION         (1<<0)
4269
124k
#define FOUND_CALLED_NODE    1
4270
4271
static int
4272
recursive_call_check_trav(Node* node, ParseEnv* env, int state)
4273
133k
{
4274
133k
  int r = 0;
4275
4276
133k
  switch (ND_TYPE(node)) {
4277
20.8k
  case ND_LIST:
4278
25.6k
  case ND_ALT:
4279
25.6k
    {
4280
25.6k
      int ret;
4281
93.5k
      do {
4282
93.5k
        ret = recursive_call_check_trav(ND_CAR(node), env, state);
4283
93.5k
        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
4284
89.7k
        else if (ret < 0) return ret;
4285
93.5k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4286
25.6k
    }
4287
25.6k
    break;
4288
4289
25.6k
  case ND_QUANT:
4290
13.6k
    r = recursive_call_check_trav(ND_BODY(node), env, state);
4291
13.6k
    if (QUANT_(node)->upper == 0) {
4292
706
      if (r == FOUND_CALLED_NODE)
4293
156
        QUANT_(node)->include_referred = 1;
4294
706
    }
4295
13.6k
    break;
4296
4297
5.27k
  case ND_ANCHOR:
4298
5.27k
    {
4299
5.27k
      AnchorNode* an = ANCHOR_(node);
4300
5.27k
      if (ANCHOR_HAS_BODY(an))
4301
2.46k
        r = recursive_call_check_trav(ND_ANCHOR_BODY(an), env, state);
4302
5.27k
    }
4303
5.27k
    break;
4304
4305
18.6k
  case ND_BAG:
4306
18.6k
    {
4307
18.6k
      int ret;
4308
18.6k
      int state1;
4309
18.6k
      BagNode* en = BAG_(node);
4310
4311
18.6k
      if (en->type == BAG_MEMORY) {
4312
13.9k
        if (ND_IS_CALLED(node)) {
4313
4.70k
          r = FOUND_CALLED_NODE;
4314
4.70k
          goto check_recursion;
4315
4.70k
        }
4316
9.28k
        else if ((state & IN_RECURSION) != 0) {
4317
9.29k
        check_recursion:
4318
9.29k
          if (! ND_IS_RECURSION(node)) {
4319
9.29k
            ND_STATUS_ADD(node, MARK1);
4320
9.29k
            ret = recursive_call_check(ND_BODY(node));
4321
9.29k
            if (ret != 0) {
4322
4.68k
              ND_STATUS_ADD(node, RECURSION);
4323
4.68k
              MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
4324
4.68k
            }
4325
9.29k
            ND_STATUS_REMOVE(node, MARK1);
4326
9.29k
          }
4327
9.29k
        }
4328
13.9k
      }
4329
4330
18.6k
      state1 = state;
4331
18.6k
      if (ND_IS_RECURSION(node))
4332
4.68k
        state1 |= IN_RECURSION;
4333
4334
18.6k
      ret = recursive_call_check_trav(ND_BODY(node), env, state1);
4335
18.6k
      if (ret == FOUND_CALLED_NODE)
4336
1.84k
        r = FOUND_CALLED_NODE;
4337
4338
18.6k
      if (en->type == BAG_IF_ELSE) {
4339
1.49k
        if (IS_NOT_NULL(en->te.Then)) {
4340
658
          ret = recursive_call_check_trav(en->te.Then, env, state1);
4341
658
          if (ret == FOUND_CALLED_NODE)
4342
24
            r = FOUND_CALLED_NODE;
4343
658
        }
4344
1.49k
        if (IS_NOT_NULL(en->te.Else)) {
4345
940
          ret = recursive_call_check_trav(en->te.Else, env, state1);
4346
940
          if (ret == FOUND_CALLED_NODE)
4347
34
            r = FOUND_CALLED_NODE;
4348
940
        }
4349
1.49k
      }
4350
18.6k
    }
4351
0
    break;
4352
4353
69.8k
  default:
4354
69.8k
    break;
4355
133k
  }
4356
4357
133k
  return r;
4358
133k
}
4359
4360
#endif
4361
4362
static void
4363
remove_from_list(Node* prev, Node* a)
4364
6.02k
{
4365
6.02k
  if (ND_CDR(prev) != a) return ;
4366
4367
6.02k
  ND_CDR(prev) = ND_CDR(a);
4368
6.02k
  ND_CDR(a) = NULL_NODE;
4369
6.02k
}
4370
4371
static int
4372
reduce_string_list(Node* node, OnigEncoding enc)
4373
776k
{
4374
776k
  int r = 0;
4375
4376
776k
  switch (ND_TYPE(node)) {
4377
188k
  case ND_LIST:
4378
188k
    {
4379
188k
      Node* prev;
4380
188k
      Node* curr;
4381
188k
      Node* prev_node;
4382
188k
      Node* next_node;
4383
4384
188k
      prev = NULL_NODE;
4385
519k
      do {
4386
519k
        next_node = ND_CDR(node);
4387
519k
        curr = ND_CAR(node);
4388
519k
        if (ND_TYPE(curr) == ND_STRING) {
4389
100k
          if (IS_NULL(prev)
4390
100k
              || STR_(curr)->flag  != STR_(prev)->flag
4391
100k
              || ND_STATUS(curr) != ND_STATUS(prev)) {
4392
94.0k
            prev = curr;
4393
94.0k
            prev_node = node;
4394
94.0k
          }
4395
6.02k
          else {
4396
6.02k
            r = node_str_node_cat(prev, curr);
4397
6.02k
            if (r != 0) return r;
4398
6.02k
            remove_from_list(prev_node, node);
4399
6.02k
            onig_node_free(node);
4400
6.02k
          }
4401
100k
        }
4402
419k
        else {
4403
419k
          if (IS_NOT_NULL(prev)) {
4404
42.8k
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4405
42.8k
            StrNode* sn = STR_(prev);
4406
42.8k
            if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4407
4
              return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4408
42.8k
#endif
4409
42.8k
            prev = NULL_NODE;
4410
42.8k
          }
4411
419k
          r = reduce_string_list(curr, enc);
4412
419k
          if (r != 0) return r;
4413
419k
          prev_node = node;
4414
419k
        }
4415
4416
519k
        node = next_node;
4417
519k
      } while (r == 0 && IS_NOT_NULL(node));
4418
4419
188k
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4420
188k
      if (IS_NOT_NULL(prev)) {
4421
51.1k
        StrNode* sn = STR_(prev);
4422
51.1k
        if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4423
30
          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4424
51.1k
      }
4425
188k
#endif
4426
188k
    }
4427
188k
    break;
4428
4429
188k
  case ND_ALT:
4430
169k
    do {
4431
169k
      r = reduce_string_list(ND_CAR(node), enc);
4432
169k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4433
16.0k
    break;
4434
4435
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4436
78.8k
  case ND_STRING:
4437
78.8k
    {
4438
78.8k
      StrNode* sn = STR_(node);
4439
78.8k
      if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4440
138
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4441
78.8k
    }
4442
78.7k
    break;
4443
78.7k
#endif
4444
4445
78.7k
  case ND_ANCHOR:
4446
29.8k
    if (IS_NULL(ND_BODY(node)))
4447
23.0k
      break;
4448
    /* fall */
4449
85.4k
  case ND_QUANT:
4450
85.4k
    r = reduce_string_list(ND_BODY(node), enc);
4451
85.4k
    break;
4452
4453
56.4k
  case ND_BAG:
4454
56.4k
    {
4455
56.4k
      BagNode* en = BAG_(node);
4456
4457
56.4k
      r = reduce_string_list(ND_BODY(node), enc);
4458
56.4k
      if (r != 0) return r;
4459
4460
56.3k
      if (en->type == BAG_IF_ELSE) {
4461
6.51k
        if (IS_NOT_NULL(en->te.Then)) {
4462
2.78k
          r = reduce_string_list(en->te.Then, enc);
4463
2.78k
          if (r != 0) return r;
4464
2.78k
        }
4465
6.50k
        if (IS_NOT_NULL(en->te.Else)) {
4466
4.22k
          r = reduce_string_list(en->te.Else, enc);
4467
4.22k
          if (r != 0) return r;
4468
4.22k
        }
4469
6.50k
      }
4470
56.3k
    }
4471
56.3k
    break;
4472
4473
328k
  default:
4474
328k
    break;
4475
776k
  }
4476
4477
776k
  return r;
4478
776k
}
4479
4480
4481
261k
#define IN_ALT          (1<<0)
4482
46.4k
#define IN_NOT          (1<<1)
4483
312k
#define IN_REAL_REPEAT  (1<<2)
4484
160k
#define IN_VAR_REPEAT   (1<<3)
4485
22.8k
#define IN_ZERO_REPEAT  (1<<4)
4486
124k
#define IN_MULTI_ENTRY  (1<<5)
4487
87.9k
#define IN_PREC_READ    (1<<6)
4488
164k
#define IN_LOOK_BEHIND  (1<<7)
4489
188k
#define IN_PEEK         (1<<8)
4490
4491
/* divide different length alternatives in look-behind.
4492
  (?<=A|B) ==> (?<=A)|(?<=B)
4493
  (?<!A|B) ==> (?<!A)(?<!B)
4494
*/
4495
static int
4496
divide_look_behind_alternatives(Node* node)
4497
1.38k
{
4498
1.38k
  int r;
4499
1.38k
  int anc_type;
4500
1.38k
  Node *head, *np, *insert_node;
4501
1.38k
  AnchorNode* an;
4502
4503
1.38k
  an = ANCHOR_(node);
4504
1.38k
  anc_type = an->type;
4505
4506
1.38k
  head = ND_ANCHOR_BODY(an);
4507
1.38k
  np = ND_CAR(head);
4508
1.38k
  node_swap(node, head);
4509
1.38k
  ND_CAR(node) = head;
4510
1.38k
  ND_BODY(head) = np;
4511
4512
1.38k
  np = node;
4513
11.2k
  while (IS_NOT_NULL(np = ND_CDR(np))) {
4514
9.86k
    r = onig_node_copy(&insert_node, head);
4515
9.86k
    if (r != 0) return r;
4516
9.86k
    CHECK_NULL_RETURN_MEMERR(insert_node);
4517
9.86k
    ND_BODY(insert_node) = ND_CAR(np);
4518
9.86k
    ND_CAR(np) = insert_node;
4519
9.86k
  }
4520
4521
1.38k
  if (anc_type == ANCR_LOOK_BEHIND_NOT) {
4522
420
    np = node;
4523
5.54k
    do {
4524
5.54k
      ND_SET_TYPE(np, ND_LIST);  /* alt -> list */
4525
5.54k
    } while (IS_NOT_NULL(np = ND_CDR(np)));
4526
420
  }
4527
1.38k
  return 0;
4528
1.38k
}
4529
4530
static int
4531
node_reduce_in_look_behind(Node* node)
4532
23.1k
{
4533
23.1k
  NodeType type;
4534
23.1k
  Node* body;
4535
4536
23.1k
  if (ND_TYPE(node) != ND_QUANT) return 0;
4537
4538
4.86k
  body = ND_BODY(node);
4539
4.86k
  type = ND_TYPE(body);
4540
4.86k
  if (type == ND_STRING || type == ND_CTYPE ||
4541
4.86k
      type == ND_CCLASS || type == ND_BACKREF) {
4542
3.86k
    QuantNode* qn = QUANT_(node);
4543
3.86k
    qn->upper = qn->lower;
4544
3.86k
    if (qn->upper == 0)
4545
1.61k
      return 1; /* removed */
4546
3.86k
  }
4547
4548
3.24k
  return 0;
4549
4.86k
}
4550
4551
static int
4552
list_reduce_in_look_behind(Node* node)
4553
29.7k
{
4554
29.7k
  int r;
4555
4556
29.7k
  switch (ND_TYPE(node)) {
4557
2.46k
  case ND_QUANT:
4558
2.46k
    r = node_reduce_in_look_behind(node);
4559
2.46k
    if (r > 0) r = 0;
4560
2.46k
    break;
4561
4562
19.0k
  case ND_LIST:
4563
20.6k
    do {
4564
20.6k
      r = node_reduce_in_look_behind(ND_CAR(node));
4565
20.6k
      if (r <= 0) break;
4566
20.6k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4567
0
    break;
4568
4569
8.24k
  default:
4570
8.24k
    r = 0;
4571
8.24k
    break;
4572
29.7k
  }
4573
4574
29.7k
  return r;
4575
29.7k
}
4576
4577
static int
4578
alt_reduce_in_look_behind(Node* node, regex_t* reg, ParseEnv* env)
4579
17.1k
{
4580
17.1k
  int r;
4581
4582
17.1k
  switch (ND_TYPE(node)) {
4583
2.06k
  case ND_ALT:
4584
14.7k
    do {
4585
14.7k
      r = list_reduce_in_look_behind(ND_CAR(node));
4586
14.7k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4587
2.06k
    break;
4588
4589
15.0k
  default:
4590
15.0k
    r = list_reduce_in_look_behind(node);
4591
15.0k
    break;
4592
17.1k
  }
4593
4594
17.1k
  return r;
4595
17.1k
}
4596
4597
static int tune_tree(Node* node, regex_t* reg, int state, ParseEnv* env);
4598
4599
static int
4600
tune_look_behind(Node* node, regex_t* reg, int state, ParseEnv* env)
4601
17.1k
{
4602
17.1k
  int r;
4603
17.1k
  int state1;
4604
17.1k
  int used;
4605
17.1k
  MinMaxCharLen ci;
4606
17.1k
  Node* body;
4607
17.1k
  AnchorNode* an = ANCHOR_(node);
4608
4609
17.1k
  used = FALSE;
4610
17.1k
  r = check_node_in_look_behind(ND_ANCHOR_BODY(an),
4611
17.1k
                                an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0,
4612
17.1k
                                &used);
4613
17.1k
  if (r < 0) return r;
4614
17.1k
  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4615
4616
17.1k
  if (an->type == ANCR_LOOK_BEHIND_NOT)
4617
7.18k
    state1 = state | IN_NOT | IN_LOOK_BEHIND;
4618
9.96k
  else
4619
9.96k
    state1 = state | IN_LOOK_BEHIND;
4620
4621
17.1k
  body = ND_ANCHOR_BODY(an);
4622
  /* Execute tune_tree(body) before call node_char_len().
4623
     Because case-fold expansion must be done before node_char_len().
4624
   */
4625
17.1k
  r = tune_tree(body, reg, state1, env);
4626
17.1k
  if (r != 0) return r;
4627
4628
17.1k
  r = alt_reduce_in_look_behind(body, reg, env);
4629
17.1k
  if (r != 0) return r;
4630
4631
17.1k
  r = node_char_len(body, reg, &ci, env);
4632
17.1k
  if (r >= 0) {
4633
    /* #177: overflow in onigenc_step_back() */
4634
17.0k
    if ((ci.max != INFINITE_LEN && ci.max > LOOK_BEHIND_MAX_CHAR_LEN)
4635
17.0k
      || ci.min > LOOK_BEHIND_MAX_CHAR_LEN) {
4636
430
      return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4637
430
    }
4638
4639
16.6k
    if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) {
4640
1.35k
      if (an->type == ANCR_LOOK_BEHIND_NOT)
4641
834
        r = onig_node_reset_fail(node);
4642
522
      else
4643
522
        r = onig_node_reset_empty(node);
4644
4645
1.35k
      return r;
4646
1.35k
    }
4647
4648
15.2k
    if (r == CHAR_LEN_TOP_ALT_FIXED) {
4649
1.40k
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) {
4650
1.38k
        r = divide_look_behind_alternatives(node);
4651
1.38k
        if (r == 0)
4652
1.38k
          r = tune_tree(node, reg, state, env);
4653
1.38k
      }
4654
16
      else if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND))
4655
0
        goto normal;
4656
16
      else
4657
16
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4658
1.40k
    }
4659
13.8k
    else { /* CHAR_LEN_NORMAL */
4660
13.8k
    normal:
4661
13.8k
      if (ci.min == INFINITE_LEN) {
4662
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4663
0
      }
4664
13.8k
      else {
4665
13.8k
        if (ci.min != ci.max &&
4666
13.8k
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND)) {
4667
52
          r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4668
52
        }
4669
13.8k
        else {
4670
13.8k
          Node* tail;
4671
4672
          /* check lead_node is already set by double call after
4673
             divide_look_behind_alternatives() */
4674
13.8k
          if (IS_NULL(an->lead_node)) {
4675
12.6k
            an->char_min_len = ci.min;
4676
12.6k
            an->char_max_len = ci.max;
4677
12.6k
            r = get_tree_tail_literal(body, &tail, reg, 0);
4678
12.6k
            if (r == GET_VALUE_FOUND) {
4679
9.87k
              r = onig_node_copy(&(an->lead_node), tail);
4680
9.87k
              if (r != 0) return r;
4681
9.87k
            }
4682
12.6k
          }
4683
13.8k
          r = ONIG_NORMAL;
4684
13.8k
        }
4685
13.8k
      }
4686
13.8k
    }
4687
15.2k
  }
4688
4689
15.3k
  return r;
4690
17.1k
}
4691
4692
static int
4693
tune_next(Node* node, Node* next_node, regex_t* reg)
4694
350k
{
4695
350k
  int called;
4696
350k
  NodeType type;
4697
4698
350k
  called = FALSE;
4699
4700
372k
 retry:
4701
372k
  type = ND_TYPE(node);
4702
372k
  if (type == ND_QUANT) {
4703
42.8k
    QuantNode* qn = QUANT_(node);
4704
42.8k
    if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
4705
28.9k
#ifdef USE_QUANT_PEEK_NEXT
4706
28.9k
      if (called == FALSE) {
4707
28.8k
        Node* n = get_tree_head_literal(next_node, 1, reg);
4708
        /* '\0': for UTF-16BE etc... */
4709
28.8k
        if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
4710
8.69k
          qn->next_head_exact = n;
4711
8.69k
        }
4712
28.8k
      }
4713
28.9k
#endif
4714
      /* automatic posseivation a*b ==> (?>a*)b */
4715
28.9k
      if (qn->lower <= 1) {
4716
28.2k
        if (is_strict_real_node(ND_BODY(node))) {
4717
20.2k
          Node *x, *y;
4718
20.2k
          x = get_tree_head_literal(ND_BODY(node), 0, reg);
4719
20.2k
          if (IS_NOT_NULL(x)) {
4720
16.9k
            y = get_tree_head_literal(next_node,  0, reg);
4721
16.9k
            if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
4722
6.86k
              Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
4723
6.86k
              CHECK_NULL_RETURN_MEMERR(en);
4724
6.86k
              ND_STATUS_ADD(en, STRICT_REAL_REPEAT);
4725
6.86k
              node_swap(node, en);
4726
6.86k
              ND_BODY(node) = en;
4727
6.86k
            }
4728
16.9k
          }
4729
20.2k
        }
4730
28.2k
      }
4731
28.9k
    }
4732
42.8k
  }
4733
329k
  else if (type == ND_BAG) {
4734
31.5k
    BagNode* en = BAG_(node);
4735
31.5k
    if (en->type == BAG_MEMORY) {
4736
21.3k
      if (ND_IS_CALLED(node))
4737
1.81k
        called = TRUE;
4738
21.3k
      node = ND_BODY(node);
4739
21.3k
      goto retry;
4740
21.3k
    }
4741
31.5k
  }
4742
350k
  return 0;
4743
372k
}
4744
4745
4746
static int
4747
is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])
4748
62.8k
{
4749
62.8k
  int i;
4750
4751
123k
  for (i = 0; i < n; i++) {
4752
70.0k
    OnigCaseFoldCodeItem* item = items + i;
4753
70.0k
    if (item->code_len != 1) return 0;
4754
70.0k
  }
4755
4756
53.7k
  return 1;
4757
62.8k
}
4758
4759
static int
4760
get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[],
4761
                                     OnigLen* rmin, OnigLen* rmax)
4762
62.8k
{
4763
62.8k
  int i;
4764
62.8k
  OnigLen len, minlen, maxlen;
4765
4766
62.8k
  minlen = INFINITE_LEN;
4767
62.8k
  maxlen = 0;
4768
171k
  for (i = 0; i < n; i++) {
4769
108k
    OnigCaseFoldCodeItem* item = items + i;
4770
4771
108k
    len = item->byte_len;
4772
108k
    if (len < minlen) minlen = len;
4773
108k
    if (len > maxlen) maxlen = len;
4774
108k
  }
4775
4776
62.8k
  *rmin = minlen;
4777
62.8k
  *rmax = maxlen;
4778
62.8k
  return 0;
4779
62.8k
}
4780
4781
static int
4782
make_code_list_to_string(Node** rnode, OnigEncoding enc,
4783
                         int n, OnigCodePoint codes[])
4784
53.6k
{
4785
53.6k
  int r, i, len;
4786
53.6k
  Node* node;
4787
53.6k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4788
4789
53.6k
  *rnode = NULL_NODE;
4790
53.6k
  node = onig_node_new_str(NULL, NULL);
4791
53.6k
  CHECK_NULL_RETURN_MEMERR(node);
4792
4793
163k
  for (i = 0; i < n; i++) {
4794
109k
    len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
4795
109k
    if (len < 0) {
4796
0
      r = len;
4797
0
      goto err;
4798
0
    }
4799
4800
109k
    r = onig_node_str_cat(node, buf, buf + len);
4801
109k
    if (r != 0) goto err;
4802
109k
  }
4803
4804
53.6k
  *rnode = node;
4805
53.6k
  return 0;
4806
4807
0
 err:
4808
0
  onig_node_free(node);
4809
0
  return r;
4810
53.6k
}
4811
4812
static int
4813
unravel_cf_node_add(Node** rlist, Node* add)
4814
123k
{
4815
123k
  Node *list;
4816
4817
123k
  list = *rlist;
4818
123k
  if (IS_NULL(list)) {
4819
62.5k
    list = onig_node_new_list(add, NULL);
4820
62.5k
    CHECK_NULL_RETURN_MEMERR(list);
4821
62.5k
    *rlist = list;
4822
62.5k
  }
4823
60.6k
  else {
4824
60.6k
    Node* r = node_list_add(list, add);
4825
60.6k
    CHECK_NULL_RETURN_MEMERR(r);
4826
60.6k
  }
4827
4828
123k
  return 0;
4829
123k
}
4830
4831
static int
4832
unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end,
4833
                      unsigned int flag)
4834
106k
{
4835
106k
  int r;
4836
106k
  Node *sn, *list;
4837
4838
106k
  list = *rlist;
4839
106k
  sn   = *rsn;
4840
4841
106k
  if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) {
4842
54.4k
    r = onig_node_str_cat(sn, s, end);
4843
54.4k
  }
4844
51.6k
  else {
4845
51.6k
    sn = onig_node_new_str(s, end);
4846
51.6k
    CHECK_NULL_RETURN_MEMERR(sn);
4847
4848
51.6k
    STR_(sn)->flag = flag;
4849
51.6k
    r = unravel_cf_node_add(&list, sn);
4850
51.6k
  }
4851
4852
106k
  if (r == 0) {
4853
106k
    *rlist = list;
4854
106k
    *rsn = sn;
4855
106k
  }
4856
106k
  return r;
4857
106k
}
4858
4859
static int
4860
unravel_cf_string_alt_or_cc_add(Node** rlist, int n,
4861
            OnigCaseFoldCodeItem items[], OnigEncoding enc,
4862
            OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
4863
62.8k
{
4864
62.8k
  int r, i;
4865
62.8k
  Node* node;
4866
4867
62.8k
  if (is_all_code_len_1_items(n, items)) {
4868
53.7k
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4869
4870
53.7k
    codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end);
4871
108k
    for (i = 0; i < n; i++) {
4872
55.0k
      OnigCaseFoldCodeItem* item = items + i;
4873
55.0k
      codes[i+1] = item->code[0];
4874
55.0k
    }
4875
53.7k
    r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes);
4876
53.7k
    if (r != 0) return r;
4877
53.7k
  }
4878
9.11k
  else {
4879
9.11k
    Node *snode, *alt, *curr;
4880
4881
9.11k
    snode = onig_node_new_str(s, end);
4882
9.11k
    CHECK_NULL_RETURN_MEMERR(snode);
4883
9.11k
    node = curr = onig_node_new_alt(snode, NULL_NODE);
4884
9.11k
    if (IS_NULL(curr)) {
4885
0
      onig_node_free(snode);
4886
0
      return ONIGERR_MEMORY;
4887
0
    }
4888
4889
9.11k
    r = 0;
4890
62.7k
    for (i = 0; i < n; i++) {
4891
53.6k
      OnigCaseFoldCodeItem* item = items + i;
4892
53.6k
      r = make_code_list_to_string(&snode, enc, item->code_len, item->code);
4893
53.6k
      if (r != 0) {
4894
0
        onig_node_free(node);
4895
0
        return r;
4896
0
      }
4897
4898
53.6k
      alt = onig_node_new_alt(snode, NULL_NODE);
4899
53.6k
      if (IS_NULL(alt)) {
4900
0
        onig_node_free(snode);
4901
0
        onig_node_free(node);
4902
0
        return ONIGERR_MEMORY;
4903
0
      }
4904
4905
53.6k
      ND_CDR(curr) = alt;
4906
53.6k
      curr = alt;
4907
53.6k
    }
4908
9.11k
  }
4909
4910
62.8k
  r = unravel_cf_node_add(rlist, node);
4911
62.8k
  if (r != 0) onig_node_free(node);
4912
62.8k
  return r;
4913
62.8k
}
4914
4915
static int
4916
unravel_cf_look_behind_add(Node** rlist, Node** rsn,
4917
                int n, OnigCaseFoldCodeItem items[], OnigEncoding enc,
4918
                UChar* s, OnigLen one_len)
4919
9.60k
{
4920
9.60k
  int r, i, found;
4921
4922
9.60k
  found = FALSE;
4923
12.9k
  for (i = 0; i < n; i++) {
4924
12.1k
    OnigCaseFoldCodeItem* item = items + i;
4925
12.1k
    if (item->byte_len == one_len) {
4926
12.1k
      if (item->code_len == 1) {
4927
8.72k
        found = TRUE;
4928
8.72k
        break;
4929
8.72k
      }
4930
12.1k
    }
4931
12.1k
  }
4932
4933
9.60k
  if (found == FALSE) {
4934
878
    r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */);
4935
878
  }
4936
8.72k
  else {
4937
8.72k
    Node* node;
4938
8.72k
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4939
4940
8.72k
    found = 0;
4941
8.72k
    codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len);
4942
18.0k
    for (i = 0; i < n; i++) {
4943
9.30k
      OnigCaseFoldCodeItem* item = items + i;
4944
9.30k
      if (item->byte_len == one_len) {
4945
9.30k
        if (item->code_len == 1) {
4946
9.23k
          codes[found++] = item->code[0];
4947
9.23k
        }
4948
9.30k
      }
4949
9.30k
    }
4950
8.72k
    r = onig_new_cclass_with_code_list(&node, enc, found, codes);
4951
8.72k
    if (r != 0) return r;
4952
4953
8.72k
    r = unravel_cf_node_add(rlist, node);
4954
8.72k
    if (r != 0) onig_node_free(node);
4955
4956
8.72k
    *rsn = NULL_NODE;
4957
8.72k
  }
4958
4959
9.60k
  return r;
4960
9.60k
}
4961
4962
static int
4963
unravel_case_fold_string(Node* node, regex_t* reg, int state)
4964
62.6k
{
4965
62.6k
  int r, n, in_look_behind;
4966
62.6k
  OnigLen min_len, max_len, one_len;
4967
62.6k
  UChar *start, *end, *p, *q;
4968
62.6k
  StrNode* snode;
4969
62.6k
  Node *sn, *list;
4970
62.6k
  OnigEncoding enc;
4971
62.6k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4972
4973
62.6k
  if (ND_STRING_IS_CASE_EXPANDED(node)) return 0;
4974
4975
62.6k
  ND_STATUS_REMOVE(node, IGNORECASE);
4976
62.6k
  snode = STR_(node);
4977
62.6k
  start = snode->s;
4978
62.6k
  end   = snode->end;
4979
62.6k
  if (start >= end) return 0;
4980
4981
62.5k
  in_look_behind = (state & IN_LOOK_BEHIND) != 0;
4982
62.5k
  enc = reg->enc;
4983
4984
62.5k
  list = sn = NULL_NODE;
4985
62.5k
  p = start;
4986
240k
  while (p < end) {
4987
177k
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end,
4988
177k
                                           items);
4989
177k
    if (n < 0) {
4990
0
      r = n;
4991
0
      goto err;
4992
0
    }
4993
4994
177k
    one_len = (OnigLen )enclen(enc, p);
4995
177k
    if (n == 0) {
4996
105k
      q = p + one_len;
4997
105k
      if (q > end) q = end;
4998
105k
      r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */);
4999
105k
      if (r != 0) goto err;
5000
105k
    }
5001
72.4k
    else {
5002
72.4k
      if (in_look_behind != 0) {
5003
9.60k
        q = p + one_len;
5004
9.60k
        if (items[0].byte_len != one_len) {
5005
1.86k
          r = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, q,
5006
1.86k
                                                 items);
5007
1.86k
          if (r < 0) goto err;
5008
1.86k
          n = r;
5009
1.86k
        }
5010
9.60k
        r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len);
5011
9.60k
        if (r != 0) goto err;
5012
9.60k
      }
5013
62.8k
      else {
5014
62.8k
        get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len);
5015
62.8k
        if (min_len != max_len) {
5016
0
          r = ONIGERR_PARSER_BUG;
5017
0
          goto err;
5018
0
        }
5019
5020
62.8k
        q = p + max_len;
5021
62.8k
        r = unravel_cf_string_alt_or_cc_add(&list, n, items, enc,
5022
62.8k
                                            reg->case_fold_flag, p, q);
5023
62.8k
        if (r != 0) goto err;
5024
62.8k
        sn = NULL_NODE;
5025
62.8k
      }
5026
72.4k
    }
5027
5028
177k
    p = q;
5029
177k
  }
5030
5031
62.5k
  if (IS_NOT_NULL(list)) {
5032
62.5k
    if (node_list_len(list) == 1) {
5033
46.9k
      node_swap(node, ND_CAR(list));
5034
46.9k
    }
5035
15.5k
    else {
5036
15.5k
      node_swap(node, list);
5037
15.5k
    }
5038
62.5k
    onig_node_free(list);
5039
62.5k
  }
5040
0
  else {
5041
0
    node_swap(node, sn);
5042
0
    onig_node_free(sn);
5043
0
  }
5044
62.5k
  return 0;
5045
5046
0
 err:
5047
0
  if (IS_NOT_NULL(list))
5048
0
    onig_node_free(list);
5049
0
  else if (IS_NOT_NULL(sn))
5050
0
    onig_node_free(sn);
5051
5052
0
  return r;
5053
62.5k
}
5054
5055
#ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
5056
static enum BodyEmptyType
5057
quantifiers_memory_node_info(Node* node)
5058
74.2k
{
5059
74.2k
  int r = BODY_MAY_BE_EMPTY;
5060
5061
74.2k
  switch (ND_TYPE(node)) {
5062
7.84k
  case ND_LIST:
5063
11.3k
  case ND_ALT:
5064
11.3k
    {
5065
11.3k
      int v;
5066
33.7k
      do {
5067
33.7k
        v = quantifiers_memory_node_info(ND_CAR(node));
5068
33.7k
        if (v > r) r = v;
5069
33.7k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
5070
11.3k
    }
5071
11.3k
    break;
5072
5073
0
#ifdef USE_CALL
5074
1.10k
  case ND_CALL:
5075
1.10k
    if (ND_IS_RECURSION(node)) {
5076
782
      return BODY_MAY_BE_EMPTY_REC; /* tiny version */
5077
782
    }
5078
320
    else
5079
320
      r = quantifiers_memory_node_info(ND_BODY(node));
5080
320
    break;
5081
320
#endif
5082
5083
16.9k
  case ND_QUANT:
5084
16.9k
    {
5085
16.9k
      QuantNode* qn = QUANT_(node);
5086
16.9k
      if (qn->upper != 0) {
5087
16.1k
        r = quantifiers_memory_node_info(ND_BODY(node));
5088
16.1k
      }
5089
16.9k
    }
5090
16.9k
    break;
5091
5092
14.2k
  case ND_BAG:
5093
14.2k
    {
5094
14.2k
      BagNode* en = BAG_(node);
5095
14.2k
      switch (en->type) {
5096
6.89k
      case BAG_MEMORY:
5097
6.89k
        if (ND_IS_RECURSION(node)) {
5098
1.02k
          return BODY_MAY_BE_EMPTY_REC;
5099
1.02k
        }
5100
5.87k
        return BODY_MAY_BE_EMPTY_MEM;
5101
0
        break;
5102
5103
354
      case BAG_OPTION:
5104
5.91k
      case BAG_STOP_BACKTRACK:
5105
5.91k
        r = quantifiers_memory_node_info(ND_BODY(node));
5106
5.91k
        break;
5107
1.40k
      case BAG_IF_ELSE:
5108
1.40k
        {
5109
1.40k
          int v;
5110
1.40k
          r = quantifiers_memory_node_info(ND_BODY(node));
5111
1.40k
          if (IS_NOT_NULL(en->te.Then)) {
5112
894
            v = quantifiers_memory_node_info(en->te.Then);
5113
894
            if (v > r) r = v;
5114
894
          }
5115
1.40k
          if (IS_NOT_NULL(en->te.Else)) {
5116
624
            v = quantifiers_memory_node_info(en->te.Else);
5117
624
            if (v > r) r = v;
5118
624
          }
5119
1.40k
        }
5120
1.40k
        break;
5121
14.2k
      }
5122
14.2k
    }
5123
7.32k
    break;
5124
5125
7.32k
  case ND_BACKREF:
5126
12.4k
  case ND_STRING:
5127
16.9k
  case ND_CTYPE:
5128
19.0k
  case ND_CCLASS:
5129
20.0k
  case ND_ANCHOR:
5130
30.7k
  case ND_GIMMICK:
5131
30.7k
  default:
5132
30.7k
    break;
5133
74.2k
  }
5134
5135
66.6k
  return r;
5136
74.2k
}
5137
#endif /* USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT */
5138
5139
5140
#ifdef USE_CALL
5141
5142
#ifdef __GNUC__
5143
__inline
5144
#endif
5145
static int
5146
check_call_reference(CallNode* cn, ParseEnv* env, int state)
5147
8.11k
{
5148
8.11k
  MemEnv* mem_env = PARSEENV_MEMENV(env);
5149
5150
8.11k
  if (cn->by_number != 0) {
5151
8.02k
    int gnum = cn->called_gnum;
5152
5153
8.02k
    if (env->num_named > 0 &&
5154
8.02k
        IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5155
8.02k
        ! OPTON_CAPTURE_GROUP(env->options)) {
5156
8
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
5157
8
    }
5158
5159
8.01k
    if (gnum > env->num_mem) {
5160
322
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
5161
322
                                     cn->name, cn->name_end);
5162
322
      return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5163
322
    }
5164
5165
7.73k
  set_call_attr:
5166
7.73k
    ND_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node;
5167
7.73k
    if (IS_NULL(ND_CALL_BODY(cn))) {
5168
0
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5169
0
                                     cn->name, cn->name_end);
5170
0
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5171
0
    }
5172
5173
7.73k
    ND_STATUS_ADD(ND_CALL_BODY(cn), REFERENCED);
5174
7.73k
  }
5175
88
  else {
5176
88
    int *refs;
5177
5178
88
    int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
5179
88
    if (n <= 0) {
5180
38
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5181
38
                                     cn->name, cn->name_end);
5182
38
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5183
38
    }
5184
50
    else if (n > 1) {
5185
4
      onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
5186
4
                                     cn->name, cn->name_end);
5187
4
      return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
5188
4
    }
5189
46
    else {
5190
46
      cn->called_gnum = refs[0];
5191
46
      goto set_call_attr;
5192
46
    }
5193
88
  }
5194
5195
7.73k
  return 0;
5196
8.11k
}
5197
5198
#ifdef USE_WHOLE_OPTIONS
5199
static int
5200
check_whole_options_position(Node* node /* root */)
5201
20
{
5202
20
  int is_list;
5203
5204
20
  is_list = FALSE;
5205
5206
30
 start:
5207
30
  switch (ND_TYPE(node)) {
5208
10
  case ND_LIST:
5209
10
    if (IS_NOT_NULL(ND_CDR(node)))
5210
10
      is_list = TRUE;
5211
5212
10
    node = ND_CAR(node);
5213
10
    goto start;
5214
0
    break;
5215
5216
16
  case ND_BAG:
5217
16
    {
5218
16
      BagNode* en = BAG_(node);
5219
5220
16
      if (en->type == BAG_OPTION) {
5221
14
        if (ND_IS_WHOLE_OPTIONS(node)) {
5222
12
          if (is_list == TRUE && IS_NOT_NULL(ND_BODY(node)))
5223
2
            break;
5224
5225
10
          return 0;
5226
12
        }
5227
14
      }
5228
16
    }
5229
4
    break;
5230
5231
4
  default:
5232
4
    break;
5233
30
  }
5234
5235
10
  return ONIGERR_INVALID_GROUP_OPTION;
5236
30
}
5237
#endif
5238
5239
static void
5240
tune_call2_call(Node* node)
5241
675k
{
5242
675k
  switch (ND_TYPE(node)) {
5243
81.7k
  case ND_LIST:
5244
104k
  case ND_ALT:
5245
417k
    do {
5246
417k
      tune_call2_call(ND_CAR(node));
5247
417k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5248
104k
    break;
5249
5250
43.2k
  case ND_QUANT:
5251
43.2k
    tune_call2_call(ND_BODY(node));
5252
43.2k
    break;
5253
5254
20.5k
  case ND_ANCHOR:
5255
20.5k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5256
10.7k
      tune_call2_call(ND_BODY(node));
5257
20.5k
    break;
5258
5259
171k
  case ND_BAG:
5260
171k
    {
5261
171k
      BagNode* en = BAG_(node);
5262
5263
171k
      if (en->type == BAG_MEMORY) {
5264
157k
        if (! ND_IS_MARK1(node)) {
5265
78.6k
          ND_STATUS_ADD(node, MARK1);
5266
78.6k
          tune_call2_call(ND_BODY(node));
5267
78.6k
          ND_STATUS_REMOVE(node, MARK1);
5268
78.6k
        }
5269
157k
      }
5270
13.8k
      else if (en->type == BAG_IF_ELSE) {
5271
6.42k
        tune_call2_call(ND_BODY(node));
5272
6.42k
        if (IS_NOT_NULL(en->te.Then))
5273
3.49k
          tune_call2_call(en->te.Then);
5274
6.42k
        if (IS_NOT_NULL(en->te.Else))
5275
3.69k
          tune_call2_call(en->te.Else);
5276
6.42k
      }
5277
7.46k
      else {
5278
7.46k
        tune_call2_call(ND_BODY(node));
5279
7.46k
      }
5280
171k
    }
5281
171k
    break;
5282
5283
110k
  case ND_CALL:
5284
110k
    if (! ND_IS_MARK1(node)) {
5285
97.3k
      ND_STATUS_ADD(node, MARK1);
5286
97.3k
      {
5287
97.3k
        CallNode* cn = CALL_(node);
5288
97.3k
        Node* called = ND_CALL_BODY(cn);
5289
5290
97.3k
        cn->entry_count++;
5291
5292
97.3k
        ND_STATUS_ADD(called, CALLED);
5293
97.3k
        BAG_(called)->m.entry_count++;
5294
97.3k
        tune_call2_call(called);
5295
97.3k
      }
5296
97.3k
      ND_STATUS_REMOVE(node, MARK1);
5297
97.3k
    }
5298
110k
    break;
5299
5300
224k
  default:
5301
224k
    break;
5302
675k
  }
5303
675k
}
5304
5305
static int
5306
tune_call(Node* node, ParseEnv* env, int state)
5307
134k
{
5308
134k
  int r;
5309
5310
134k
  switch (ND_TYPE(node)) {
5311
21.1k
  case ND_LIST:
5312
26.0k
  case ND_ALT:
5313
94.6k
    do {
5314
94.6k
      r = tune_call(ND_CAR(node), env, state);
5315
94.6k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5316
26.0k
    break;
5317
5318
13.7k
  case ND_QUANT:
5319
13.7k
    if (QUANT_(node)->upper == 0)
5320
706
      state |= IN_ZERO_REPEAT;
5321
5322
13.7k
    r = tune_call(ND_BODY(node), env, state);
5323
13.7k
    break;
5324
5325
5.28k
  case ND_ANCHOR:
5326
5.28k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5327
2.46k
      r = tune_call(ND_BODY(node), env, state);
5328
2.81k
    else
5329
2.81k
      r = 0;
5330
5.28k
    break;
5331
5332
18.7k
  case ND_BAG:
5333
18.7k
    {
5334
18.7k
      BagNode* en = BAG_(node);
5335
5336
18.7k
      if (en->type == BAG_MEMORY) {
5337
14.0k
        if ((state & IN_ZERO_REPEAT) != 0) {
5338
396
          ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5339
396
          BAG_(node)->m.entry_count--;
5340
396
        }
5341
14.0k
        r = tune_call(ND_BODY(node), env, state);
5342
14.0k
      }
5343
4.70k
      else if (en->type == BAG_IF_ELSE) {
5344
1.51k
        r = tune_call(ND_BODY(node), env, state);
5345
1.51k
        if (r != 0) return r;
5346
1.50k
        if (IS_NOT_NULL(en->te.Then)) {
5347
666
          r = tune_call(en->te.Then, env, state);
5348
666
          if (r != 0) return r;
5349
666
        }
5350
1.49k
        if (IS_NOT_NULL(en->te.Else))
5351
940
          r = tune_call(en->te.Else, env, state);
5352
1.49k
      }
5353
3.18k
      else
5354
3.18k
        r = tune_call(ND_BODY(node), env, state);
5355
18.7k
    }
5356
18.6k
    break;
5357
5358
18.6k
  case ND_CALL:
5359
8.11k
    if ((state & IN_ZERO_REPEAT) != 0) {
5360
494
      ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5361
494
      CALL_(node)->entry_count--;
5362
494
    }
5363
5364
8.11k
    r = check_call_reference(CALL_(node), env, state);
5365
8.11k
    break;
5366
5367
62.7k
  default:
5368
62.7k
    r = 0;
5369
62.7k
    break;
5370
134k
  }
5371
5372
134k
  return r;
5373
134k
}
5374
5375
static int
5376
tune_call2(Node* node)
5377
129k
{
5378
129k
  int r = 0;
5379
5380
129k
  switch (ND_TYPE(node)) {
5381
20.4k
  case ND_LIST:
5382
25.1k
  case ND_ALT:
5383
91.8k
    do {
5384
91.8k
      r = tune_call2(ND_CAR(node));
5385
91.8k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5386
25.1k
    break;
5387
5388
13.2k
  case ND_QUANT:
5389
13.2k
    if (QUANT_(node)->upper != 0)
5390
12.6k
      r = tune_call2(ND_BODY(node));
5391
13.2k
    break;
5392
5393
5.05k
  case ND_ANCHOR:
5394
5.05k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5395
2.32k
      r = tune_call2(ND_BODY(node));
5396
5.05k
    break;
5397
5398
18.0k
  case ND_BAG:
5399
18.0k
    if (! ND_IS_IN_ZERO_REPEAT(node))
5400
18.0k
      r = tune_call2(ND_BODY(node));
5401
5402
18.0k
    {
5403
18.0k
      BagNode* en = BAG_(node);
5404
5405
18.0k
      if (r != 0) return r;
5406
18.0k
      if (en->type == BAG_IF_ELSE) {
5407
1.45k
        if (IS_NOT_NULL(en->te.Then)) {
5408
624
          r = tune_call2(en->te.Then);
5409
624
          if (r != 0) return r;
5410
624
        }
5411
1.45k
        if (IS_NOT_NULL(en->te.Else))
5412
936
          r = tune_call2(en->te.Else);
5413
1.45k
      }
5414
18.0k
    }
5415
18.0k
    break;
5416
5417
18.0k
  case ND_CALL:
5418
7.23k
    if (! ND_IS_IN_ZERO_REPEAT(node)) {
5419
7.23k
      tune_call2_call(node);
5420
7.23k
    }
5421
7.23k
    break;
5422
5423
60.8k
  default:
5424
60.8k
    break;
5425
129k
  }
5426
5427
129k
  return r;
5428
129k
}
5429
5430
5431
static void
5432
tune_called_state_call(Node* node, int state)
5433
682k
{
5434
682k
  switch (ND_TYPE(node)) {
5435
22.4k
  case ND_ALT:
5436
22.4k
    state |= IN_ALT;
5437
    /* fall */
5438
102k
  case ND_LIST:
5439
410k
    do {
5440
410k
      tune_called_state_call(ND_CAR(node), state);
5441
410k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5442
102k
    break;
5443
5444
45.7k
  case ND_QUANT:
5445
45.7k
    {
5446
45.7k
      QuantNode* qn = QUANT_(node);
5447
5448
45.7k
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5449
31.5k
        state |= IN_REAL_REPEAT;
5450
45.7k
      if (qn->lower != qn->upper)
5451
41.9k
        state |= IN_VAR_REPEAT;
5452
45.7k
      if ((state & IN_PEEK) != 0)
5453
14.7k
        ND_STATUS_ADD(node, INPEEK);
5454
5455
45.7k
      tune_called_state_call(ND_QUANT_BODY(qn), state);
5456
45.7k
    }
5457
45.7k
    break;
5458
5459
21.1k
  case ND_ANCHOR:
5460
21.1k
    {
5461
21.1k
      AnchorNode* an = ANCHOR_(node);
5462
5463
21.1k
      switch (an->type) {
5464
2.25k
      case ANCR_PREC_READ_NOT:
5465
3.12k
      case ANCR_LOOK_BEHIND_NOT:
5466
3.12k
        state |= (IN_NOT | IN_PEEK);
5467
3.12k
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5468
3.12k
        break;
5469
6.51k
      case ANCR_PREC_READ:
5470
7.77k
      case ANCR_LOOK_BEHIND:
5471
7.77k
        state |= IN_PEEK;
5472
7.77k
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5473
7.77k
        break;
5474
10.2k
      default:
5475
10.2k
        break;
5476
21.1k
      }
5477
21.1k
    }
5478
21.1k
    break;
5479
5480
182k
  case ND_BAG:
5481
182k
    {
5482
182k
      BagNode* en = BAG_(node);
5483
5484
182k
      if (en->type == BAG_MEMORY) {
5485
167k
        if (ND_IS_MARK1(node)) {
5486
90.5k
          if ((~en->m.called_state & state) != 0) {
5487
442
            en->m.called_state |= state;
5488
442
            tune_called_state_call(ND_BODY(node), state);
5489
442
          }
5490
90.5k
        }
5491
77.3k
        else {
5492
77.3k
          ND_STATUS_ADD(node, MARK1);
5493
77.3k
          en->m.called_state |= state;
5494
77.3k
          tune_called_state_call(ND_BODY(node), state);
5495
77.3k
          ND_STATUS_REMOVE(node, MARK1);
5496
77.3k
        }
5497
167k
      }
5498
14.4k
      else if (en->type == BAG_IF_ELSE) {
5499
6.26k
        state |= IN_ALT;
5500
6.26k
        tune_called_state_call(ND_BODY(node), state);
5501
6.26k
        if (IS_NOT_NULL(en->te.Then)) {
5502
3.45k
          tune_called_state_call(en->te.Then, state);
5503
3.45k
        }
5504
6.26k
        if (IS_NOT_NULL(en->te.Else))
5505
3.57k
          tune_called_state_call(en->te.Else, state);
5506
6.26k
      }
5507
8.18k
      else {
5508
8.18k
        tune_called_state_call(ND_BODY(node), state);
5509
8.18k
      }
5510
182k
    }
5511
182k
    break;
5512
5513
108k
  case ND_CALL:
5514
108k
    if ((state & IN_PEEK) != 0)
5515
28.5k
      ND_STATUS_ADD(node, INPEEK);
5516
108k
    if ((state & IN_REAL_REPEAT) != 0)
5517
66.6k
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5518
5519
108k
    tune_called_state_call(ND_BODY(node), state);
5520
108k
    break;
5521
5522
221k
  default:
5523
221k
    break;
5524
682k
  }
5525
682k
}
5526
5527
static void
5528
tune_called_state(Node* node, int state)
5529
124k
{
5530
124k
  switch (ND_TYPE(node)) {
5531
4.59k
  case ND_ALT:
5532
4.59k
    state |= IN_ALT;
5533
    /* fall */
5534
23.2k
  case ND_LIST:
5535
86.4k
    do {
5536
86.4k
      tune_called_state(ND_CAR(node), state);
5537
86.4k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5538
23.2k
    break;
5539
5540
0
#ifdef USE_CALL
5541
7.31k
  case ND_CALL:
5542
7.31k
    if ((state & IN_PEEK) != 0)
5543
1.46k
      ND_STATUS_ADD(node, INPEEK);
5544
7.31k
    if ((state & IN_REAL_REPEAT) != 0)
5545
3.26k
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5546
5547
7.31k
    tune_called_state_call(node, state);
5548
7.31k
    break;
5549
0
#endif
5550
5551
17.8k
  case ND_BAG:
5552
17.8k
    {
5553
17.8k
      BagNode* en = BAG_(node);
5554
5555
17.8k
      switch (en->type) {
5556
13.5k
      case BAG_MEMORY:
5557
13.5k
        if (en->m.entry_count > 1)
5558
4.35k
          state |= IN_MULTI_ENTRY;
5559
5560
13.5k
        en->m.called_state |= state;
5561
        /* fall */
5562
13.6k
      case BAG_OPTION:
5563
16.5k
      case BAG_STOP_BACKTRACK:
5564
16.5k
        tune_called_state(ND_BODY(node), state);
5565
16.5k
        break;
5566
1.35k
      case BAG_IF_ELSE:
5567
1.35k
        state |= IN_ALT;
5568
1.35k
        tune_called_state(ND_BODY(node), state);
5569
1.35k
        if (IS_NOT_NULL(en->te.Then))
5570
594
          tune_called_state(en->te.Then, state);
5571
1.35k
        if (IS_NOT_NULL(en->te.Else))
5572
858
          tune_called_state(en->te.Else, state);
5573
1.35k
        break;
5574
17.8k
      }
5575
17.8k
    }
5576
17.8k
    break;
5577
5578
17.8k
  case ND_QUANT:
5579
13.3k
    {
5580
13.3k
      QuantNode* qn = QUANT_(node);
5581
5582
13.3k
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5583
9.51k
        state |= IN_REAL_REPEAT;
5584
13.3k
      if (qn->lower != qn->upper)
5585
11.7k
        state |= IN_VAR_REPEAT;
5586
13.3k
      if ((state & IN_PEEK) != 0)
5587
2.28k
        ND_STATUS_ADD(node, INPEEK);
5588
5589
13.3k
      tune_called_state(ND_QUANT_BODY(qn), state);
5590
13.3k
    }
5591
13.3k
    break;
5592
5593
5.18k
  case ND_ANCHOR:
5594
5.18k
    {
5595
5.18k
      AnchorNode* an = ANCHOR_(node);
5596
5597
5.18k
      switch (an->type) {
5598
264
      case ANCR_PREC_READ_NOT:
5599
686
      case ANCR_LOOK_BEHIND_NOT:
5600
686
        state |= (IN_NOT | IN_PEEK);
5601
686
        tune_called_state(ND_ANCHOR_BODY(an), state);
5602
686
        break;
5603
1.44k
      case ANCR_PREC_READ:
5604
1.77k
      case ANCR_LOOK_BEHIND:
5605
1.77k
        state |= IN_PEEK;
5606
1.77k
        tune_called_state(ND_ANCHOR_BODY(an), state);
5607
1.77k
        break;
5608
2.72k
      default:
5609
2.72k
        break;
5610
5.18k
      }
5611
5.18k
    }
5612
5.18k
    break;
5613
5614
5.18k
  case ND_BACKREF:
5615
33.4k
  case ND_STRING:
5616
41.6k
  case ND_CTYPE:
5617
51.4k
  case ND_CCLASS:
5618
57.4k