Coverage Report

Created: 2024-02-25 06:12

/src/oniguruma/src/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "regparse.h"
31
32
25.6k
#define OPS_INIT_SIZE  8
33
34
#define ND_IS_REAL_IGNORECASE(node) \
35
284k
  (ND_IS_IGNORECASE(node) && !ND_STRING_IS_CRUDE(node))
36
37
typedef struct {
38
  OnigLen min;
39
  OnigLen max;
40
} MinMaxLen;
41
42
typedef struct {
43
  OnigLen min;
44
  OnigLen max;
45
  int min_is_sure;
46
} MinMaxCharLen;
47
48
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
49
50
static OnigLen node_min_byte_len(Node* node, ParseEnv* env);
51
52
static int
53
ops_init(regex_t* reg, int init_alloc_size)
54
25.6k
{
55
25.6k
  Operation* p;
56
25.6k
  size_t size;
57
58
25.6k
  if (init_alloc_size <= 0)
59
0
    return ONIGERR_PARSER_BUG;
60
61
25.6k
  size = sizeof(Operation) * init_alloc_size;
62
25.6k
  p = (Operation* )xrealloc(reg->ops, size);
63
25.6k
  CHECK_NULL_RETURN_MEMERR(p);
64
25.6k
  reg->ops = p;
65
25.6k
#ifdef USE_DIRECT_THREADED_CODE
66
25.6k
  {
67
25.6k
    enum OpCode* cp;
68
25.6k
    size = sizeof(enum OpCode) * init_alloc_size;
69
25.6k
    cp = (enum OpCode* )xrealloc(reg->ocs, size);
70
25.6k
    CHECK_NULL_RETURN_MEMERR(cp);
71
25.6k
    reg->ocs = cp;
72
25.6k
  }
73
0
#endif
74
75
0
  reg->ops_curr  = 0; /* !!! not yet done ops_new() */
76
25.6k
  reg->ops_alloc = init_alloc_size;
77
25.6k
  reg->ops_used  = 0;
78
79
25.6k
  return ONIG_NORMAL;
80
25.6k
}
81
82
static int
83
ops_resize(regex_t* reg, int n)
84
38.2k
{
85
38.2k
#ifdef USE_DIRECT_THREADED_CODE
86
38.2k
  enum OpCode* cp;
87
38.2k
#endif
88
38.2k
  Operation* p;
89
38.2k
  size_t size;
90
91
38.2k
  if (n == reg->ops_alloc) return ONIG_NORMAL;
92
36.9k
  if (n <= 0) return ONIGERR_PARSER_BUG;
93
94
36.9k
  size = sizeof(Operation) * n;
95
36.9k
  p = (Operation* )xrealloc(reg->ops, size);
96
36.9k
  CHECK_NULL_RETURN_MEMERR(p);
97
36.9k
  reg->ops = p;
98
99
36.9k
#ifdef USE_DIRECT_THREADED_CODE
100
36.9k
  size = sizeof(enum OpCode) * n;
101
36.9k
  cp = (enum OpCode* )xrealloc(reg->ocs, size);
102
36.9k
  CHECK_NULL_RETURN_MEMERR(cp);
103
36.9k
  reg->ocs = cp;
104
36.9k
#endif
105
106
36.9k
  reg->ops_alloc = n;
107
36.9k
  if (reg->ops_used == 0)
108
0
    reg->ops_curr = 0;
109
36.9k
  else
110
36.9k
    reg->ops_curr = reg->ops + (reg->ops_used - 1);
111
112
36.9k
  return ONIG_NORMAL;
113
36.9k
}
114
115
static int
116
ops_new(regex_t* reg)
117
729k
{
118
729k
  if (reg->ops_used >= reg->ops_alloc) {
119
19.7k
    int r = ops_resize(reg, reg->ops_alloc << 1);
120
19.7k
    if (r != ONIG_NORMAL) return r;
121
19.7k
  }
122
123
729k
  reg->ops_curr = reg->ops + reg->ops_used;
124
729k
  reg->ops_used++;
125
126
729k
  xmemset(reg->ops_curr, 0, sizeof(Operation));
127
729k
  return ONIG_NORMAL;
128
729k
}
129
130
static int
131
is_in_string_pool(regex_t* reg, UChar* s)
132
7.58k
{
133
7.58k
  return (s >= reg->string_pool && s < reg->string_pool_end);
134
7.58k
}
135
136
static void
137
ops_free(regex_t* reg)
138
51.3k
{
139
51.3k
  int i;
140
141
51.3k
  if (IS_NULL(reg->ops)) return ;
142
143
755k
  for (i = 0; i < (int )reg->ops_used; i++) {
144
729k
    enum OpCode opcode;
145
729k
    Operation* op;
146
147
729k
    op = reg->ops + i;
148
149
729k
#ifdef USE_DIRECT_THREADED_CODE
150
729k
    opcode = *(reg->ocs + i);
151
#else
152
    opcode = op->opcode;
153
#endif
154
155
729k
    switch (opcode) {
156
2.00k
    case OP_STR_MBN:
157
2.00k
      if (! is_in_string_pool(reg, op->exact_len_n.s))
158
0
        xfree(op->exact_len_n.s);
159
2.00k
      break;
160
5.58k
    case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N:
161
5.58k
      if (! is_in_string_pool(reg, op->exact_n.s))
162
0
        xfree(op->exact_n.s);
163
5.58k
      break;
164
63.5k
    case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
165
93.7k
    case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
166
96.9k
    case OP_STR_MB2N3:
167
96.9k
      break;
168
169
49.9k
    case OP_CCLASS_NOT: case OP_CCLASS:
170
49.9k
      xfree(op->cclass.bsp);
171
49.9k
      break;
172
173
98.2k
    case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
174
98.2k
      xfree(op->cclass_mb.mb);
175
98.2k
      break;
176
6.55k
    case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
177
6.55k
      xfree(op->cclass_mix.mb);
178
6.55k
      xfree(op->cclass_mix.bsp);
179
6.55k
      break;
180
181
4.87k
    case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
182
4.87k
      break;
183
1.06k
    case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC:
184
1.26k
    case OP_BACKREF_CHECK:
185
1.26k
#ifdef USE_BACKREF_WITH_LEVEL
186
1.56k
    case OP_BACKREF_WITH_LEVEL:
187
1.90k
    case OP_BACKREF_WITH_LEVEL_IC:
188
2.13k
    case OP_BACKREF_CHECK_WITH_LEVEL:
189
2.13k
#endif
190
2.13k
      if (op->backref_general.num != 1)
191
1.11k
        xfree(op->backref_general.ns);
192
2.13k
      break;
193
194
463k
    default:
195
463k
      break;
196
729k
    }
197
729k
  }
198
199
25.6k
  xfree(reg->ops);
200
25.6k
#ifdef USE_DIRECT_THREADED_CODE
201
25.6k
  xfree(reg->ocs);
202
25.6k
  reg->ocs = 0;
203
25.6k
#endif
204
205
25.6k
  reg->ops = 0;
206
25.6k
  reg->ops_curr  = 0;
207
25.6k
  reg->ops_alloc = 0;
208
25.6k
  reg->ops_used  = 0;
209
25.6k
}
210
211
static int
212
ops_calc_size_of_string_pool(regex_t* reg)
213
18.5k
{
214
18.5k
  int i;
215
18.5k
  int total;
216
217
18.5k
  if (IS_NULL(reg->ops)) return 0;
218
219
18.5k
  total = 0;
220
748k
  for (i = 0; i < (int )reg->ops_used; i++) {
221
729k
    enum OpCode opcode;
222
729k
    Operation* op;
223
224
729k
    op = reg->ops + i;
225
729k
#ifdef USE_DIRECT_THREADED_CODE
226
729k
    opcode = *(reg->ocs + i);
227
#else
228
    opcode = op->opcode;
229
#endif
230
231
729k
    switch (opcode) {
232
2.00k
    case OP_STR_MBN:
233
2.00k
      total += op->exact_len_n.len * op->exact_len_n.n;
234
2.00k
      break;
235
3.07k
    case OP_STR_N:
236
3.40k
    case OP_STR_MB2N:
237
3.40k
      total += op->exact_n.n * 2;
238
3.40k
      break;
239
2.17k
    case OP_STR_MB3N:
240
2.17k
      total += op->exact_n.n * 3;
241
2.17k
      break;
242
243
722k
    default:
244
722k
      break;
245
729k
    }
246
729k
  }
247
248
18.5k
  return total;
249
18.5k
}
250
251
static int
252
ops_make_string_pool(regex_t* reg)
253
18.5k
{
254
18.5k
  int i;
255
18.5k
  int len;
256
18.5k
  int size;
257
18.5k
  UChar* pool;
258
18.5k
  UChar* curr;
259
260
18.5k
  size = ops_calc_size_of_string_pool(reg);
261
18.5k
  if (size <= 0) {
262
15.5k
    return 0;
263
15.5k
  }
264
265
2.96k
  curr = pool = (UChar* )xmalloc((size_t )size);
266
2.96k
  CHECK_NULL_RETURN_MEMERR(pool);
267
268
273k
  for (i = 0; i < (int )reg->ops_used; i++) {
269
270k
    enum OpCode opcode;
270
270k
    Operation* op;
271
272
270k
    op = reg->ops + i;
273
270k
#ifdef USE_DIRECT_THREADED_CODE
274
270k
    opcode = *(reg->ocs + i);
275
#else
276
    opcode = op->opcode;
277
#endif
278
279
270k
    switch (opcode) {
280
2.00k
    case OP_STR_MBN:
281
2.00k
      len = op->exact_len_n.len * op->exact_len_n.n;
282
2.00k
      xmemcpy(curr, op->exact_len_n.s, len);
283
2.00k
      xfree(op->exact_len_n.s);
284
2.00k
      op->exact_len_n.s = curr;
285
2.00k
      curr += len;
286
2.00k
      break;
287
3.07k
    case OP_STR_N:
288
3.07k
      len = op->exact_n.n;
289
5.58k
    copy:
290
5.58k
      xmemcpy(curr, op->exact_n.s, len);
291
5.58k
      xfree(op->exact_n.s);
292
5.58k
      op->exact_n.s = curr;
293
5.58k
      curr += len;
294
5.58k
      break;
295
331
    case OP_STR_MB2N:
296
331
      len = op->exact_n.n * 2;
297
331
      goto copy;
298
0
      break;
299
2.17k
    case OP_STR_MB3N:
300
2.17k
      len = op->exact_n.n * 3;
301
2.17k
      goto copy;
302
0
      break;
303
304
262k
    default:
305
262k
      break;
306
270k
    }
307
270k
  }
308
309
2.96k
  reg->string_pool     = pool;
310
2.96k
  reg->string_pool_end = pool + size;
311
2.96k
  return 0;
312
2.96k
}
313
314
extern OnigCaseFoldType
315
onig_get_default_case_fold_flag(void)
316
0
{
317
0
  return OnigDefaultCaseFoldFlag;
318
0
}
319
320
extern int
321
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
322
0
{
323
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
324
0
  return 0;
325
0
}
326
327
static int
328
len_multiply_cmp(OnigLen x, int y, OnigLen v)
329
14.1k
{
330
14.1k
  if (x == 0 || y == 0) return -1;
331
332
14.1k
  if (x < INFINITE_LEN / y) {
333
13.9k
    OnigLen xy = x * (OnigLen )y;
334
13.9k
    if (xy > v) return 1;
335
3.95k
    else {
336
3.95k
      if (xy == v) return 0;
337
3.16k
      else return -1;
338
3.95k
    }
339
13.9k
  }
340
264
  else
341
264
    return v == INFINITE_LEN ? 0 : 1;
342
14.1k
}
343
344
extern int
345
onig_positive_int_multiply(int x, int y)
346
10.6k
{
347
10.6k
  if (x == 0 || y == 0) return 0;
348
349
9.40k
  if (x < ONIG_INT_MAX / y)
350
9.34k
    return x * y;
351
56
  else
352
56
    return -1;
353
9.40k
}
354
355
356
static void
357
node_swap(Node* a, Node* b)
358
70.7k
{
359
70.7k
  Node c;
360
361
70.7k
  c = *a; *a = *b; *b = c;
362
363
70.7k
  if (ND_TYPE(a) == ND_STRING) {
364
30.3k
    StrNode* sn = STR_(a);
365
30.3k
    if (sn->capacity == 0) {
366
30.1k
      int len = (int )(sn->end - sn->s);
367
30.1k
      sn->s   = sn->buf;
368
30.1k
      sn->end = sn->s + len;
369
30.1k
    }
370
30.3k
  }
371
372
70.7k
  if (ND_TYPE(b) == ND_STRING) {
373
62.5k
    StrNode* sn = STR_(b);
374
62.5k
    if (sn->capacity == 0) {
375
61.5k
      int len = (int )(sn->end - sn->s);
376
61.5k
      sn->s   = sn->buf;
377
61.5k
      sn->end = sn->s + len;
378
61.5k
    }
379
62.5k
  }
380
70.7k
}
381
382
static int
383
node_list_len(Node* list)
384
62.5k
{
385
62.5k
  int len;
386
387
62.5k
  len = 1;
388
123k
  while (IS_NOT_NULL(ND_CDR(list))) {
389
60.6k
    list = ND_CDR(list);
390
60.6k
    len++;
391
60.6k
  }
392
393
62.5k
  return len;
394
62.5k
}
395
396
static Node*
397
node_list_add(Node* list, Node* x)
398
60.6k
{
399
60.6k
  Node *n;
400
401
60.6k
  n = onig_node_new_list(x, NULL);
402
60.6k
  if (IS_NULL(n)) return NULL_NODE;
403
404
60.6k
  if (IS_NOT_NULL(list)) {
405
889k
    while (IS_NOT_NULL(ND_CDR(list)))
406
828k
      list = ND_CDR(list);
407
408
60.6k
    ND_CDR(list) = n;
409
60.6k
  }
410
411
60.6k
  return n;
412
60.6k
}
413
414
static int
415
node_str_node_cat(Node* node, Node* add)
416
30.9k
{
417
30.9k
  int r;
418
419
30.9k
  if (ND_STATUS(node) != ND_STATUS(add))
420
0
    return ONIGERR_TYPE_BUG;
421
422
30.9k
  if (STR_(node)->flag != STR_(add)->flag)
423
0
    return ONIGERR_TYPE_BUG;
424
425
30.9k
  r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
426
30.9k
  if (r != 0) return r;
427
428
30.9k
  return 0;
429
30.9k
}
430
431
static void
432
node_conv_to_str_node(Node* node, Node* ref_node)
433
902
{
434
902
  xmemset(node, 0, sizeof(*node));
435
902
  ND_SET_TYPE(node, ND_STRING);
436
902
  ND_STATUS(node) = ND_STATUS(ref_node);
437
438
902
  STR_(node)->flag     = STR_(ref_node)->flag;
439
902
  STR_(node)->s        = STR_(node)->buf;
440
902
  STR_(node)->end      = STR_(node)->buf;
441
902
  STR_(node)->capacity = 0;
442
902
}
443
444
static OnigLen
445
distance_add(OnigLen d1, OnigLen d2)
446
1.55M
{
447
1.55M
  if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
448
183k
    return INFINITE_LEN;
449
1.37M
  else {
450
1.37M
    if (d1 <= INFINITE_LEN - d2) return d1 + d2;
451
938
    else return INFINITE_LEN;
452
1.37M
  }
453
1.55M
}
454
455
static OnigLen
456
distance_multiply(OnigLen d, int m)
457
89.7k
{
458
89.7k
  if (m == 0) return 0;
459
460
67.8k
  if (d < INFINITE_LEN / m)
461
63.6k
    return d * m;
462
4.14k
  else
463
4.14k
    return INFINITE_LEN;
464
67.8k
}
465
466
static int
467
bitset_is_empty(BitSetRef bs)
468
101k
{
469
101k
  int i;
470
471
866k
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {
472
772k
    if (bs[i] != 0) return 0;
473
772k
  }
474
94.4k
  return 1;
475
101k
}
476
477
#ifdef USE_CALL
478
479
static int
480
unset_addr_list_init(UnsetAddrList* list, int size)
481
3.47k
{
482
3.47k
  UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
483
3.47k
  CHECK_NULL_RETURN_MEMERR(p);
484
485
3.47k
  list->num   = 0;
486
3.47k
  list->alloc = size;
487
3.47k
  list->us    = p;
488
3.47k
  return 0;
489
3.47k
}
490
491
static void
492
unset_addr_list_end(UnsetAddrList* list)
493
3.47k
{
494
3.47k
  if (IS_NOT_NULL(list->us))
495
3.47k
    xfree(list->us);
496
3.47k
}
497
498
static int
499
unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
500
3.96k
{
501
3.96k
  UnsetAddr* p;
502
3.96k
  int size;
503
504
3.96k
  if (list->num >= list->alloc) {
505
237
    size = list->alloc * 2;
506
237
    p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size);
507
237
    CHECK_NULL_RETURN_MEMERR(p);
508
237
    list->alloc = size;
509
237
    list->us    = p;
510
237
  }
511
512
3.96k
  list->us[list->num].offset = offset;
513
3.96k
  list->us[list->num].target = node;
514
3.96k
  list->num++;
515
3.96k
  return 0;
516
3.96k
}
517
#endif /* USE_CALL */
518
519
enum CharLenReturnType {
520
  CHAR_LEN_NORMAL = 0,       /* fixed or variable */
521
  CHAR_LEN_TOP_ALT_FIXED = 1
522
};
523
524
static int
525
mmcl_fixed(MinMaxCharLen* c)
526
31.2k
{
527
31.2k
  return (c->min == c->max && c->min != INFINITE_LEN);
528
31.2k
}
529
530
static void
531
mmcl_set(MinMaxCharLen* l, OnigLen len)
532
100k
{
533
100k
  l->min = len;
534
100k
  l->max = len;
535
100k
  l->min_is_sure = TRUE;
536
100k
}
537
538
static void
539
mmcl_set_min_max(MinMaxCharLen* l, OnigLen min, OnigLen max, int min_is_sure)
540
6.49k
{
541
6.49k
  l->min = min;
542
6.49k
  l->max = max;
543
6.49k
  l->min_is_sure = min_is_sure;
544
6.49k
}
545
546
static void
547
mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)
548
63.5k
{
549
63.5k
  to->min = distance_add(to->min, add->min);
550
63.5k
  to->max = distance_add(to->max, add->max);
551
552
63.5k
  to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;
553
63.5k
}
554
555
static void
556
mmcl_multiply(MinMaxCharLen* to, int m)
557
3.85k
{
558
3.85k
  to->min = distance_multiply(to->min, m);
559
3.85k
  to->max = distance_multiply(to->max, m);
560
3.85k
}
561
562
static void
563
mmcl_repeat_range_multiply(MinMaxCharLen* to, int mlow, int mhigh)
564
10.7k
{
565
10.7k
  to->min = distance_multiply(to->min, mlow);
566
567
10.7k
  if (IS_INFINITE_REPEAT(mhigh))
568
8.52k
    to->max = INFINITE_LEN;
569
2.21k
  else
570
2.21k
    to->max = distance_multiply(to->max, mhigh);
571
10.7k
}
572
573
static void
574
mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)
575
25.9k
{
576
25.9k
  if (to->min > alt->min) {
577
3.15k
    to->min         = alt->min;
578
3.15k
    to->min_is_sure = alt->min_is_sure;
579
3.15k
  }
580
22.7k
  else if (to->min == alt->min) {
581
7.26k
    if (alt->min_is_sure != FALSE)
582
3.55k
      to->min_is_sure = TRUE;
583
7.26k
  }
584
585
25.9k
  if (to->max < alt->max) to->max = alt->max;
586
25.9k
}
587
588
#ifndef ONIG_DONT_OPTIMIZE
589
590
static int
591
mml_is_equal(MinMaxLen* a, MinMaxLen* b)
592
5.99k
{
593
5.99k
  return a->min == b->min && a->max == b->max;
594
5.99k
}
595
596
static void
597
mml_set_min_max(MinMaxLen* l, OnigLen min, OnigLen max)
598
311k
{
599
311k
  l->min = min;
600
311k
  l->max = max;
601
311k
}
602
603
static void
604
mml_clear(MinMaxLen* l)
605
2.23M
{
606
2.23M
  l->min = l->max = 0;
607
2.23M
}
608
609
static void
610
mml_copy(MinMaxLen* to, MinMaxLen* from)
611
1.44M
{
612
1.44M
  to->min = from->min;
613
1.44M
  to->max = from->max;
614
1.44M
}
615
616
static void
617
mml_add(MinMaxLen* to, MinMaxLen* add)
618
572k
{
619
572k
  to->min = distance_add(to->min, add->min);
620
572k
  to->max = distance_add(to->max, add->max);
621
572k
}
622
623
static void
624
mml_alt_merge(MinMaxLen* to, MinMaxLen* alt)
625
127k
{
626
127k
  if (to->min > alt->min) to->min = alt->min;
627
127k
  if (to->max < alt->max) to->max = alt->max;
628
127k
}
629
630
#endif
631
632
/* fixed size pattern node only */
633
static int
634
node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env,
635
               int level)
636
167k
{
637
167k
  MinMaxCharLen tci;
638
167k
  int r = CHAR_LEN_NORMAL;
639
640
167k
  level++;
641
642
167k
  switch (ND_TYPE(node)) {
643
31.5k
  case ND_LIST:
644
31.5k
    {
645
31.5k
      int first = TRUE;
646
94.4k
      do {
647
94.4k
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
648
94.4k
        if (r < 0) break;
649
94.3k
        if (first == TRUE) {
650
31.5k
          *ci = tci;
651
31.5k
          first = FALSE;
652
31.5k
        }
653
62.8k
        else
654
62.8k
          mmcl_add(ci, &tci);
655
94.3k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
656
31.5k
    }
657
0
    break;
658
659
4.57k
  case ND_ALT:
660
4.57k
    {
661
4.57k
      int fixed;
662
663
4.57k
      r = node_char_len1(ND_CAR(node), reg, ci, env, level);
664
4.57k
      if (r < 0) break;
665
666
4.53k
      fixed = TRUE;
667
27.3k
      while (IS_NOT_NULL(node = ND_CDR(node))) {
668
22.8k
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
669
22.8k
        if (r < 0) break;
670
22.8k
        if (! mmcl_fixed(&tci))
671
608
          fixed = FALSE;
672
22.8k
        mmcl_alt_merge(ci, &tci);
673
22.8k
      }
674
4.53k
      if (r < 0) break;
675
676
4.51k
      r = CHAR_LEN_NORMAL;
677
4.51k
      if (mmcl_fixed(ci)) break;
678
679
3.31k
      if (fixed == TRUE && level == 1) {
680
1.54k
        r = CHAR_LEN_TOP_ALT_FIXED;
681
1.54k
      }
682
3.31k
    }
683
0
    break;
684
685
33.8k
  case ND_STRING:
686
33.8k
    {
687
33.8k
      OnigLen clen;
688
33.8k
      StrNode* sn = STR_(node);
689
33.8k
      UChar *s = sn->s;
690
691
33.8k
      if (ND_IS_REAL_IGNORECASE(node) &&
692
33.8k
          CASE_FOLD_IS_NOT_ASCII_ONLY(env->case_fold_flag)) {
693
        /* Such a case is possible.
694
           ex. /(?i)(?<=\1)(a)/
695
           Backref node refer to capture group, but it doesn't tune yet.
696
         */
697
54
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
698
54
        break;
699
54
      }
700
701
33.7k
      clen = 0;
702
90.6k
      while (s < sn->end) {
703
56.9k
        s += enclen(reg->enc, s);
704
56.9k
        clen = distance_add(clen, 1);
705
56.9k
      }
706
33.7k
      mmcl_set(ci, clen);
707
33.7k
    }
708
0
    break;
709
710
16.4k
  case ND_QUANT:
711
16.4k
    {
712
16.4k
      QuantNode* qn = QUANT_(node);
713
714
16.4k
      if (qn->lower == qn->upper) {
715
5.64k
        if (qn->upper == 0) {
716
1.77k
          mmcl_set(ci, 0);
717
1.77k
        }
718
3.86k
        else {
719
3.86k
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
720
3.86k
          if (r < 0) break;
721
3.85k
          mmcl_multiply(ci, qn->lower);
722
3.85k
        }
723
5.64k
      }
724
10.8k
      else {
725
10.8k
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
726
10.8k
        if (r < 0) break;
727
10.7k
        mmcl_repeat_range_multiply(ci, qn->lower, qn->upper);
728
10.7k
      }
729
16.4k
    }
730
16.3k
    break;
731
732
16.3k
#ifdef USE_CALL
733
16.3k
  case ND_CALL:
734
938
    if (ND_IS_RECURSION(node))
735
418
      mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
736
520
    else
737
520
      r = node_char_len1(ND_BODY(node), reg, ci, env, level);
738
938
    break;
739
0
#endif
740
741
6.85k
  case ND_CTYPE:
742
53.0k
  case ND_CCLASS:
743
53.0k
    mmcl_set(ci, 1);
744
53.0k
    break;
745
746
13.3k
  case ND_BAG:
747
13.3k
    {
748
13.3k
      BagNode* en = BAG_(node);
749
750
13.3k
      switch (en->type) {
751
8.19k
      case BAG_MEMORY:
752
8.19k
        if (ND_IS_FIXED_CLEN(node)) {
753
5.56k
          mmcl_set_min_max(ci, en->min_char_len, en->max_char_len,
754
5.56k
                           ND_IS_FIXED_CLEN_MIN_SURE(node));
755
5.56k
        }
756
2.62k
        else {
757
2.62k
          if (ND_IS_MARK1(node)) {
758
204
            mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
759
204
          }
760
2.41k
          else {
761
2.41k
            ND_STATUS_ADD(node, MARK1);
762
2.41k
            r = node_char_len1(ND_BODY(node), reg, ci, env, level);
763
2.41k
            ND_STATUS_REMOVE(node, MARK1);
764
2.41k
            if (r < 0) break;
765
766
2.34k
            en->min_char_len = ci->min;
767
2.34k
            en->max_char_len = ci->max;
768
2.34k
            ND_STATUS_ADD(node, FIXED_CLEN);
769
2.34k
            if (ci->min_is_sure != FALSE)
770
2.06k
              ND_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);
771
2.34k
          }
772
2.62k
        }
773
        /* can't optimize look-behind if capture exists. */
774
8.11k
        ci->min_is_sure = FALSE;
775
8.11k
        break;
776
74
      case BAG_OPTION:
777
3.71k
      case BAG_STOP_BACKTRACK:
778
3.71k
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
779
3.71k
        break;
780
1.45k
      case BAG_IF_ELSE:
781
1.45k
        {
782
1.45k
          MinMaxCharLen eci;
783
784
1.45k
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
785
1.45k
          if (r < 0) break;
786
787
1.45k
          if (IS_NOT_NULL(en->te.Then)) {
788
768
            r = node_char_len1(en->te.Then, reg, &tci, env, level);
789
768
            if (r < 0) break;
790
762
            mmcl_add(ci, &tci);
791
762
          }
792
793
1.44k
          if (IS_NOT_NULL(en->te.Else)) {
794
1.00k
            r = node_char_len1(en->te.Else, reg, &eci, env, level);
795
1.00k
            if (r < 0) break;
796
1.00k
          }
797
438
          else {
798
438
            mmcl_set(&eci, 0);
799
438
          }
800
801
1.44k
          mmcl_alt_merge(ci, &eci);
802
1.44k
        }
803
0
        break;
804
0
      default: /* never come here */
805
0
        r = ONIGERR_PARSER_BUG;
806
0
        break;
807
13.3k
      }
808
13.3k
    }
809
13.3k
    break;
810
811
13.3k
  case ND_GIMMICK:
812
4.24k
    mmcl_set(ci, 0);
813
4.24k
    break;
814
815
7.22k
  case ND_ANCHOR:
816
7.38k
  zero:
817
7.38k
    mmcl_set(ci, 0);
818
    /* can't optimize look-behind if anchor exists. */
819
7.38k
    ci->min_is_sure = FALSE;
820
7.38k
    break;
821
822
2.74k
  case ND_BACKREF:
823
2.74k
    if (ND_IS_CHECKER(node))
824
154
      goto zero;
825
826
2.59k
    if (ND_IS_RECURSION(node)) {
827
304
#ifdef USE_BACKREF_WITH_LEVEL
828
304
      if (ND_IS_NEST_LEVEL(node)) {
829
62
        mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
830
62
        break;
831
62
      }
832
242
#endif
833
834
242
      mmcl_set_min_max(ci, 0, 0, FALSE);
835
242
      break;
836
304
    }
837
838
2.28k
    {
839
2.28k
      int i;
840
2.28k
      int* backs;
841
2.28k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
842
2.28k
      BackRefNode* br = BACKREF_(node);
843
844
2.28k
      backs = BACKREFS_P(br);
845
2.28k
      r = node_char_len1(mem_env[backs[0]].mem_node, reg, ci, env, level);
846
2.28k
      if (r < 0) break;
847
2.25k
      if (! mmcl_fixed(ci)) ci->min_is_sure = FALSE;
848
849
3.90k
      for (i = 1; i < br->back_num; i++) {
850
1.65k
        r = node_char_len1(mem_env[backs[i]].mem_node, reg, &tci, env, level);
851
1.65k
        if (r < 0) break;
852
1.65k
        if (! mmcl_fixed(&tci)) tci.min_is_sure = FALSE;
853
1.65k
        mmcl_alt_merge(ci, &tci);
854
1.65k
      }
855
2.25k
    }
856
0
    break;
857
858
0
  default: /* never come here */
859
0
    r = ONIGERR_PARSER_BUG;
860
0
    break;
861
167k
  }
862
863
167k
  return r;
864
167k
}
865
866
static int
867
node_char_len(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env)
868
17.5k
{
869
17.5k
  return node_char_len1(node, reg, ci, env, 0);
870
17.5k
}
871
872
873
static int
874
add_op(regex_t* reg, int opcode)
875
729k
{
876
729k
  int r;
877
878
729k
  r = ops_new(reg);
879
729k
  if (r != ONIG_NORMAL) return r;
880
881
729k
#ifdef USE_DIRECT_THREADED_CODE
882
729k
  *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
883
#else
884
  reg->ops_curr->opcode = opcode;
885
#endif
886
887
729k
  return 0;
888
729k
}
889
890
static int compile_length_tree(Node* node, regex_t* reg, ParseEnv* env);
891
static int compile_tree(Node* node, regex_t* reg, ParseEnv* env);
892
893
894
#define IS_NEED_STR_LEN_OP(op) \
895
102k
   ((op) == OP_STR_N    || (op) == OP_STR_MB2N ||\
896
102k
    (op) == OP_STR_MB3N || (op) == OP_STR_MBN)
897
898
static int
899
select_str_opcode(int mb_len, int str_len)
900
104k
{
901
104k
  int op;
902
903
104k
  switch (mb_len) {
904
67.8k
  case 1:
905
67.8k
    switch (str_len) {
906
37.9k
    case 1:  op = OP_STR_1; break;
907
20.1k
    case 2:  op = OP_STR_2; break;
908
3.80k
    case 3:  op = OP_STR_3; break;
909
1.70k
    case 4:  op = OP_STR_4; break;
910
1.21k
    case 5:  op = OP_STR_5; break;
911
3.07k
    default: op = OP_STR_N; break;
912
67.8k
    }
913
67.8k
    break;
914
915
67.8k
  case 2:
916
32.5k
    switch (str_len) {
917
18.3k
    case 1:  op = OP_STR_MB2N1; break;
918
10.6k
    case 2:  op = OP_STR_MB2N2; break;
919
3.25k
    case 3:  op = OP_STR_MB2N3; break;
920
331
    default: op = OP_STR_MB2N;  break;
921
32.5k
    }
922
32.5k
    break;
923
924
32.5k
  case 3:
925
2.17k
    op = OP_STR_MB3N;
926
2.17k
    break;
927
928
2.00k
  default:
929
2.00k
    op = OP_STR_MBN;
930
2.00k
    break;
931
104k
  }
932
933
104k
  return op;
934
104k
}
935
936
static int
937
is_strict_real_node(Node* node)
938
35.1k
{
939
35.1k
  switch (ND_TYPE(node)) {
940
9.49k
  case ND_STRING:
941
9.49k
    {
942
9.49k
      StrNode* sn = STR_(node);
943
9.49k
      return (sn->end != sn->s);
944
0
    }
945
0
    break;
946
947
6.33k
  case ND_CCLASS:
948
12.9k
  case ND_CTYPE:
949
12.9k
    return 1;
950
0
    break;
951
952
12.7k
  default:
953
12.7k
    return 0;
954
0
    break;
955
35.1k
  }
956
35.1k
}
957
958
static int
959
compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ParseEnv* env)
960
25.5k
{
961
25.5k
  int r;
962
25.5k
  int saved_num_empty_check;
963
25.5k
  int emptiness;
964
25.5k
  Node* body;
965
966
25.5k
  body = ND_BODY((Node* )qn);
967
25.5k
  emptiness = qn->emptiness;
968
25.5k
  saved_num_empty_check = reg->num_empty_check;
969
970
25.5k
  if (emptiness != BODY_IS_NOT_EMPTY) {
971
5.55k
    r = add_op(reg, OP_EMPTY_CHECK_START);
972
5.55k
    if (r != 0) return r;
973
5.55k
    COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
974
5.55k
    reg->num_empty_check++;
975
5.55k
  }
976
977
25.5k
  r = compile_tree(body, reg, env);
978
25.5k
  if (r != 0) return r;
979
980
25.5k
  if (emptiness != BODY_IS_NOT_EMPTY) {
981
5.55k
    if (emptiness == BODY_MAY_BE_EMPTY)
982
2.62k
      r = add_op(reg, OP_EMPTY_CHECK_END);
983
2.93k
    else if (emptiness == BODY_MAY_BE_EMPTY_MEM) {
984
2.34k
      if (ND_IS_EMPTY_STATUS_CHECK(qn) != 0 && qn->empty_status_mem != 0) {
985
495
        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
986
495
        if (r != 0) return r;
987
495
        COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
988
495
      }
989
1.85k
      else
990
1.85k
        r = add_op(reg, OP_EMPTY_CHECK_END);
991
2.34k
    }
992
583
#ifdef USE_CALL
993
583
    else if (emptiness == BODY_MAY_BE_EMPTY_REC) {
994
583
      r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
995
583
      if (r != 0) return r;
996
583
      COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
997
583
    }
998
5.55k
#endif
999
1000
5.55k
    if (r != 0) return r;
1001
5.55k
    COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
1002
5.55k
  }
1003
25.5k
  return r;
1004
25.5k
}
1005
1006
#ifdef USE_CALL
1007
static int
1008
compile_call(CallNode* node, regex_t* reg, ParseEnv* env)
1009
3.96k
{
1010
3.96k
  int r;
1011
3.96k
  int offset;
1012
1013
3.96k
  r = add_op(reg, OP_CALL);
1014
3.96k
  if (r != 0) return r;
1015
1016
3.96k
  COP(reg)->call.addr = 0; /* dummy addr. */
1017
#ifdef ONIG_DEBUG_MATCH_COUNTER
1018
  COP(reg)->call.called_mem = node->called_gnum;
1019
#endif
1020
1021
3.96k
  offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
1022
3.96k
  r = unset_addr_list_add(env->unset_addr_list, offset, ND_CALL_BODY(node));
1023
3.96k
  return r;
1024
3.96k
}
1025
#endif
1026
1027
static int
1028
compile_tree_n_times(Node* node, int n, regex_t* reg, ParseEnv* env)
1029
34.7k
{
1030
34.7k
  int i, r;
1031
1032
56.8k
  for (i = 0; i < n; i++) {
1033
22.0k
    r = compile_tree(node, reg, env);
1034
22.0k
    if (r != 0) return r;
1035
22.0k
  }
1036
34.7k
  return 0;
1037
34.7k
}
1038
1039
static int
1040
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
1041
                          regex_t* reg ARG_UNUSED)
1042
240k
{
1043
240k
  return 1;
1044
240k
}
1045
1046
static int
1047
add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
1048
104k
{
1049
104k
  int op;
1050
104k
  int r;
1051
104k
  int byte_len;
1052
104k
  UChar* p;
1053
104k
  UChar* end;
1054
1055
104k
  op = select_str_opcode(mb_len, str_len);
1056
104k
  r = add_op(reg, op);
1057
104k
  if (r != 0) return r;
1058
1059
104k
  byte_len = mb_len * str_len;
1060
104k
  end = s + byte_len;
1061
1062
104k
  if (op == OP_STR_MBN) {
1063
2.00k
    p = onigenc_strdup(reg->enc, s, end);
1064
2.00k
    CHECK_NULL_RETURN_MEMERR(p);
1065
1066
2.00k
    COP(reg)->exact_len_n.len = mb_len;
1067
2.00k
    COP(reg)->exact_len_n.n   = str_len;
1068
2.00k
    COP(reg)->exact_len_n.s   = p;
1069
2.00k
  }
1070
102k
  else if (IS_NEED_STR_LEN_OP(op)) {
1071
5.58k
    p = onigenc_strdup(reg->enc, s, end);
1072
5.58k
    CHECK_NULL_RETURN_MEMERR(p);
1073
5.58k
    COP(reg)->exact_n.n = str_len;
1074
5.58k
    COP(reg)->exact_n.s = p;
1075
5.58k
  }
1076
96.9k
  else {
1077
96.9k
    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
1078
96.9k
    xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
1079
96.9k
  }
1080
1081
104k
  return 0;
1082
104k
}
1083
1084
static int
1085
compile_length_string_node(Node* node, regex_t* reg)
1086
256k
{
1087
256k
  int rlen, r, len, prev_len, slen;
1088
256k
  UChar *p, *prev;
1089
256k
  StrNode* sn;
1090
256k
  OnigEncoding enc = reg->enc;
1091
1092
256k
  sn = STR_(node);
1093
256k
  if (sn->end <= sn->s)
1094
33.9k
    return 0;
1095
1096
222k
  p = prev = sn->s;
1097
222k
  prev_len = enclen(enc, p);
1098
222k
  p += prev_len;
1099
222k
  slen = 1;
1100
222k
  rlen = 0;
1101
1102
412k
  for (; p < sn->end; ) {
1103
189k
    len = enclen(enc, p);
1104
189k
    if (len == prev_len) {
1105
179k
      slen++;
1106
179k
    }
1107
10.0k
    else {
1108
10.0k
      r = add_compile_string_length(prev, prev_len, slen, reg);
1109
10.0k
      rlen += r;
1110
10.0k
      prev = p;
1111
10.0k
      slen = 1;
1112
10.0k
      prev_len = len;
1113
10.0k
    }
1114
189k
    p += len;
1115
189k
  }
1116
1117
222k
  r = add_compile_string_length(prev, prev_len, slen, reg);
1118
222k
  rlen += r;
1119
222k
  return rlen;
1120
256k
}
1121
1122
static int
1123
compile_length_string_crude_node(StrNode* sn, regex_t* reg)
1124
7.76k
{
1125
7.76k
  if (sn->end <= sn->s)
1126
0
    return 0;
1127
1128
7.76k
  return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
1129
7.76k
                                   reg);
1130
7.76k
}
1131
1132
static int
1133
compile_string_node(Node* node, regex_t* reg)
1134
113k
{
1135
113k
  int r, len, prev_len, slen;
1136
113k
  UChar *p, *prev, *end;
1137
113k
  StrNode* sn;
1138
113k
  OnigEncoding enc = reg->enc;
1139
1140
113k
  sn = STR_(node);
1141
113k
  if (sn->end <= sn->s)
1142
15.2k
    return 0;
1143
1144
97.8k
  end = sn->end;
1145
1146
97.8k
  p = prev = sn->s;
1147
97.8k
  prev_len = enclen(enc, p);
1148
97.8k
  p += prev_len;
1149
97.8k
  slen = 1;
1150
1151
213k
  for (; p < end; ) {
1152
115k
    len = enclen(enc, p);
1153
115k
    if (len == prev_len) {
1154
110k
      slen++;
1155
110k
    }
1156
4.94k
    else {
1157
4.94k
      r = add_compile_string(prev, prev_len, slen, reg);
1158
4.94k
      if (r != 0) return r;
1159
1160
4.94k
      prev  = p;
1161
4.94k
      slen  = 1;
1162
4.94k
      prev_len = len;
1163
4.94k
    }
1164
1165
115k
    p += len;
1166
115k
  }
1167
1168
97.8k
  return add_compile_string(prev, prev_len, slen, reg);
1169
97.8k
}
1170
1171
static int
1172
compile_string_crude_node(StrNode* sn, regex_t* reg)
1173
1.78k
{
1174
1.78k
  if (sn->end <= sn->s)
1175
0
    return 0;
1176
1177
1.78k
  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
1178
1.78k
}
1179
1180
static void*
1181
set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
1182
104k
{
1183
104k
  size_t len;
1184
104k
  void* p;
1185
1186
104k
  len = (size_t )mbuf->used;
1187
104k
  p = xmalloc(len);
1188
104k
  if (IS_NULL(p)) return NULL;
1189
1190
104k
  xmemcpy(p, mbuf->p, len);
1191
104k
  return p;
1192
104k
}
1193
1194
static int
1195
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
1196
411k
{
1197
411k
  return 1;
1198
411k
}
1199
1200
static int
1201
compile_cclass_node(CClassNode* cc, regex_t* reg)
1202
154k
{
1203
154k
  int r;
1204
1205
154k
  if (IS_NULL(cc->mbuf)) {
1206
49.9k
    r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
1207
49.9k
    if (r != 0) return r;
1208
1209
49.9k
    COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
1210
49.9k
    CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
1211
49.9k
    xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
1212
49.9k
  }
1213
104k
  else {
1214
104k
    void* p;
1215
1216
104k
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
1217
98.2k
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
1218
98.2k
      if (r != 0) return r;
1219
1220
98.2k
      p = set_multi_byte_cclass(cc->mbuf, reg);
1221
98.2k
      CHECK_NULL_RETURN_MEMERR(p);
1222
98.2k
      COP(reg)->cclass_mb.mb = p;
1223
98.2k
    }
1224
6.55k
    else {
1225
6.55k
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
1226
6.55k
      if (r != 0) return r;
1227
1228
6.55k
      COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
1229
6.55k
      CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
1230
6.55k
      xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
1231
1232
6.55k
      p = set_multi_byte_cclass(cc->mbuf, reg);
1233
6.55k
      CHECK_NULL_RETURN_MEMERR(p);
1234
6.55k
      COP(reg)->cclass_mix.mb = p;
1235
6.55k
    }
1236
104k
  }
1237
1238
154k
  return 0;
1239
154k
}
1240
1241
static void
1242
set_addr_in_repeat_range(regex_t* reg)
1243
18.5k
{
1244
18.5k
  int i;
1245
1246
23.8k
  for (i = 0; i < reg->num_repeat; i++) {
1247
5.27k
    RepeatRange* p = reg->repeat_range + i;
1248
5.27k
    int offset = p->u.offset;
1249
5.27k
    p->u.pcode = reg->ops + offset;
1250
5.27k
  }
1251
18.5k
}
1252
1253
static int
1254
entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
1255
5.27k
{
1256
5.27k
#define REPEAT_RANGE_ALLOC  4
1257
1258
5.27k
  RepeatRange* p;
1259
1260
5.27k
  if (reg->repeat_range_alloc == 0) {
1261
2.45k
    p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
1262
2.45k
    CHECK_NULL_RETURN_MEMERR(p);
1263
2.45k
    reg->repeat_range = p;
1264
2.45k
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
1265
2.45k
  }
1266
2.82k
  else if (reg->repeat_range_alloc <= id) {
1267
342
    int n;
1268
342
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
1269
342
    p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
1270
342
    CHECK_NULL_RETURN_MEMERR(p);
1271
342
    reg->repeat_range = p;
1272
342
    reg->repeat_range_alloc = n;
1273
342
  }
1274
2.47k
  else {
1275
2.47k
    p = reg->repeat_range;
1276
2.47k
  }
1277
1278
5.27k
  p[id].lower    = lower;
1279
5.27k
  p[id].upper    = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
1280
5.27k
  p[id].u.offset = ops_index;
1281
5.27k
  return 0;
1282
5.27k
}
1283
1284
static int
1285
compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
1286
                          regex_t* reg, ParseEnv* env)
1287
5.27k
{
1288
5.27k
  int r;
1289
5.27k
  int num_repeat = reg->num_repeat++;
1290
1291
5.27k
  r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
1292
5.27k
  if (r != 0) return r;
1293
1294
5.27k
  COP(reg)->repeat.id   = num_repeat;
1295
5.27k
  COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
1296
1297
5.27k
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
1298
5.27k
                         COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
1299
5.27k
  if (r != 0) return r;
1300
1301
5.27k
  r = compile_quant_body_with_empty_check(qn, reg, env);
1302
5.27k
  if (r != 0) return r;
1303
1304
5.27k
  r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
1305
5.27k
  if (r != 0) return r;
1306
1307
5.27k
  COP(reg)->repeat_inc.id = num_repeat;
1308
5.27k
  return r;
1309
5.27k
}
1310
1311
static int
1312
is_anychar_infinite_greedy(QuantNode* qn)
1313
96.9k
{
1314
96.9k
  if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
1315
96.9k
      ND_IS_ANYCHAR(ND_QUANT_BODY(qn)))
1316
10.7k
    return 1;
1317
86.1k
  else
1318
86.1k
    return 0;
1319
96.9k
}
1320
1321
40.1k
#define QUANTIFIER_EXPAND_LIMIT_SIZE   10
1322
#define CKN_ON   (ckn > 0)
1323
1324
static int
1325
compile_length_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1326
58.7k
{
1327
58.7k
  int len, mod_tlen;
1328
58.7k
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1329
58.7k
  enum BodyEmptyType emptiness = qn->emptiness;
1330
58.7k
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1331
1332
58.7k
  if (tlen < 0) return tlen;
1333
58.7k
  if (tlen == 0) return 0;
1334
1335
  /* anychar repeat */
1336
57.8k
  if (is_anychar_infinite_greedy(qn)) {
1337
5.00k
    if (qn->lower <= 1 ||
1338
5.00k
        len_multiply_cmp((OnigLen )tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
1339
4.82k
      if (IS_NOT_NULL(qn->next_head_exact))
1340
423
        return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
1341
4.40k
      else
1342
4.40k
        return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
1343
4.82k
    }
1344
5.00k
  }
1345
1346
53.0k
  mod_tlen = tlen;
1347
53.0k
  if (emptiness != BODY_IS_NOT_EMPTY)
1348
10.5k
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1349
1350
53.0k
  if (infinite &&
1351
53.0k
      (qn->lower <= 1 ||
1352
33.1k
       len_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1353
32.5k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1354
3.32k
      len = OPSIZE_JUMP;
1355
3.32k
    }
1356
29.2k
    else {
1357
29.2k
      len = tlen * qn->lower;
1358
29.2k
    }
1359
1360
32.5k
    if (qn->greedy) {
1361
30.7k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1362
30.7k
      if (IS_NOT_NULL(qn->head_exact))
1363
8.07k
        len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
1364
22.6k
      else
1365
22.6k
#endif
1366
22.6k
      if (IS_NOT_NULL(qn->next_head_exact))
1367
1.77k
        len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
1368
20.8k
      else
1369
20.8k
        len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
1370
30.7k
    }
1371
1.86k
    else
1372
1.86k
      len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
1373
32.5k
  }
1374
20.4k
  else if (qn->upper == 0) {
1375
2.02k
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1376
169
      len = OPSIZE_JUMP + tlen;
1377
169
    }
1378
1.85k
    else
1379
1.85k
      len = 0;
1380
2.02k
  }
1381
18.4k
  else if (!infinite && qn->greedy &&
1382
18.4k
           (qn->upper == 1 ||
1383
15.5k
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1384
10.7k
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1385
10.7k
    len = tlen * qn->lower;
1386
10.7k
    len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
1387
10.7k
  }
1388
7.67k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1389
899
    len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
1390
899
  }
1391
6.78k
  else {
1392
6.78k
    len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
1393
6.78k
  }
1394
1395
53.0k
  return len;
1396
57.8k
}
1397
1398
static int
1399
compile_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1400
39.5k
{
1401
39.5k
  int i, r, mod_tlen;
1402
39.5k
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1403
39.5k
  enum BodyEmptyType emptiness = qn->emptiness;
1404
39.5k
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1405
1406
39.5k
  if (tlen < 0) return tlen;
1407
39.5k
  if (tlen == 0) return 0;
1408
1409
39.0k
  if (is_anychar_infinite_greedy(qn) &&
1410
39.0k
      (qn->lower <= 1 ||
1411
5.78k
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1412
5.65k
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1413
5.65k
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1414
5.65k
    if (r != 0) return r;
1415
5.65k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1416
409
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1417
242
                 OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
1418
409
      if (r != 0) return r;
1419
1420
409
      COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
1421
409
      return 0;
1422
409
    }
1423
5.24k
    else {
1424
5.24k
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1425
2.85k
                 OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
1426
5.24k
      return r;
1427
5.24k
    }
1428
5.65k
  }
1429
1430
33.4k
  mod_tlen = tlen;
1431
33.4k
  if (emptiness != BODY_IS_NOT_EMPTY)
1432
7.39k
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1433
1434
33.4k
  if (infinite &&
1435
33.4k
      (qn->lower <= 1 ||
1436
20.7k
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1437
20.3k
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1438
20.3k
    int addr;
1439
1440
20.3k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1441
1.75k
      r = add_op(reg, OP_JUMP);
1442
1.75k
      if (r != 0) return r;
1443
1.75k
      if (qn->greedy) {
1444
1.72k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1445
1.72k
        if (IS_NOT_NULL(qn->head_exact))
1446
249
          COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
1447
1.47k
        else
1448
1.47k
#endif
1449
1.47k
        if (IS_NOT_NULL(qn->next_head_exact))
1450
223
          COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
1451
1.25k
        else
1452
1.25k
          COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
1453
1.72k
      }
1454
30
      else {
1455
30
        COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
1456
30
      }
1457
1.75k
    }
1458
18.5k
    else {
1459
18.5k
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1460
18.5k
      if (r != 0) return r;
1461
18.5k
    }
1462
1463
20.3k
    if (qn->greedy) {
1464
19.3k
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1465
19.3k
      if (IS_NOT_NULL(qn->head_exact)) {
1466
4.02k
        r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
1467
4.02k
        if (r != 0) return r;
1468
4.02k
        COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1469
4.02k
        COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0];
1470
1471
4.02k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1472
4.02k
        if (r != 0) return r;
1473
1474
4.02k
        addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
1475
4.02k
      }
1476
15.3k
      else
1477
15.3k
#endif
1478
15.3k
      if (IS_NOT_NULL(qn->next_head_exact)) {
1479
1.35k
        r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
1480
1.35k
        if (r != 0) return r;
1481
1.35k
        COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1482
1.35k
        COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0];
1483
1484
1.35k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1485
1.35k
        if (r != 0) return r;
1486
1487
1.35k
        addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
1488
1.35k
      }
1489
13.9k
      else {
1490
13.9k
        r = add_op(reg, OP_PUSH);
1491
13.9k
        if (r != 0) return r;
1492
13.9k
        COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1493
1494
13.9k
        r = compile_quant_body_with_empty_check(qn, reg, env);
1495
13.9k
        if (r != 0) return r;
1496
1497
13.9k
        addr = -(mod_tlen + (int )OPSIZE_PUSH);
1498
13.9k
      }
1499
1500
19.3k
      r = add_op(reg, OP_JUMP);
1501
19.3k
      if (r != 0) return r;
1502
19.3k
      COP(reg)->jump.addr = addr;
1503
19.3k
    }
1504
954
    else {
1505
954
      r = add_op(reg, OP_JUMP);
1506
954
      if (r != 0) return r;
1507
954
      COP(reg)->jump.addr = mod_tlen + SIZE_INC;
1508
1509
954
      r = compile_quant_body_with_empty_check(qn, reg, env);
1510
954
      if (r != 0) return r;
1511
1512
954
      r = add_op(reg, OP_PUSH);
1513
954
      if (r != 0) return r;
1514
954
      COP(reg)->push.addr = -mod_tlen;
1515
954
    }
1516
20.3k
  }
1517
13.1k
  else if (qn->upper == 0) {
1518
812
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1519
101
      r = add_op(reg, OP_JUMP);
1520
101
      if (r != 0) return r;
1521
101
      COP(reg)->jump.addr = tlen + SIZE_INC;
1522
1523
101
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1524
101
    }
1525
711
    else {
1526
      /* Nothing output */
1527
711
      r = 0;
1528
711
    }
1529
812
  }
1530
12.2k
  else if (! infinite && qn->greedy &&
1531
12.2k
           (qn->upper == 1 ||
1532
10.6k
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1533
6.48k
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1534
6.48k
    int n = qn->upper - qn->lower;
1535
1536
6.48k
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1537
6.48k
    if (r != 0) return r;
1538
1539
11.9k
    for (i = 0; i < n; i++) {
1540
5.48k
      int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
1541
5.48k
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1542
1543
5.48k
      r = add_op(reg, OP_PUSH);
1544
5.48k
      if (r != 0) return r;
1545
5.48k
      COP(reg)->push.addr = v;
1546
1547
5.48k
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1548
5.48k
      if (r != 0) return r;
1549
5.48k
    }
1550
6.48k
  }
1551
5.81k
  else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1552
532
    r = add_op(reg, OP_PUSH);
1553
532
    if (r != 0) return r;
1554
532
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
1555
1556
532
    r = add_op(reg, OP_JUMP);
1557
532
    if (r != 0) return r;
1558
532
    COP(reg)->jump.addr = tlen + SIZE_INC;
1559
1560
532
    r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1561
532
  }
1562
5.27k
  else {
1563
5.27k
    r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
1564
5.27k
  }
1565
33.4k
  return r;
1566
33.4k
}
1567
1568
static int
1569
compile_length_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1570
523
{
1571
523
  int tlen;
1572
1573
523
  tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1574
1575
523
  return tlen;
1576
523
}
1577
1578
static int
1579
compile_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1580
271
{
1581
271
  int r;
1582
1583
271
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1584
1585
271
  return r;
1586
271
}
1587
1588
static int
1589
compile_length_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1590
51.4k
{
1591
51.4k
  int len;
1592
51.4k
  int tlen;
1593
1594
51.4k
  if (node->type == BAG_OPTION)
1595
523
    return compile_length_option_node(node, reg, env);
1596
1597
50.8k
  if (ND_BAG_BODY(node)) {
1598
50.8k
    tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1599
50.8k
    if (tlen < 0) return tlen;
1600
50.8k
  }
1601
0
  else
1602
0
    tlen = 0;
1603
1604
50.8k
  switch (node->type) {
1605
28.6k
  case BAG_MEMORY:
1606
28.6k
#ifdef USE_CALL
1607
1608
28.6k
    if (node->m.regnum == 0 && ND_IS_CALLED(node)) {
1609
0
      len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1610
0
      return len;
1611
0
    }
1612
1613
28.6k
    if (ND_IS_CALLED(node)) {
1614
3.28k
      len = OPSIZE_MEM_START_PUSH + tlen
1615
3.28k
        + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1616
3.28k
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1617
478
        len += (ND_IS_RECURSION(node)
1618
478
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1619
2.80k
      else
1620
2.80k
        len += (ND_IS_RECURSION(node)
1621
2.80k
                ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1622
3.28k
    }
1623
25.4k
    else if (ND_IS_RECURSION(node)) {
1624
2.39k
      len = OPSIZE_MEM_START_PUSH;
1625
2.39k
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1626
2.39k
                     ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
1627
2.39k
    }
1628
23.0k
    else
1629
23.0k
#endif
1630
23.0k
    {
1631
23.0k
      if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1632
22.9k
        len = OPSIZE_MEM_START_PUSH;
1633
75
      else
1634
75
        len = OPSIZE_MEM_START;
1635
1636
23.0k
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1637
23.0k
                     ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
1638
23.0k
    }
1639
28.6k
    break;
1640
1641
17.0k
  case BAG_STOP_BACKTRACK:
1642
17.0k
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1643
4.03k
      int v;
1644
4.03k
      QuantNode* qn;
1645
1646
4.03k
      qn = QUANT_(ND_BAG_BODY(node));
1647
4.03k
      tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1648
4.03k
      if (tlen < 0) return tlen;
1649
1650
4.03k
      v = onig_positive_int_multiply(qn->lower, tlen);
1651
4.03k
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1652
4.03k
      len = v + OPSIZE_PUSH + tlen + OPSIZE_POP + OPSIZE_JUMP;
1653
4.03k
    }
1654
13.0k
    else {
1655
13.0k
      len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1656
13.0k
    }
1657
17.0k
    break;
1658
1659
17.0k
  case BAG_IF_ELSE:
1660
5.15k
    {
1661
5.15k
      Node* cond = ND_BAG_BODY(node);
1662
5.15k
      Node* Then = node->te.Then;
1663
5.15k
      Node* Else = node->te.Else;
1664
1665
5.15k
      len = compile_length_tree(cond, reg, env);
1666
5.15k
      if (len < 0) return len;
1667
5.15k
      len += OPSIZE_PUSH + OPSIZE_MARK + OPSIZE_CUT_TO_MARK;
1668
1669
5.15k
      if (IS_NOT_NULL(Then)) {
1670
1.97k
        tlen = compile_length_tree(Then, reg, env);
1671
1.97k
        if (tlen < 0) return tlen;
1672
1.97k
        len += tlen;
1673
1.97k
      }
1674
1675
5.15k
      len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK;
1676
1677
5.15k
      if (IS_NOT_NULL(Else)) {
1678
3.75k
        tlen = compile_length_tree(Else, reg, env);
1679
3.75k
        if (tlen < 0) return tlen;
1680
3.75k
        len += tlen;
1681
3.75k
      }
1682
5.15k
    }
1683
5.15k
    break;
1684
1685
5.15k
  case BAG_OPTION:
1686
    /* never come here, but set for escape warning */
1687
0
    len = 0;
1688
0
    break;
1689
1690
0
  default:
1691
0
    return ONIGERR_TYPE_BUG;
1692
0
    break;
1693
50.8k
  }
1694
1695
50.8k
  return len;
1696
50.8k
}
1697
1698
static int
1699
compile_bag_memory_node(BagNode* node, regex_t* reg, ParseEnv* env)
1700
17.9k
{
1701
17.9k
  int r;
1702
1703
17.9k
#ifdef USE_CALL
1704
17.9k
  if (ND_IS_CALLED(node)) {
1705
2.21k
    int len;
1706
1707
2.21k
    r = add_op(reg, OP_CALL);
1708
2.21k
    if (r != 0) return r;
1709
1710
2.21k
    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
1711
2.21k
    ND_STATUS_ADD(node, FIXED_ADDR);
1712
2.21k
    COP(reg)->call.addr = (int )node->m.called_addr;
1713
1714
2.21k
    if (node->m.regnum == 0) {
1715
847
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1716
847
      len += OPSIZE_RETURN;
1717
1718
847
      r = add_op(reg, OP_JUMP);
1719
847
      if (r != 0) return r;
1720
847
      COP(reg)->jump.addr = len + SIZE_INC;
1721
1722
847
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1723
847
      if (r != 0) return r;
1724
1725
847
      r = add_op(reg, OP_RETURN);
1726
847
      return r;
1727
847
    }
1728
1.36k
    else {
1729
1.36k
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1730
1.36k
      len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
1731
1.36k
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1732
224
        len += (ND_IS_RECURSION(node)
1733
224
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1734
1.14k
      else
1735
1.14k
        len += (ND_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1736
1737
1.36k
      r = add_op(reg, OP_JUMP);
1738
1.36k
      if (r != 0) return r;
1739
1.36k
      COP(reg)->jump.addr = len + SIZE_INC;
1740
1.36k
    }
1741
2.21k
  }
1742
17.0k
#endif
1743
1744
17.0k
  if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1745
13.8k
    r = add_op(reg, OP_MEM_START_PUSH);
1746
3.20k
  else
1747
3.20k
    r = add_op(reg, OP_MEM_START);
1748
17.0k
  if (r != 0) return r;
1749
17.0k
  COP(reg)->memory_start.num = node->m.regnum;
1750
1751
17.0k
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1752
17.0k
  if (r != 0) return r;
1753
1754
17.0k
#ifdef USE_CALL
1755
17.0k
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1756
6.24k
    r = add_op(reg, (ND_IS_RECURSION(node)
1757
6.24k
                     ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
1758
10.8k
  else
1759
10.8k
    r = add_op(reg, (ND_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
1760
17.0k
  if (r != 0) return r;
1761
17.0k
  COP(reg)->memory_end.num = node->m.regnum;
1762
1763
17.0k
  if (ND_IS_CALLED(node)) {
1764
1.36k
    r = add_op(reg, OP_RETURN);
1765
1.36k
  }
1766
#else
1767
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1768
    r = add_op(reg, OP_MEM_END_PUSH);
1769
  else
1770
    r = add_op(reg, OP_MEM_END);
1771
  if (r != 0) return r;
1772
  COP(reg)->memory_end.num = node->m.regnum;
1773
#endif
1774
1775
17.0k
  return r;
1776
17.0k
}
1777
1778
static int
1779
compile_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1780
33.1k
{
1781
33.1k
  int r, len;
1782
1783
33.1k
  switch (node->type) {
1784
17.9k
  case BAG_MEMORY:
1785
17.9k
    r = compile_bag_memory_node(node, reg, env);
1786
17.9k
    break;
1787
1788
271
  case BAG_OPTION:
1789
271
    r = compile_option_node(node, reg, env);
1790
271
    break;
1791
1792
11.7k
  case BAG_STOP_BACKTRACK:
1793
11.7k
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1794
4.09k
      QuantNode* qn = QUANT_(ND_BAG_BODY(node));
1795
4.09k
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1796
4.09k
      if (r != 0) return r;
1797
1798
4.09k
      len = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1799
4.09k
      if (len < 0) return len;
1800
1801
4.09k
      r = add_op(reg, OP_PUSH);
1802
4.09k
      if (r != 0) return r;
1803
4.09k
      COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP + OPSIZE_JUMP;
1804
1805
4.09k
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1806
4.09k
      if (r != 0) return r;
1807
4.09k
      r = add_op(reg, OP_POP);
1808
4.09k
      if (r != 0) return r;
1809
1810
4.09k
      r = add_op(reg, OP_JUMP);
1811
4.09k
      if (r != 0) return r;
1812
4.09k
      COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP);
1813
4.09k
    }
1814
7.62k
    else {
1815
7.62k
      MemNumType mid;
1816
1817
7.62k
      ID_ENTRY(env, mid);
1818
7.62k
      r = add_op(reg, OP_MARK);
1819
7.62k
      if (r != 0) return r;
1820
7.62k
      COP(reg)->mark.id = mid;
1821
7.62k
      COP(reg)->mark.save_pos = 0;
1822
1823
7.62k
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1824
7.62k
      if (r != 0) return r;
1825
7.62k
      r = add_op(reg, OP_CUT_TO_MARK);
1826
7.62k
      if (r != 0) return r;
1827
7.62k
      COP(reg)->cut_to_mark.id = mid;
1828
7.62k
      COP(reg)->cut_to_mark.restore_pos = 0;
1829
7.62k
    }
1830
11.7k
    break;
1831
1832
11.7k
  case BAG_IF_ELSE:
1833
3.19k
    {
1834
3.19k
      int cond_len, then_len, else_len, jump_len;
1835
3.19k
      MemNumType mid;
1836
3.19k
      Node* cond = ND_BAG_BODY(node);
1837
3.19k
      Node* Then = node->te.Then;
1838
3.19k
      Node* Else = node->te.Else;
1839
1840
3.19k
      ID_ENTRY(env, mid);
1841
1842
3.19k
      r = add_op(reg, OP_MARK);
1843
3.19k
      if (r != 0) return r;
1844
3.19k
      COP(reg)->mark.id = mid;
1845
3.19k
      COP(reg)->mark.save_pos = 0;
1846
1847
3.19k
      cond_len = compile_length_tree(cond, reg, env);
1848
3.19k
      if (cond_len < 0) return cond_len;
1849
3.19k
      if (IS_NOT_NULL(Then)) {
1850
1.32k
        then_len = compile_length_tree(Then, reg, env);
1851
1.32k
        if (then_len < 0) return then_len;
1852
1.32k
      }
1853
1.87k
      else
1854
1.87k
        then_len = 0;
1855
1856
3.19k
      jump_len = cond_len + then_len + OPSIZE_CUT_TO_MARK + OPSIZE_JUMP;
1857
1858
3.19k
      r = add_op(reg, OP_PUSH);
1859
3.19k
      if (r != 0) return r;
1860
3.19k
      COP(reg)->push.addr = SIZE_INC + jump_len;
1861
1862
3.19k
      r = compile_tree(cond, reg, env);
1863
3.19k
      if (r != 0) return r;
1864
3.19k
      r = add_op(reg, OP_CUT_TO_MARK);
1865
3.19k
      if (r != 0) return r;
1866
3.19k
      COP(reg)->cut_to_mark.id = mid;
1867
3.19k
      COP(reg)->cut_to_mark.restore_pos = 0;
1868
1869
3.19k
      if (IS_NOT_NULL(Then)) {
1870
1.32k
        r = compile_tree(Then, reg, env);
1871
1.32k
        if (r != 0) return r;
1872
1.32k
      }
1873
1874
3.19k
      if (IS_NOT_NULL(Else)) {
1875
2.12k
        else_len = compile_length_tree(Else, reg, env);
1876
2.12k
        if (else_len < 0) return else_len;
1877
2.12k
      }
1878
1.07k
      else
1879
1.07k
        else_len = 0;
1880
1881
3.19k
      r = add_op(reg, OP_JUMP);
1882
3.19k
      if (r != 0) return r;
1883
3.19k
      COP(reg)->jump.addr = OPSIZE_CUT_TO_MARK + else_len + SIZE_INC;
1884
1885
3.19k
      r = add_op(reg, OP_CUT_TO_MARK);
1886
3.19k
      if (r != 0) return r;
1887
3.19k
      COP(reg)->cut_to_mark.id = mid;
1888
3.19k
      COP(reg)->cut_to_mark.restore_pos = 0;
1889
1890
3.19k
      if (IS_NOT_NULL(Else)) {
1891
2.12k
        r = compile_tree(Else, reg, env);
1892
2.12k
      }
1893
3.19k
    }
1894
0
    break;
1895
1896
0
  default:
1897
0
    return ONIGERR_TYPE_BUG;
1898
0
    break;
1899
33.1k
  }
1900
1901
33.1k
  return r;
1902
33.1k
}
1903
1904
static int
1905
compile_length_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1906
29.1k
{
1907
29.1k
  int len;
1908
29.1k
  int tlen = 0;
1909
1910
29.1k
  if (IS_NOT_NULL(ND_ANCHOR_BODY(node))) {
1911
15.0k
    tlen = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
1912
15.0k
    if (tlen < 0) return tlen;
1913
15.0k
  }
1914
1915
29.1k
  switch (node->type) {
1916
2.96k
  case ANCR_PREC_READ:
1917
2.96k
    len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1918
2.96k
    break;
1919
706
  case ANCR_PREC_READ_NOT:
1920
706
    len = OPSIZE_PUSH + OPSIZE_MARK + tlen + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
1921
706
    break;
1922
9.24k
  case ANCR_LOOK_BEHIND:
1923
9.24k
    if (node->char_min_len == node->char_max_len)
1924
7.80k
      len = OPSIZE_MARK + OPSIZE_STEP_BACK_START + tlen + OPSIZE_CUT_TO_MARK;
1925
1.43k
    else {
1926
1.43k
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_UPDATE_VAR + OPSIZE_FAIL + OPSIZE_JUMP + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_CUT_TO_MARK + OPSIZE_UPDATE_VAR;
1927
1928
1.43k
      if (IS_NOT_NULL(node->lead_node)) {
1929
823
        int llen = compile_length_tree(node->lead_node, reg, env);
1930
823
        if (llen < 0) return llen;
1931
1932
823
        len += OPSIZE_MOVE + llen;
1933
823
      }
1934
1935
1.43k
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1936
398
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1937
1.43k
    }
1938
9.24k
    break;
1939
9.24k
  case ANCR_LOOK_BEHIND_NOT:
1940
2.16k
    if (node->char_min_len == node->char_max_len)
1941
1.56k
      len = OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + tlen + OPSIZE_POP_TO_MARK + OPSIZE_FAIL + OPSIZE_POP;
1942
599
    else {
1943
599
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_POP;
1944
1945
599
      if (IS_NOT_NULL(node->lead_node)) {
1946
433
        int llen = compile_length_tree(node->lead_node, reg, env);
1947
433
        if (llen < 0) return llen;
1948
1949
433
        len += OPSIZE_MOVE + llen;
1950
433
      }
1951
1952
599
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1953
214
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1954
599
    }
1955
2.16k
    break;
1956
1957
2.16k
  case ANCR_WORD_BOUNDARY:
1958
298
  case ANCR_NO_WORD_BOUNDARY:
1959
298
#ifdef USE_WORD_BEGIN_END
1960
369
  case ANCR_WORD_BEGIN:
1961
370
  case ANCR_WORD_END:
1962
370
#endif
1963
370
    len = OPSIZE_WORD_BOUNDARY;
1964
370
    break;
1965
1966
415
  case ANCR_TEXT_SEGMENT_BOUNDARY:
1967
8.39k
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1968
8.39k
    len = SIZE_OPCODE;
1969
8.39k
    break;
1970
1971
5.26k
  default:
1972
5.26k
    len = SIZE_OPCODE;
1973
5.26k
    break;
1974
29.1k
  }
1975
1976
29.1k
  return len;
1977
29.1k
}
1978
1979
static int
1980
compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1981
3.19k
{
1982
3.19k
  int r;
1983
1984
3.19k
  if (node->char_min_len == node->char_max_len) {
1985
2.63k
    MemNumType mid;
1986
1987
2.63k
    ID_ENTRY(env, mid);
1988
2.63k
    r = add_op(reg, OP_MARK);
1989
2.63k
    if (r != 0) return r;
1990
2.63k
    COP(reg)->mark.id = mid;
1991
2.63k
    COP(reg)->mark.save_pos = FALSE;
1992
1993
2.63k
    r = add_op(reg, OP_STEP_BACK_START);
1994
2.63k
    if (r != 0) return r;
1995
2.63k
    COP(reg)->step_back_start.initial   = node->char_min_len;
1996
2.63k
    COP(reg)->step_back_start.remaining = 0;
1997
2.63k
    COP(reg)->step_back_start.addr      = 1;
1998
1999
2.63k
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2000
2.63k
    if (r != 0) return r;
2001
2002
2.63k
    r = add_op(reg, OP_CUT_TO_MARK);
2003
2.63k
    if (r != 0) return r;
2004
2.63k
    COP(reg)->cut_to_mark.id = mid;
2005
2.63k
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2006
2.63k
  }
2007
564
  else {
2008
564
    OnigLen diff;
2009
564
    MemNumType mid1, mid2;
2010
564
    MemNumType mid3 = 0; /* ignore uninitialized warning */
2011
2012
564
    if (IS_NOT_NULL(node->lead_node)) {
2013
270
      MinMaxCharLen ci;
2014
2015
270
      r = node_char_len(node->lead_node, reg, &ci, env);
2016
270
      if (r < 0) return r;
2017
270
      r = add_op(reg, OP_MOVE);
2018
270
      if (r != 0) return r;
2019
270
      COP(reg)->move.n = -((RelPositionType )ci.min);
2020
270
      r = compile_tree(node->lead_node, reg, env);
2021
270
      if (r != 0) return r;
2022
270
    }
2023
2024
564
    ID_ENTRY(env, mid1);
2025
564
    r = add_op(reg, OP_SAVE_VAL);
2026
564
    if (r != 0) return r;
2027
564
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2028
564
    COP(reg)->save_val.id   = mid1;
2029
2030
564
    r = add_op(reg, OP_UPDATE_VAR);
2031
564
    if (r != 0) return r;
2032
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2033
2034
564
    ID_ENTRY(env, mid2);
2035
564
    r = add_op(reg, OP_MARK);
2036
564
    if (r != 0) return r;
2037
564
    COP(reg)->mark.id = mid2;
2038
564
    COP(reg)->mark.save_pos = FALSE;
2039
2040
564
    r = add_op(reg, OP_PUSH);
2041
564
    if (r != 0) return r;
2042
564
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
2043
2044
564
    r = add_op(reg, OP_JUMP);
2045
564
    if (r != 0) return r;
2046
564
    COP(reg)->jump.addr = SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL;
2047
2048
564
    r = add_op(reg, OP_UPDATE_VAR);
2049
564
    if (r != 0) return r;
2050
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2051
564
    COP(reg)->update_var.id    = mid1;
2052
564
    COP(reg)->update_var.clear = FALSE;
2053
564
    r = add_op(reg, OP_FAIL);
2054
564
    if (r != 0) return r;
2055
2056
564
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2057
62
      ID_ENTRY(env, mid3);
2058
62
      r = add_op(reg, OP_SAVE_VAL);
2059
62
      if (r != 0) return r;
2060
62
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2061
62
      COP(reg)->save_val.id   = mid3;
2062
62
    }
2063
2064
564
    r = add_op(reg, OP_STEP_BACK_START);
2065
564
    if (r != 0) return r;
2066
2067
564
    if (node->char_max_len != INFINITE_LEN)
2068
190
      diff = node->char_max_len - node->char_min_len;
2069
374
    else
2070
374
      diff = INFINITE_LEN;
2071
2072
564
    COP(reg)->step_back_start.initial   = node->char_min_len;
2073
564
    COP(reg)->step_back_start.remaining = diff;
2074
564
    COP(reg)->step_back_start.addr      = 2;
2075
2076
564
    r = add_op(reg, OP_STEP_BACK_NEXT);
2077
564
    if (r != 0) return r;
2078
2079
564
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2080
564
    if (r != 0) return r;
2081
2082
564
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2083
62
      r = add_op(reg, OP_UPDATE_VAR);
2084
62
      if (r != 0) return r;
2085
62
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2086
62
      COP(reg)->update_var.id    = mid3;
2087
62
      COP(reg)->update_var.clear = FALSE;
2088
62
    }
2089
2090
564
    r = add_op(reg, OP_CHECK_POSITION);
2091
564
    if (r != 0) return r;
2092
564
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2093
2094
564
    r = add_op(reg, OP_CUT_TO_MARK);
2095
564
    if (r != 0) return r;
2096
564
    COP(reg)->cut_to_mark.id = mid2;
2097
564
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2098
2099
564
    r = add_op(reg, OP_UPDATE_VAR);
2100
564
    if (r != 0) return r;
2101
564
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2102
564
    COP(reg)->update_var.id    = mid1;
2103
564
    COP(reg)->update_var.clear = TRUE;
2104
564
  }
2105
2106
3.19k
  return r;
2107
3.19k
}
2108
2109
static int
2110
compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg,
2111
                                    ParseEnv* env)
2112
2.68k
{
2113
2.68k
  int r;
2114
2.68k
  int len;
2115
2116
2.68k
  len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2117
2118
2.68k
  if (node->char_min_len == node->char_max_len) {
2119
2.38k
    MemNumType mid;
2120
2121
2.38k
    ID_ENTRY(env, mid);
2122
2.38k
    r = add_op(reg, OP_MARK);
2123
2.38k
    if (r != 0) return r;
2124
2.38k
    COP(reg)->mark.id = mid;
2125
2.38k
    COP(reg)->mark.save_pos = FALSE;
2126
2127
2.38k
    r = add_op(reg, OP_PUSH);
2128
2.38k
    if (r != 0) return r;
2129
2.38k
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL;
2130
2131
2.38k
    r = add_op(reg, OP_STEP_BACK_START);
2132
2.38k
    if (r != 0) return r;
2133
2.38k
    COP(reg)->step_back_start.initial   = node->char_min_len;
2134
2.38k
    COP(reg)->step_back_start.remaining = 0;
2135
2.38k
    COP(reg)->step_back_start.addr      = 1;
2136
2137
2.38k
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2138
2.38k
    if (r != 0) return r;
2139
2140
2.38k
    r = add_op(reg, OP_POP_TO_MARK);
2141
2.38k
    if (r != 0) return r;
2142
2.38k
    COP(reg)->pop_to_mark.id = mid;
2143
2.38k
    r = add_op(reg, OP_FAIL);
2144
2.38k
    if (r != 0) return r;
2145
2.38k
    r = add_op(reg, OP_POP);
2146
2.38k
  }
2147
301
  else {
2148
301
    OnigLen diff;
2149
301
    MemNumType mid1, mid2;
2150
301
    MemNumType mid3 = 0; /* ignore uninitialized warning */
2151
2152
301
    ID_ENTRY(env, mid1);
2153
301
    r = add_op(reg, OP_SAVE_VAL);
2154
301
    if (r != 0) return r;
2155
301
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2156
301
    COP(reg)->save_val.id   = mid1;
2157
2158
301
    r = add_op(reg, OP_UPDATE_VAR);
2159
301
    if (r != 0) return r;
2160
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2161
2162
301
    ID_ENTRY(env, mid2);
2163
301
    r = add_op(reg, OP_MARK);
2164
301
    if (r != 0) return r;
2165
301
    COP(reg)->mark.id = mid2;
2166
301
    COP(reg)->mark.save_pos = FALSE;
2167
2168
301
    r = add_op(reg, OP_PUSH);
2169
301
    if (r != 0) return r;
2170
2171
301
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + len + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL;
2172
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
2173
58
      COP(reg)->push.addr += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
2174
2175
301
    if (IS_NOT_NULL(node->lead_node)) {
2176
182
      int clen;
2177
182
      MinMaxCharLen ci;
2178
2179
182
      clen = compile_length_tree(node->lead_node, reg, env);
2180
182
      COP(reg)->push.addr += OPSIZE_MOVE + clen;
2181
2182
182
      r = node_char_len(node->lead_node, reg, &ci, env);
2183
182
      if (r < 0) return r;
2184
182
      r = add_op(reg, OP_MOVE);
2185
182
      if (r != 0) return r;
2186
182
      COP(reg)->move.n = -((RelPositionType )ci.min);
2187
2188
182
      r = compile_tree(node->lead_node, reg, env);
2189
182
      if (r != 0) return r;
2190
182
    }
2191
2192
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2193
58
      ID_ENTRY(env, mid3);
2194
58
      r = add_op(reg, OP_SAVE_VAL);
2195
58
      if (r != 0) return r;
2196
58
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2197
58
      COP(reg)->save_val.id   = mid3;
2198
58
    }
2199
2200
301
    r = add_op(reg, OP_STEP_BACK_START);
2201
301
    if (r != 0) return r;
2202
2203
301
    if (node->char_max_len != INFINITE_LEN)
2204
121
      diff = node->char_max_len - node->char_min_len;
2205
180
    else
2206
180
      diff = INFINITE_LEN;
2207
2208
301
    COP(reg)->step_back_start.initial   = node->char_min_len;
2209
301
    COP(reg)->step_back_start.remaining = diff;
2210
301
    COP(reg)->step_back_start.addr      = 2;
2211
2212
301
    r = add_op(reg, OP_STEP_BACK_NEXT);
2213
301
    if (r != 0) return r;
2214
2215
301
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2216
301
    if (r != 0) return r;
2217
2218
301
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2219
58
      r = add_op(reg, OP_UPDATE_VAR);
2220
58
      if (r != 0) return r;
2221
58
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2222
58
      COP(reg)->update_var.id    = mid3;
2223
58
      COP(reg)->update_var.clear = FALSE;
2224
58
    }
2225
2226
301
    r = add_op(reg, OP_CHECK_POSITION);
2227
301
    if (r != 0) return r;
2228
301
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2229
2230
301
    r = add_op(reg, OP_POP_TO_MARK);
2231
301
    if (r != 0) return r;
2232
301
    COP(reg)->pop_to_mark.id = mid2;
2233
2234
301
    r = add_op(reg, OP_UPDATE_VAR);
2235
301
    if (r != 0) return r;
2236
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2237
301
    COP(reg)->update_var.id   = mid1;
2238
301
    COP(reg)->update_var.clear = FALSE;
2239
2240
301
    r = add_op(reg, OP_POP); /* pop save val */
2241
301
    if (r != 0) return r;
2242
301
    r = add_op(reg, OP_FAIL);
2243
301
    if (r != 0) return r;
2244
2245
301
    r = add_op(reg, OP_UPDATE_VAR);
2246
301
    if (r != 0) return r;
2247
301
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2248
301
    COP(reg)->update_var.id   = mid1;
2249
301
    COP(reg)->update_var.clear = FALSE;
2250
2251
301
    r = add_op(reg, OP_POP); /* pop mark */
2252
301
    if (r != 0) return r;
2253
301
    r = add_op(reg, OP_POP); /* pop save val */
2254
301
  }
2255
2256
2.68k
  return r;
2257
2.68k
}
2258
2259
static int
2260
compile_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
2261
19.1k
{
2262
19.1k
  int r, len;
2263
19.1k
  enum OpCode op;
2264
19.1k
  MemNumType mid;
2265
2266
19.1k
  switch (node->type) {
2267
1.30k
  case ANCR_BEGIN_BUF:      r = add_op(reg, OP_BEGIN_BUF);      break;
2268
394
  case ANCR_END_BUF:        r = add_op(reg, OP_END_BUF);        break;
2269
1.27k
  case ANCR_BEGIN_LINE:     r = add_op(reg, OP_BEGIN_LINE);     break;
2270
1.07k
  case ANCR_END_LINE:       r = add_op(reg, OP_END_LINE);       break;
2271
1.56k
  case ANCR_SEMI_END_BUF:   r = add_op(reg, OP_SEMI_END_BUF);   break;
2272
258
  case ANCR_BEGIN_POSITION:
2273
258
    r = add_op(reg, OP_CHECK_POSITION);
2274
258
    if (r != 0) return r;
2275
258
    COP(reg)->check_position.type = CHECK_POSITION_SEARCH_START;
2276
258
    break;
2277
2278
511
  case ANCR_WORD_BOUNDARY:
2279
511
    op = OP_WORD_BOUNDARY;
2280
1.33k
  word:
2281
1.33k
    r = add_op(reg, op);
2282
1.33k
    if (r != 0) return r;
2283
1.33k
    COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
2284
1.33k
    break;
2285
2286
378
  case ANCR_NO_WORD_BOUNDARY:
2287
378
    op = OP_NO_WORD_BOUNDARY; goto word;
2288
0
    break;
2289
0
#ifdef USE_WORD_BEGIN_END
2290
192
  case ANCR_WORD_BEGIN:
2291
192
    op = OP_WORD_BEGIN; goto word;
2292
0
    break;
2293
258
  case ANCR_WORD_END:
2294
258
    op = OP_WORD_END; goto word;
2295
0
    break;
2296
0
#endif
2297
2298
532
  case ANCR_TEXT_SEGMENT_BOUNDARY:
2299
4.64k
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
2300
4.64k
    {
2301
4.64k
      enum TextSegmentBoundaryType type;
2302
2303
4.64k
      r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
2304
4.64k
      if (r != 0) return r;
2305
2306
4.64k
      type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
2307
4.64k
#ifdef USE_UNICODE_WORD_BREAK
2308
4.64k
      if (ND_IS_TEXT_SEGMENT_WORD(node))
2309
1.96k
        type = WORD_BOUNDARY;
2310
4.64k
#endif
2311
2312
4.64k
      COP(reg)->text_segment_boundary.type = type;
2313
4.64k
      COP(reg)->text_segment_boundary.not =
2314
4.64k
        (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
2315
4.64k
    }
2316
0
    break;
2317
2318
1.09k
  case ANCR_PREC_READ:
2319
1.09k
    {
2320
1.09k
      ID_ENTRY(env, mid);
2321
1.09k
      r = add_op(reg, OP_MARK);
2322
1.09k
      if (r != 0) return r;
2323
1.09k
      COP(reg)->mark.id = mid;
2324
1.09k
      COP(reg)->mark.save_pos = TRUE;
2325
2326
1.09k
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2327
1.09k
      if (r != 0) return r;
2328
2329
1.09k
      r = add_op(reg, OP_CUT_TO_MARK);
2330
1.09k
      if (r != 0) return r;
2331
1.09k
      COP(reg)->cut_to_mark.id = mid;
2332
1.09k
      COP(reg)->cut_to_mark.restore_pos = TRUE;
2333
1.09k
    }
2334
0
    break;
2335
2336
309
  case ANCR_PREC_READ_NOT:
2337
309
    {
2338
309
      len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2339
309
      if (len < 0) return len;
2340
2341
309
      ID_ENTRY(env, mid);
2342
309
      r = add_op(reg, OP_PUSH);
2343
309
      if (r != 0) return r;
2344
309
      COP(reg)->push.addr = SIZE_INC + OPSIZE_MARK + len +
2345
309
                            OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
2346
2347
309
      r = add_op(reg, OP_MARK);
2348
309
      if (r != 0) return r;
2349
309
      COP(reg)->mark.id = mid;
2350
309
      COP(reg)->mark.save_pos = FALSE;
2351
2352
309
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2353
309
      if (r != 0) return r;
2354
2355
309
      r = add_op(reg, OP_POP_TO_MARK);
2356
309
      if (r != 0) return r;
2357
309
      COP(reg)->pop_to_mark.id = mid;
2358
2359
309
      r = add_op(reg, OP_POP);
2360
309
      if (r != 0) return r;
2361
309
      r = add_op(reg, OP_FAIL);
2362
309
    }
2363
0
    break;
2364
2365
3.19k
  case ANCR_LOOK_BEHIND:
2366
3.19k
    r = compile_anchor_look_behind_node(node, reg, env);
2367
3.19k
    break;
2368
2369
2.68k
  case ANCR_LOOK_BEHIND_NOT:
2370
2.68k
    r = compile_anchor_look_behind_not_node(node, reg, env);
2371
2.68k
    break;
2372
2373
0
  default:
2374
0
    return ONIGERR_TYPE_BUG;
2375
0
    break;
2376
19.1k
  }
2377
2378
19.1k
  return r;
2379
19.1k
}
2380
2381
static int
2382
compile_gimmick_node(GimmickNode* node, regex_t* reg)
2383
11.9k
{
2384
11.9k
  int r = 0;
2385
2386
11.9k
  switch (node->type) {
2387
2.68k
  case GIMMICK_FAIL:
2388
2.68k
    r = add_op(reg, OP_FAIL);
2389
2.68k
    break;
2390
2391
4.17k
  case GIMMICK_SAVE:
2392
4.17k
    r = add_op(reg, OP_SAVE_VAL);
2393
4.17k
    if (r != 0) return r;
2394
4.17k
    COP(reg)->save_val.type = node->detail_type;
2395
4.17k
    COP(reg)->save_val.id   = node->id;
2396
4.17k
    break;
2397
2398
3.61k
  case GIMMICK_UPDATE_VAR:
2399
3.61k
    r = add_op(reg, OP_UPDATE_VAR);
2400
3.61k
    if (r != 0) return r;
2401
3.61k
    COP(reg)->update_var.type = node->detail_type;
2402
3.61k
    COP(reg)->update_var.id   = node->id;
2403
3.61k
    COP(reg)->update_var.clear = FALSE;
2404
3.61k
    break;
2405
2406
0
#ifdef USE_CALLOUT
2407
1.47k
  case GIMMICK_CALLOUT:
2408
1.47k
    switch (node->detail_type) {
2409
273
    case ONIG_CALLOUT_OF_CONTENTS:
2410
1.47k
    case ONIG_CALLOUT_OF_NAME:
2411
1.47k
      {
2412
1.47k
        if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
2413
1.20k
          r = add_op(reg, OP_CALLOUT_NAME);
2414
1.20k
          if (r != 0) return r;
2415
1.20k
          COP(reg)->callout_name.id  = node->id;
2416
1.20k
          COP(reg)->callout_name.num = node->num;
2417
1.20k
        }
2418
273
        else {
2419
273
          r = add_op(reg, OP_CALLOUT_CONTENTS);
2420
273
          if (r != 0) return r;
2421
273
          COP(reg)->callout_contents.num = node->num;
2422
273
        }
2423
1.47k
      }
2424
1.47k
      break;
2425
2426
1.47k
    default:
2427
0
      r = ONIGERR_TYPE_BUG;
2428
0
      break;
2429
1.47k
    }
2430
11.9k
#endif
2431
11.9k
  }
2432
2433
11.9k
  return r;
2434
11.9k
}
2435
2436
static int
2437
compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
2438
53.3k
{
2439
53.3k
  int len;
2440
2441
53.3k
  switch (node->type) {
2442
13.6k
  case GIMMICK_FAIL:
2443
13.6k
    len = OPSIZE_FAIL;
2444
13.6k
    break;
2445
2446
20.9k
  case GIMMICK_SAVE:
2447
20.9k
    len = OPSIZE_SAVE_VAL;
2448
20.9k
    break;
2449
2450
17.9k
  case GIMMICK_UPDATE_VAR:
2451
17.9k
    len = OPSIZE_UPDATE_VAR;
2452
17.9k
    break;
2453
2454
0
#ifdef USE_CALLOUT
2455
776
  case GIMMICK_CALLOUT:
2456
776
    switch (node->detail_type) {
2457
519
    case ONIG_CALLOUT_OF_CONTENTS:
2458
519
      len = OPSIZE_CALLOUT_CONTENTS;
2459
519
      break;
2460
257
    case ONIG_CALLOUT_OF_NAME:
2461
257
      len = OPSIZE_CALLOUT_NAME;
2462
257
      break;
2463
2464
0
    default:
2465
0
      len = ONIGERR_TYPE_BUG;
2466
0
      break;
2467
776
    }
2468
776
    break;
2469
776
#endif
2470
2471
776
  default:
2472
0
    return ONIGERR_TYPE_BUG;
2473
0
    break;
2474
53.3k
  }
2475
2476
53.3k
  return len;
2477
53.3k
}
2478
2479
static int
2480
compile_length_tree(Node* node, regex_t* reg, ParseEnv* env)
2481
1.23M
{
2482
1.23M
  int len, r;
2483
2484
1.23M
  switch (ND_TYPE(node)) {
2485
274k
  case ND_LIST:
2486
274k
    len = 0;
2487
684k
    do {
2488
684k
      r = compile_length_tree(ND_CAR(node), reg, env);
2489
684k
      if (r < 0) return r;
2490
684k
      len += r;
2491
684k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
2492
274k
    r = len;
2493
274k
    break;
2494
2495
19.9k
  case ND_ALT:
2496
19.9k
    {
2497
19.9k
      int n;
2498
2499
19.9k
      n = r = 0;
2500
133k
      do {
2501
133k
        r += compile_length_tree(ND_CAR(node), reg, env);
2502
133k
        n++;
2503
133k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2504
19.9k
      r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
2505
19.9k
    }
2506
19.9k
    break;
2507
2508
264k
  case ND_STRING:
2509
264k
    if (ND_STRING_IS_CRUDE(node))
2510
7.76k
      r = compile_length_string_crude_node(STR_(node), reg);
2511
256k
    else
2512
256k
      r = compile_length_string_node(node, reg);
2513
264k
    break;
2514
2515
411k
  case ND_CCLASS:
2516
411k
    r = compile_length_cclass_node(CCLASS_(node), reg);
2517
411k
    break;
2518
2519
49.2k
  case ND_CTYPE:
2520
49.2k
    r = SIZE_OPCODE;
2521
49.2k
    break;
2522
2523
8.02k
  case ND_BACKREF:
2524
8.02k
    r = OPSIZE_BACKREF;
2525
8.02k
    break;
2526
2527
0
#ifdef USE_CALL
2528
16.3k
  case ND_CALL:
2529
16.3k
    r = OPSIZE_CALL;
2530
16.3k
    break;
2531
0
#endif
2532
2533
58.7k
  case ND_QUANT:
2534
58.7k
    r = compile_length_quantifier_node(QUANT_(node), reg, env);
2535
58.7k
    break;
2536
2537
51.4k
  case ND_BAG:
2538
51.4k
    r = compile_length_bag_node(BAG_(node), reg, env);
2539
51.4k
    break;
2540
2541
29.1k
  case ND_ANCHOR:
2542
29.1k
    r = compile_length_anchor_node(ANCHOR_(node), reg, env);
2543
29.1k
    break;
2544
2545
53.3k
  case ND_GIMMICK:
2546
53.3k
    r = compile_length_gimmick_node(GIMMICK_(node), reg);
2547
53.3k
    break;
2548
2549
0
  default:
2550
0
    return ONIGERR_TYPE_BUG;
2551
0
    break;
2552
1.23M
  }
2553
2554
1.23M
  return r;
2555
1.23M
}
2556
2557
static int
2558
compile_tree(Node* node, regex_t* reg, ParseEnv* env)
2559
517k
{
2560
517k
  int n, len, pos, r = 0;
2561
2562
517k
  switch (ND_TYPE(node)) {
2563
100k
  case ND_LIST:
2564
290k
    do {
2565
290k
      r = compile_tree(ND_CAR(node), reg, env);
2566
290k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2567
100k
    break;
2568
2569
11.7k
  case ND_ALT:
2570
11.7k
    {
2571
11.7k
      Node* x = node;
2572
11.7k
      len = 0;
2573
110k
      do {
2574
110k
        len += compile_length_tree(ND_CAR(x), reg, env);
2575
110k
        if (IS_NOT_NULL(ND_CDR(x))) {
2576
98.8k
          len += OPSIZE_PUSH + OPSIZE_JUMP;
2577
98.8k
        }
2578
110k
      } while (IS_NOT_NULL(x = ND_CDR(x)));
2579
11.7k
      pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */
2580
2581
110k
      do {
2582
110k
        len = compile_length_tree(ND_CAR(node), reg, env);
2583
110k
        if (IS_NOT_NULL(ND_CDR(node))) {
2584
98.8k
          enum OpCode push = ND_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
2585
98.8k
          r = add_op(reg, push);
2586
98.8k
          if (r != 0) break;
2587
98.8k
          COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
2588
98.8k
        }
2589
110k
        r = compile_tree(ND_CAR(node), reg, env);
2590
110k
        if (r != 0) break;
2591
110k
        if (IS_NOT_NULL(ND_CDR(node))) {
2592
98.8k
          len = pos - (COP_CURR_OFFSET(reg) + 1);
2593
98.8k
          r = add_op(reg, OP_JUMP);
2594
98.8k
          if (r != 0) break;
2595
98.8k
          COP(reg)->jump.addr = len;
2596
98.8k
        }
2597
110k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2598
11.7k
    }
2599
0
    break;
2600
2601
114k
  case ND_STRING:
2602
114k
    if (ND_STRING_IS_CRUDE(node))
2603
1.78k
      r = compile_string_crude_node(STR_(node), reg);
2604
113k
    else
2605
113k
      r = compile_string_node(node, reg);
2606
114k
    break;
2607
2608
154k
  case ND_CCLASS:
2609
154k
    r = compile_cclass_node(CCLASS_(node), reg);
2610
154k
    break;
2611
2612
20.9k
  case ND_CTYPE:
2613
20.9k
    {
2614
20.9k
      int op;
2615
2616
20.9k
      switch (CTYPE_(node)->ctype) {
2617
15.6k
      case CTYPE_ANYCHAR:
2618
15.6k
        r = add_op(reg, ND_IS_MULTILINE(node) ? OP_ANYCHAR_ML : OP_ANYCHAR);
2619
15.6k
        break;
2620
2621
5.34k
      case ONIGENC_CTYPE_WORD:
2622
5.34k
        if (CTYPE_(node)->ascii_mode == 0) {
2623
3.06k
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
2624
3.06k
        }
2625
2.27k
        else {
2626
2.27k
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
2627
2.27k
        }
2628
5.34k
        r = add_op(reg, op);
2629
5.34k
        break;
2630
2631
0
      default:
2632
0
        return ONIGERR_TYPE_BUG;
2633
0
        break;
2634
20.9k
      }
2635
20.9k
    }
2636
20.9k
    break;
2637
2638
20.9k
  case ND_BACKREF:
2639
7.01k
    {
2640
7.01k
      BackRefNode* br = BACKREF_(node);
2641
2642
7.01k
      if (ND_IS_CHECKER(node)) {
2643
426
#ifdef USE_BACKREF_WITH_LEVEL
2644
426
        if (ND_IS_NEST_LEVEL(node)) {
2645
222
          r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
2646
222
          if (r != 0) return r;
2647
222
          COP(reg)->backref_general.nest_level = br->nest_level;
2648
222
        }
2649
204
        else
2650
204
#endif
2651
204
          {
2652
204
            r = add_op(reg, OP_BACKREF_CHECK);
2653
204
            if (r != 0) return r;
2654
204
          }
2655
426
        goto add_bacref_mems;
2656
426
      }
2657
6.58k
      else {
2658
6.58k
#ifdef USE_BACKREF_WITH_LEVEL
2659
6.58k
        if (ND_IS_NEST_LEVEL(node)) {
2660
644
          if (ND_IS_IGNORECASE(node))
2661
344
            r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
2662
300
          else
2663
300
            r = add_op(reg, OP_BACKREF_WITH_LEVEL);
2664
2665
644
          if (r != 0) return r;
2666
644
          COP(reg)->backref_general.nest_level = br->nest_level;
2667
644
          goto add_bacref_mems;
2668
644
        }
2669
5.94k
        else
2670
5.94k
#endif
2671
5.94k
        if (br->back_num == 1) {
2672
4.87k
          n = br->back_static[0];
2673
4.87k
          if (ND_IS_IGNORECASE(node)) {
2674
2.63k
            r = add_op(reg, OP_BACKREF_N_IC);
2675
2.63k
            if (r != 0) return r;
2676
2.63k
            COP(reg)->backref_n.n1 = n;
2677
2.63k
          }
2678
2.24k
          else {
2679
2.24k
            switch (n) {
2680
1.11k
            case 1:  r = add_op(reg, OP_BACKREF1); break;
2681
632
            case 2:  r = add_op(reg, OP_BACKREF2); break;
2682
503
            default:
2683
503
              r = add_op(reg, OP_BACKREF_N);
2684
503
              if (r != 0) return r;
2685
503
              COP(reg)->backref_n.n1 = n;
2686
503
              break;
2687
2.24k
            }
2688
2.24k
          }
2689
4.87k
        }
2690
1.06k
        else {
2691
1.06k
          int num;
2692
1.06k
          int* p;
2693
2694
1.06k
          r = add_op(reg, ND_IS_IGNORECASE(node) ?
2695
657
                     OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
2696
1.06k
          if (r != 0) return r;
2697
2698
2.13k
        add_bacref_mems:
2699
2.13k
          num = br->back_num;
2700
2.13k
          COP(reg)->backref_general.num = num;
2701
2.13k
          if (num == 1) {
2702
1.01k
            COP(reg)->backref_general.n1 = br->back_static[0];
2703
1.01k
          }
2704
1.11k
          else {
2705
1.11k
            int i, j;
2706
1.11k
            MemNumType* ns;
2707
2708
1.11k
            ns = xmalloc(sizeof(MemNumType) * num);
2709
1.11k
            CHECK_NULL_RETURN_MEMERR(ns);
2710
1.11k
            COP(reg)->backref_general.ns = ns;
2711
1.11k
            p = BACKREFS_P(br);
2712
5.86k
            for (i = num - 1, j = 0; i >= 0; i--, j++) {
2713
4.74k
              ns[j] = p[i];
2714
4.74k
            }
2715
1.11k
          }
2716
2.13k
        }
2717
6.58k
      }
2718
7.01k
    }
2719
7.01k
    break;
2720
2721
7.01k
#ifdef USE_CALL
2722
7.01k
  case ND_CALL:
2723
3.96k
    r = compile_call(CALL_(node), reg, env);
2724
3.96k
    break;
2725
0
#endif
2726
2727
39.5k
  case ND_QUANT:
2728
39.5k
    r = compile_quantifier_node(QUANT_(node), reg, env);
2729
39.5k
    break;
2730
2731
33.1k
  case ND_BAG:
2732
33.1k
    r = compile_bag_node(BAG_(node), reg, env);
2733
33.1k
    break;
2734
2735
19.1k
  case ND_ANCHOR:
2736
19.1k
    r = compile_anchor_node(ANCHOR_(node), reg, env);
2737
19.1k
    break;
2738
2739
11.9k
  case ND_GIMMICK:
2740
11.9k
    r = compile_gimmick_node(GIMMICK_(node), reg);
2741
11.9k
    break;
2742
2743
0
  default:
2744
#ifdef ONIG_DEBUG
2745
    fprintf(DBGFP, "compile_tree: undefined node type %d\n", ND_TYPE(node));
2746
#endif
2747
0
    break;
2748
517k
  }
2749
2750
517k
  return r;
2751
517k
}
2752
2753
static int
2754
make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
2755
17.1k
{
2756
17.1k
  int r;
2757
17.1k
  Node* node = *plink;
2758
2759
17.1k
  switch (ND_TYPE(node)) {
2760
2.37k
  case ND_LIST:
2761
2.77k
  case ND_ALT:
2762
10.8k
    do {
2763
10.8k
      r = make_named_capture_number_map(&(ND_CAR(node)), map, counter);
2764
10.8k
    } while (r >= 0 && IS_NOT_NULL(node = ND_CDR(node)));
2765
2.77k
    if (r < 0) return r;
2766
2.71k
    break;
2767
2768
2.71k
  case ND_QUANT:
2769
2.17k
    {
2770
2.17k
      Node** ptarget = &(ND_BODY(node));
2771
2.17k
      r = make_named_capture_number_map(ptarget, map, counter);
2772
2.17k
      if (r < 0) return r;
2773
2.15k
      if (r == 1 && ND_TYPE(*ptarget) == ND_QUANT) {
2774
92
        return onig_reduce_nested_quantifier(node);
2775
92
      }
2776
2.15k
    }
2777
2.06k
    break;
2778
2779
3.20k
  case ND_BAG:
2780
3.20k
    {
2781
3.20k
      BagNode* en = BAG_(node);
2782
3.20k
      if (en->type == BAG_MEMORY) {
2783
2.20k
        if (ND_IS_NAMED_GROUP(node)) {
2784
1.16k
          (*counter)++;
2785
1.16k
          map[en->m.regnum].new_val = *counter;
2786
1.16k
          en->m.regnum = *counter;
2787
1.16k
          r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2788
1.16k
          if (r < 0) return r;
2789
1.16k
        }
2790
1.04k
        else {
2791
1.04k
          *plink = ND_BODY(node);
2792
1.04k
          ND_BODY(node) = NULL_NODE;
2793
1.04k
          onig_node_free(node);
2794
1.04k
          r = make_named_capture_number_map(plink, map, counter);
2795
1.04k
          if (r < 0) return r;
2796
1.03k
          return 1;
2797
1.04k
        }
2798
2.20k
      }
2799
1.00k
      else if (en->type == BAG_IF_ELSE) {
2800
278
        r = make_named_capture_number_map(&(ND_BAG_BODY(en)), map, counter);
2801
278
        if (r < 0) return r;
2802
276
        if (IS_NOT_NULL(en->te.Then)) {
2803
82
          r = make_named_capture_number_map(&(en->te.Then), map, counter);
2804
82
          if (r < 0) return r;
2805
82
        }
2806
274
        if (IS_NOT_NULL(en->te.Else)) {
2807
204
          r = make_named_capture_number_map(&(en->te.Else), map, counter);
2808
204
          if (r < 0) return r;
2809
204
        }
2810
274
      }
2811
726
      else {
2812
726
        r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2813
726
        if (r < 0) return r;
2814
726
      }
2815
3.20k
    }
2816
2.14k
    break;
2817
2818
2.14k
  case ND_ANCHOR:
2819
890
    if (IS_NOT_NULL(ND_BODY(node))) {
2820
248
      r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2821
248
      if (r < 0) return r;
2822
248
    }
2823
888
    break;
2824
2825
8.10k
  default:
2826
8.10k
    break;
2827
17.1k
  }
2828
2829
15.9k
  return 0;
2830
17.1k
}
2831
2832
static int
2833
renumber_backref_node(Node* node, GroupNumMap* map)
2834
568
{
2835
568
  int i, pos, n, old_num;
2836
568
  int *backs;
2837
568
  BackRefNode* bn = BACKREF_(node);
2838
2839
568
  if (! ND_IS_BY_NAME(node))
2840
26
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2841
2842
542
  old_num = bn->back_num;
2843
542
  if (IS_NULL(bn->back_dynamic))
2844
414
    backs = bn->back_static;
2845
128
  else
2846
128
    backs = bn->back_dynamic;
2847
2848
2.42k
  for (i = 0, pos = 0; i < old_num; i++) {
2849
1.88k
    n = map[backs[i]].new_val;
2850
1.88k
    if (n > 0) {
2851
1.88k
      backs[pos] = n;
2852
1.88k
      pos++;
2853
1.88k
    }
2854
1.88k
  }
2855
2856
542
  bn->back_num = pos;
2857
542
  return 0;
2858
568
}
2859
2860
static int
2861
renumber_backref_traverse(Node* node, GroupNumMap* map)
2862
15.5k
{
2863
15.5k
  int r = 0;
2864
2865
15.5k
  switch (ND_TYPE(node)) {
2866
2.31k
  case ND_LIST:
2867
2.68k
  case ND_ALT:
2868
10.5k
    do {
2869
10.5k
      r = renumber_backref_traverse(ND_CAR(node), map);
2870
10.5k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2871
2.68k
    break;
2872
2873
2.00k
  case ND_QUANT:
2874
2.00k
    r = renumber_backref_traverse(ND_BODY(node), map);
2875
2.00k
    break;
2876
2877
2.11k
  case ND_BAG:
2878
2.11k
    {
2879
2.11k
      BagNode* en = BAG_(node);
2880
2881
2.11k
      r = renumber_backref_traverse(ND_BODY(node), map);
2882
2.11k
      if (r != 0) return r;
2883
2884
2.08k
      if (en->type == BAG_IF_ELSE) {
2885
272
        if (IS_NOT_NULL(en->te.Then)) {
2886
78
          r = renumber_backref_traverse(en->te.Then, map);
2887
78
          if (r != 0) return r;
2888
78
        }
2889
264
        if (IS_NOT_NULL(en->te.Else)) {
2890
202
          r = renumber_backref_traverse(en->te.Else, map);
2891
202
          if (r != 0) return r;
2892
202
        }
2893
264
      }
2894
2.08k
    }
2895
2.06k
    break;
2896
2897
2.06k
  case ND_BACKREF:
2898
568
    r = renumber_backref_node(node, map);
2899
568
    break;
2900
2901
880
  case ND_ANCHOR:
2902
880
    if (IS_NOT_NULL(ND_BODY(node)))
2903
246
      r = renumber_backref_traverse(ND_BODY(node), map);
2904
880
    break;
2905
2906
7.27k
  default:
2907
7.27k
    break;
2908
15.5k
  }
2909
2910
15.4k
  return r;
2911
15.5k
}
2912
2913
static int
2914
numbered_ref_check(Node* node)
2915
14.7k
{
2916
14.7k
  int r = 0;
2917
2918
14.7k
  switch (ND_TYPE(node)) {
2919
2.56k
  case ND_LIST:
2920
3.01k
  case ND_ALT:
2921
9.90k
    do {
2922
9.90k
      r = numbered_ref_check(ND_CAR(node));
2923
9.90k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2924
3.01k
    break;
2925
2926
1.01k
  case ND_ANCHOR:
2927
1.01k
    if (IS_NULL(ND_BODY(node)))
2928
760
      break;
2929
    /* fall */
2930
1.93k
  case ND_QUANT:
2931
1.93k
    r = numbered_ref_check(ND_BODY(node));
2932
1.93k
    break;
2933
2934
2.07k
  case ND_BAG:
2935
2.07k
    {
2936
2.07k
      BagNode* en = BAG_(node);
2937
2938
2.07k
      r = numbered_ref_check(ND_BODY(node));
2939
2.07k
      if (r != 0) return r;
2940
2941
2.03k
      if (en->type == BAG_IF_ELSE) {
2942
272
        if (IS_NOT_NULL(en->te.Then)) {
2943
76
          r = numbered_ref_check(en->te.Then);
2944
76
          if (r != 0) return r;
2945
76
        }
2946
264
        if (IS_NOT_NULL(en->te.Else)) {
2947
202
          r = numbered_ref_check(en->te.Else);
2948
202
          if (r != 0) return r;
2949
202
        }
2950
264
      }
2951
2.03k
    }
2952
2953
2.02k
    break;
2954
2955
2.02k
  case ND_BACKREF:
2956
752
    if (! ND_IS_BY_NAME(node))
2957
24
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2958
728
    break;
2959
2960
6.23k
  default:
2961
6.23k
    break;
2962
14.7k
  }
2963
2964
14.6k
  return r;
2965
14.7k
}
2966
2967
static int
2968
disable_noname_group_capture(Node** root, regex_t* reg, ParseEnv* env)
2969
372
{
2970
372
  int r, i, pos, counter;
2971
372
  MemStatusType loc;
2972
372
  GroupNumMap* map;
2973
2974
372
  map = (GroupNumMap* )xalloca(sizeof(GroupNumMap) * (env->num_mem + 1));
2975
372
  CHECK_NULL_RETURN_MEMERR(map);
2976
2.57k
  for (i = 1; i <= env->num_mem; i++) {
2977
2.19k
    map[i].new_val = 0;
2978
2.19k
  }
2979
372
  counter = 0;
2980
372
  r = make_named_capture_number_map(root, map, &counter);
2981
372
  if (r < 0) return r;
2982
2983
346
  r = renumber_backref_traverse(*root, map);
2984
346
  if (r != 0) return r;
2985
2986
2.33k
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2987
2.01k
    if (map[i].new_val > 0) {
2988
1.04k
      PARSEENV_MEMENV(env)[pos] = PARSEENV_MEMENV(env)[i];
2989
1.04k
      pos++;
2990
1.04k
    }
2991
2.01k
  }
2992
2993
320
  loc = env->cap_history;
2994
320
  MEM_STATUS_CLEAR(env->cap_history);
2995
10.2k
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2996
9.92k
    if (MEM_STATUS_AT(loc, i)) {
2997
0
      MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
2998
0
    }
2999
9.92k
  }
3000
3001
320
  env->num_mem = env->num_named;
3002
320
  reg->num_mem = env->num_named;
3003
3004
320
  return onig_renumber_name_table(reg, map);
3005
346
}
3006
3007
#ifdef USE_CALL
3008
static int
3009
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
3010
1.39k
{
3011
1.39k
  int i, offset;
3012
1.39k
  BagNode* en;
3013
1.39k
  AbsAddrType addr;
3014
1.39k
  AbsAddrType* paddr;
3015
3016
5.36k
  for (i = 0; i < uslist->num; i++) {
3017
3.96k
    if (! ND_IS_FIXED_ADDR(uslist->us[i].target)) {
3018
37
      if (ND_IS_CALLED(uslist->us[i].target))
3019
0
        return ONIGERR_PARSER_BUG;
3020
37
      else {
3021
        /* CASE: called node doesn't have called address.
3022
           ex. /((|a\g<1>)(.){0}){0}\g<3>/
3023
           group-1 doesn't called, but compiled into bytecodes,
3024
           because group-3 is referred from outside.
3025
        */
3026
37
        continue;
3027
37
      }
3028
37
    }
3029
3030
3.93k
    en = BAG_(uslist->us[i].target);
3031
3.93k
    addr   = en->m.called_addr;
3032
3.93k
    offset = uslist->us[i].offset;
3033
3034
3.93k
    paddr = (AbsAddrType* )((char* )reg->ops + offset);
3035
3.93k
    *paddr = addr;
3036
3.93k
  }
3037
1.39k
  return 0;
3038
1.39k
}
3039
#endif
3040
3041
/* x is not included y ==>  1 : 0 */
3042
static int
3043
is_exclusive(Node* x, Node* y, regex_t* reg)
3044
13.6k
{
3045
13.6k
  int i, len;
3046
13.6k
  OnigCodePoint code;
3047
13.6k
  UChar *p;
3048
13.6k
  NodeType ytype;
3049
3050
17.1k
 retry:
3051
17.1k
  ytype = ND_TYPE(y);
3052
17.1k
  switch (ND_TYPE(x)) {
3053
2.85k
  case ND_CTYPE:
3054
2.85k
    {
3055
2.85k
      if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
3056
2.85k
          CTYPE_(y)->ctype == CTYPE_ANYCHAR)
3057
0
        break;
3058
3059
2.85k
      switch (ytype) {
3060
778
      case ND_CTYPE:
3061
778
        if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
3062
778
            CTYPE_(y)->not   != CTYPE_(x)->not &&
3063
778
            CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
3064
140
          return 1;
3065
638
        else
3066
638
          return 0;
3067
0
        break;
3068
3069
1.29k
      case ND_CCLASS:
3070
3.45k
      swap:
3071
3.45k
        {
3072
3.45k
          Node* tmp;
3073
3.45k
          tmp = x; x = y; y = tmp;
3074
3.45k
          goto retry;
3075
1.29k
        }
3076
0
        break;
3077
3078
784
      case ND_STRING:
3079
784
        goto swap;
3080
0
        break;
3081
3082
0
      default:
3083
0
        break;
3084
2.85k
      }
3085
2.85k
    }
3086
0
    break;
3087
3088
6.55k
  case ND_CCLASS:
3089
6.55k
    {
3090
6.55k
      int range;
3091
6.55k
      CClassNode* xc = CCLASS_(x);
3092
3093
6.55k
      switch (ytype) {
3094
2.53k
      case ND_CTYPE:
3095
2.53k
        switch (CTYPE_(y)->ctype) {
3096
0
        case CTYPE_ANYCHAR:
3097
0
          return 0;
3098
0
          break;
3099
3100
2.53k
        case ONIGENC_CTYPE_WORD:
3101
2.53k
          if (CTYPE_(y)->not == 0) {
3102
846
            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
3103
584
              range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3104
86.6k
              for (i = 0; i < range; i++) {
3105
86.4k
                if (BITSET_AT(xc->bs, i)) {
3106
972
                  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
3107
972
                }
3108
86.4k
              }
3109
208
              return 1;
3110
584
            }
3111
262
            return 0;
3112
846
          }
3113
1.68k
          else {
3114
1.68k
            if (IS_NOT_NULL(xc->mbuf)) return 0;
3115
1.48k
            if (IS_NCCLASS_NOT(xc)) return 0;
3116
3117
1.27k
            range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3118
204k
            for (i = 0; i < range; i++) {
3119
203k
              if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
3120
107k
                if (BITSET_AT(xc->bs, i))
3121
122
                  return 0;
3122
107k
              }
3123
203k
            }
3124
74.2k
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
3125
73.5k
              if (BITSET_AT(xc->bs, i)) return 0;
3126
73.5k
            }
3127
624
            return 1;
3128
1.15k
          }
3129
0
          break;
3130
3131
0
        default:
3132
0
          break;
3133
2.53k
        }
3134
0
        break;
3135
3136
2.64k
      case ND_CCLASS:
3137
2.64k
        {
3138
2.64k
          int v;
3139
2.64k
          CClassNode* yc = CCLASS_(y);
3140
3141
515k
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
3142
513k
            v = BITSET_AT(xc->bs, i);
3143
513k
            if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
3144
66.9k
              v = BITSET_AT(yc->bs, i);
3145
66.9k
              if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
3146
66.9k
                  (v == 0 && IS_NCCLASS_NOT(yc)))
3147
1.05k
                return 0;
3148
66.9k
            }
3149
513k
          }
3150
1.58k
          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
3151
1.58k
              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
3152
1.06k
            return 1;
3153
516
          return 0;
3154
1.58k
        }
3155
0
        break;
3156
3157
1.38k
      case ND_STRING:
3158
1.38k
        goto swap;
3159
0
        break;
3160
3161
0
      default:
3162
0
        break;
3163
6.55k
      }
3164
6.55k
    }
3165
0
    break;
3166
3167
7.74k
  case ND_STRING:
3168
7.74k
    {
3169
7.74k
      StrNode* xs = STR_(x);
3170
3171
7.74k
      if (ND_STRING_LEN(x) == 0)
3172
0
        break;
3173
3174
7.74k
      switch (ytype) {
3175
1.44k
      case ND_CTYPE:
3176
1.44k
        switch (CTYPE_(y)->ctype) {
3177
0
        case CTYPE_ANYCHAR:
3178
0
          break;
3179
3180
1.44k
        case ONIGENC_CTYPE_WORD:
3181
1.44k
          if (CTYPE_(y)->ascii_mode == 0) {
3182
792
            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
3183
486
              return CTYPE_(y)->not;
3184
306
            else
3185
306
              return !(CTYPE_(y)->not);
3186
792
          }
3187
656
          else {
3188
656
            if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
3189
244
              return CTYPE_(y)->not;
3190
412
            else
3191
412
              return !(CTYPE_(y)->not);
3192
656
          }
3193
0
          break;
3194
0
        default:
3195
0
          break;
3196
1.44k
        }
3197
0
        break;
3198
3199
2.33k
      case ND_CCLASS:
3200
2.33k
        {
3201
2.33k
          CClassNode* cc = CCLASS_(y);
3202
3203
2.33k
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
3204
2.33k
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
3205
2.33k
          return onig_is_code_in_cc(reg->enc, code, cc) == 0;
3206
1.44k
        }
3207
0
        break;
3208
3209
3.96k
      case ND_STRING:
3210
3.96k
        {
3211
3.96k
          UChar *q;
3212
3.96k
          StrNode* ys = STR_(y);
3213
3214
3.96k
          len = ND_STRING_LEN(x);
3215
3.96k
          if (len > ND_STRING_LEN(y)) len = ND_STRING_LEN(y);
3216
3217
6.14k
          for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
3218
4.60k
            if (*p != *q) return 1;
3219
4.60k
          }
3220
3.96k
        }
3221
1.53k
        break;
3222
3223
1.53k
      default:
3224
0
        break;
3225
7.74k
      }
3226
7.74k
    }
3227
1.53k
    break;
3228
3229
1.53k
  default:
3230
0
    break;
3231
17.1k
  }
3232
3233
1.53k
  return 0;
3234
17.1k
}
3235
3236
static Node*
3237
get_tree_head_literal(Node* node, int exact, regex_t* reg)
3238
174k
{
3239
174k
  Node* n = NULL_NODE;
3240
3241
174k
  switch (ND_TYPE(node)) {
3242
2.87k
  case ND_BACKREF:
3243
6.17k
  case ND_ALT:
3244
6.17k
#ifdef USE_CALL
3245
7.15k
  case ND_CALL:
3246
7.15k
#endif
3247
7.15k
    break;
3248
3249
28.8k
  case ND_CTYPE:
3250
28.8k
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
3251
16.7k
      break;
3252
    /* fall */
3253
40.2k
  case ND_CCLASS:
3254
40.2k
    if (exact == 0) {
3255
16.2k
      n = node;
3256
16.2k
    }
3257
40.2k
    break;
3258
3259
15.1k
  case ND_LIST:
3260
15.1k
    n = get_tree_head_literal(ND_CAR(node), exact, reg);
3261
15.1k
    break;
3262
3263
42.9k
  case ND_STRING:
3264
42.9k
    {
3265
42.9k
      StrNode* sn = STR_(node);
3266
3267
42.9k
      if (sn->end <= sn->s)
3268
1.34k
        break;
3269
3270
41.5k
      if (exact == 0 || !ND_IS_REAL_IGNORECASE(node)) {
3271
41.5k
        n = node;
3272
41.5k
      }
3273
41.5k
    }
3274
0
    break;
3275
3276
28.0k
  case ND_QUANT:
3277
28.0k
    {
3278
28.0k
      QuantNode* qn = QUANT_(node);
3279
28.0k
      if (qn->lower > 0) {
3280
22.6k
        if (IS_NOT_NULL(qn->head_exact))
3281
5.76k
          n = qn->head_exact;
3282
16.9k
        else
3283
16.9k
          n = get_tree_head_literal(ND_BODY(node), exact, reg);
3284
22.6k
      }
3285
28.0k
    }
3286
28.0k
    break;
3287
3288
14.5k
  case ND_BAG:
3289
14.5k
    {
3290
14.5k
      BagNode* en = BAG_(node);
3291
14.5k
      switch (en->type) {
3292
62
      case BAG_OPTION:
3293
4.60k
      case BAG_MEMORY:
3294
13.8k
      case BAG_STOP_BACKTRACK:
3295
13.8k
        n = get_tree_head_literal(ND_BODY(node), exact, reg);
3296
13.8k
        break;
3297
758
      default:
3298
758
        break;
3299
14.5k
      }
3300
14.5k
    }
3301
14.5k
    break;
3302
3303
14.5k
  case ND_ANCHOR:
3304
8.73k
    if (ANCHOR_(node)->type == ANCR_PREC_READ)
3305
980
      n = get_tree_head_literal(ND_BODY(node), exact, reg);
3306
8.73k
    break;
3307
3308
628
  case ND_GIMMICK:
3309
628
  default:
3310
628
    break;
3311
174k
  }
3312
3313
174k
  return n;
3314
174k
}
3315
3316
enum GetValue {
3317
  GET_VALUE_NONE   = -1,
3318
  GET_VALUE_IGNORE =  0,
3319
  GET_VALUE_FOUND  =  1
3320
};
3321
3322
44.2k
#define MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL  16
3323
3324
static int
3325
get_tree_tail_literal(Node* node, Node** rnode, regex_t* reg, int nest_level)
3326
44.2k
{
3327
44.2k
  int r;
3328
3329
44.2k
  nest_level++;
3330
44.2k
  if (nest_level >= MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL) {
3331
368
    return GET_VALUE_NONE;
3332
368
  }
3333
3334
43.8k
  switch (ND_TYPE(node)) {
3335
28.4k
  case ND_LIST:
3336
28.4k
    if (IS_NULL(ND_CDR(node))) {
3337
9.65k
      r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3338
9.65k
    }
3339
18.7k
    else {
3340
18.7k
      r = get_tree_tail_literal(ND_CDR(node), rnode, reg, nest_level);
3341
18.7k
      if (r == GET_VALUE_IGNORE) {
3342
714
        r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3343
714
      }
3344
18.7k
    }
3345
28.4k
    break;
3346
3347
0
#ifdef USE_CALL
3348
274
  case ND_CALL:
3349
274
    r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3350
274
    break;
3351
0
#endif
3352
3353
460
  case ND_CTYPE:
3354
460
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) {
3355
354
      r = GET_VALUE_NONE;
3356
354
      break;
3357
354
    }
3358
    /* fall */
3359
5.62k
  case ND_CCLASS:
3360
5.62k
    *rnode = node;
3361
5.62k
    r = GET_VALUE_FOUND;
3362
5.62k
    break;
3363
3364
4.67k
  case ND_STRING:
3365
4.67k
    {
3366
4.67k
      StrNode* sn = STR_(node);
3367
3368
4.67k
      if (sn->end <= sn->s) {
3369
350
        r = GET_VALUE_IGNORE;
3370
350
        break;
3371
350
      }
3372
3373
4.32k
      if (ND_IS_REAL_IGNORECASE(node)) {
3374
82
        r = GET_VALUE_NONE;
3375
82
        break;
3376
82
      }
3377
3378
4.24k
      *rnode = node;
3379
4.24k
      r = GET_VALUE_FOUND;
3380
4.24k
    }
3381
0
    break;
3382
3383
1.77k
  case ND_QUANT:
3384
1.77k
    {
3385
1.77k
      QuantNode* qn = QUANT_(node);
3386
1.77k
      if (qn->lower != 0) {
3387
976
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3388
976
      }
3389
794
      else
3390
794
        r = GET_VALUE_NONE;
3391
1.77k
    }
3392
1.77k
    break;
3393
3394
1.21k
  case ND_BAG:
3395
1.21k
    {
3396
1.21k
      BagNode* en = BAG_(node);
3397
3398
1.21k
      if (en->type == BAG_MEMORY) {
3399
698
        if (ND_IS_MARK1(node))
3400
14
          r = GET_VALUE_NONE;
3401
684
        else {
3402
684
          ND_STATUS_ADD(node, MARK1);
3403
684
          r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3404
684
          ND_STATUS_REMOVE(node, MARK1);
3405
684
        }
3406
698
      }
3407
514
      else {
3408
514
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3409
514
      }
3410
1.21k
    }
3411
1.21k
    break;
3412
3413
682
  case ND_ANCHOR:
3414
888
  case ND_GIMMICK:
3415
888
    r = GET_VALUE_IGNORE;
3416
888
    break;
3417
3418
454
  case ND_ALT:
3419
644
  case ND_BACKREF:
3420
644
  default:
3421
644
    r = GET_VALUE_NONE;
3422
644
    break;
3423
43.8k
  }
3424
3425
43.8k
  return r;
3426
43.8k
}
3427
3428
static int
3429
check_called_node_in_look_behind(Node* node, int not)
3430
20.9k
{
3431
20.9k
  int r;
3432
3433
20.9k
  r = 0;
3434
3435
20.9k
  switch (ND_TYPE(node)) {
3436
2.33k
  case ND_LIST:
3437
3.54k
  case ND_ALT:
3438
14.0k
    do {
3439
14.0k
      r = check_called_node_in_look_behind(ND_CAR(node), not);
3440
14.0k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3441
3.54k
    break;
3442
3443
2.25k
  case ND_QUANT:
3444
2.25k
    r = check_called_node_in_look_behind(ND_BODY(node), not);
3445
2.25k
    break;
3446
3447
2.32k
  case ND_BAG:
3448
2.32k
    {
3449
2.32k
      BagNode* en = BAG_(node);
3450
3451
2.32k
      if (en->type == BAG_MEMORY) {
3452
1.06k
        if (ND_IS_MARK1(node))
3453
0
          return 0;
3454
1.06k
        else {
3455
1.06k
          ND_STATUS_ADD(node, MARK1);
3456
1.06k
          r = check_called_node_in_look_behind(ND_BODY(node), not);
3457
1.06k
          ND_STATUS_REMOVE(node, MARK1);
3458
1.06k
        }
3459
1.06k
      }
3460
1.25k
      else {
3461
1.25k
        r = check_called_node_in_look_behind(ND_BODY(node), not);
3462
1.25k
        if (r == 0 && en->type == BAG_IF_ELSE) {
3463
454
          if (IS_NOT_NULL(en->te.Then)) {
3464
230
            r = check_called_node_in_look_behind(en->te.Then, not);
3465
230
            if (r != 0) break;
3466
230
          }
3467
448
          if (IS_NOT_NULL(en->te.Else)) {
3468
224
            r = check_called_node_in_look_behind(en->te.Else, not);
3469
224
          }
3470
448
        }
3471
1.25k
      }
3472
2.32k
    }
3473
2.31k
    break;
3474
3475
2.31k
  case ND_ANCHOR:
3476
1.97k
    if (IS_NOT_NULL(ND_BODY(node)))
3477
1.11k
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3478
1.97k
    break;
3479
3480
472
  case ND_GIMMICK:
3481
472
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3482
14
      return 1;
3483
458
    break;
3484
3485
10.3k
  default:
3486
10.3k
    break;
3487
20.9k
  }
3488
3489
20.9k
  return r;
3490
20.9k
}
3491
3492
/* allowed node types in look-behind */
3493
#define ALLOWED_TYPE_IN_LB \
3494
184k
  ( ND_BIT_LIST | ND_BIT_ALT | ND_BIT_STRING | ND_BIT_CCLASS \
3495
184k
  | ND_BIT_CTYPE | ND_BIT_ANCHOR | ND_BIT_BAG | ND_BIT_QUANT \
3496
184k
  | ND_BIT_CALL | ND_BIT_BACKREF | ND_BIT_GIMMICK)
3497
3498
184k
#define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3499
184k
#define ALLOWED_BAG_IN_LB_NOT   ( 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3500
3501
#define ALLOWED_ANCHOR_IN_LB \
3502
184k
  ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
3503
184k
  | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
3504
184k
  | ANCR_WORD_BEGIN | ANCR_WORD_END \
3505
184k
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3506
3507
#define ALLOWED_ANCHOR_IN_LB_NOT \
3508
184k
  ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
3509
184k
  | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
3510
184k
  | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
3511
184k
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3512
3513
3514
static int
3515
check_node_in_look_behind(Node* node, int not, int* used)
3516
184k
{
3517
184k
  static unsigned int
3518
184k
    bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT };
3519
3520
184k
  static unsigned int
3521
184k
    anchor_mask[2] = { ALLOWED_ANCHOR_IN_LB, ALLOWED_ANCHOR_IN_LB_NOT };
3522
3523
184k
  NodeType type;
3524
184k
  int r = 0;
3525
3526
184k
  type = ND_TYPE(node);
3527
184k
  if ((ND_TYPE2BIT(type) & ALLOWED_TYPE_IN_LB) == 0)
3528
0
    return 1;
3529
3530
184k
  switch (type) {
3531
34.0k
  case ND_LIST:
3532
38.3k
  case ND_ALT:
3533
128k
    do {
3534
128k
      r = check_node_in_look_behind(ND_CAR(node), not, used);
3535
128k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3536
38.3k
    break;
3537
3538
22.4k
  case ND_QUANT:
3539
22.4k
    r = check_node_in_look_behind(ND_BODY(node), not, used);
3540
22.4k
    break;
3541
3542
10.7k
  case ND_BAG:
3543
10.7k
    {
3544
10.7k
      BagNode* en = BAG_(node);
3545
10.7k
      if (((1<<en->type) & bag_mask[not]) == 0)
3546
6
        return 1;
3547
3548
10.7k
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3549
10.7k
      if (r != 0) break;
3550
3551
10.7k
      if (en->type == BAG_MEMORY) {
3552
4.70k
        if (ND_IS_BACKREF(node) || ND_IS_CALLED(node)
3553
4.70k
         || ND_IS_REFERENCED(node))
3554
1.29k
          *used = TRUE;
3555
4.70k
      }
3556
6.05k
      else if (en->type == BAG_IF_ELSE) {
3557
1.83k
        if (IS_NOT_NULL(en->te.Then)) {
3558
928
          r = check_node_in_look_behind(en->te.Then, not, used);
3559
928
          if (r != 0) break;
3560
928
        }
3561
1.83k
        if (IS_NOT_NULL(en->te.Else)) {
3562
1.31k
          r = check_node_in_look_behind(en->te.Else, not, used);
3563
1.31k
        }
3564
1.83k
      }
3565
10.7k
    }
3566
10.7k
    break;
3567
3568
10.7k
  case ND_ANCHOR:
3569
7.41k
    type = ANCHOR_(node)->type;
3570
7.41k
    if ((type & anchor_mask[not]) == 0)
3571
22
      return 1;
3572
3573
7.39k
    if (IS_NOT_NULL(ND_BODY(node)))
3574
2.79k
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3575
7.39k
    break;
3576
3577
4.38k
  case ND_GIMMICK:
3578
4.38k
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3579
8
      return 1;
3580
3581
4.37k
    {
3582
4.37k
      GimmickNode* g = GIMMICK_(node);
3583
4.37k
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
3584
810
        *used = TRUE;
3585
4.37k
    }
3586
4.37k
    break;
3587
3588
1.39k
  case ND_CALL:
3589
1.39k
    if (ND_IS_RECURSION(node)) {
3590
      /* fix: Issue 38040 in oss-fuzz */
3591
      /* This node should be removed before recursive call check. */
3592
660
      *used = TRUE;
3593
660
    }
3594
732
    else
3595
732
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3596
1.39k
    break;
3597
3598
99.1k
  default:
3599
99.1k
    break;
3600
184k
  }
3601
183k
  return r;
3602
184k
}
3603
3604
static OnigLen
3605
node_min_byte_len(Node* node, ParseEnv* env)
3606
476k
{
3607
476k
  OnigLen len;
3608
476k
  OnigLen tmin;
3609
3610
476k
  len = 0;
3611
476k
  switch (ND_TYPE(node)) {
3612
10.8k
  case ND_BACKREF:
3613
10.8k
    if (! ND_IS_CHECKER(node)) {
3614
10.1k
      int i;
3615
10.1k
      int* backs;
3616
10.1k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3617
10.1k
      BackRefNode* br = BACKREF_(node);
3618
10.1k
      if (ND_IS_RECURSION(node)) break;
3619
3620
8.03k
      backs = BACKREFS_P(br);
3621
8.03k
      len = node_min_byte_len(mem_env[backs[0]].mem_node, env);
3622
14.8k
      for (i = 1; i < br->back_num; i++) {
3623
6.82k
        tmin = node_min_byte_len(mem_env[backs[i]].mem_node, env);
3624
6.82k
        if (len > tmin) len = tmin;
3625
6.82k
      }
3626
8.03k
    }
3627
8.71k
    break;
3628
3629
8.71k
#ifdef USE_CALL
3630
8.71k
  case ND_CALL:
3631
7.61k
    {
3632
7.61k
      Node* t = ND_BODY(node);
3633
7.61k
      if (ND_IS_FIXED_MIN(t))
3634
4.02k
        len = BAG_(t)->min_len;
3635
3.58k
      else
3636
3.58k
        len = node_min_byte_len(t, env);
3637
7.61k
    }
3638
7.61k
    break;
3639
0
#endif
3640
3641
84.0k
  case ND_LIST:
3642
210k
    do {
3643
210k
      tmin = node_min_byte_len(ND_CAR(node), env);
3644
210k
      len = distance_add(len, tmin);
3645
210k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3646
84.0k
    break;
3647
3648
8.86k
  case ND_ALT:
3649
8.86k
    {
3650
8.86k
      Node *x, *y;
3651
8.86k
      y = node;
3652
77.4k
      do {
3653
77.4k
        x = ND_CAR(y);
3654
77.4k
        tmin = node_min_byte_len(x, env);
3655
77.4k
        if (y == node) len = tmin;
3656
68.5k
        else if (len > tmin) len = tmin;
3657
77.4k
      } while (IS_NOT_NULL(y = ND_CDR(y)));
3658
8.86k
    }
3659
8.86k
    break;
3660
3661
98.8k
  case ND_STRING:
3662
98.8k
    {
3663
98.8k
      StrNode* sn = STR_(node);
3664
98.8k
      len = (int )(sn->end - sn->s);
3665
98.8k
    }
3666
98.8k
    break;
3667
3668
29.5k
  case ND_CTYPE:
3669
137k
  case ND_CCLASS:
3670
137k
    len = ONIGENC_MBC_MINLEN(env->enc);
3671
137k
    break;
3672
3673
40.6k
  case ND_QUANT:
3674
40.6k
    {
3675
40.6k
      QuantNode* qn = QUANT_(node);
3676
3677
40.6k
      if (qn->lower > 0) {
3678
22.9k
        len = node_min_byte_len(ND_BODY(node), env);
3679
22.9k
        len = distance_multiply(len, qn->lower);
3680
22.9k
      }
3681
40.6k
    }
3682
40.6k
    break;
3683
3684
57.2k
  case ND_BAG:
3685
57.2k
    {
3686
57.2k
      BagNode* en = BAG_(node);
3687
57.2k
      switch (en->type) {
3688
39.0k
      case BAG_MEMORY:
3689
39.0k
        if (ND_IS_FIXED_MIN(node))
3690
18.2k
          len = en->min_len;
3691
20.8k
        else {
3692
20.8k
          if (ND_IS_MARK1(node))
3693
2.53k
            len = 0;  /* recursive */
3694
18.2k
          else {
3695
18.2k
            ND_STATUS_ADD(node, MARK1);
3696
18.2k
            len = node_min_byte_len(ND_BODY(node), env);
3697
18.2k
            ND_STATUS_REMOVE(node, MARK1);
3698
3699
18.2k
            en->min_len = len;
3700
18.2k
            ND_STATUS_ADD(node, FIXED_MIN);
3701
18.2k
          }
3702
20.8k
        }
3703
39.0k
        break;
3704
3705
515
      case BAG_OPTION:
3706
13.9k
      case BAG_STOP_BACKTRACK:
3707
13.9k
        len = node_min_byte_len(ND_BODY(node), env);
3708
13.9k
        break;
3709
4.23k
      case BAG_IF_ELSE:
3710
4.23k
        {
3711
4.23k
          OnigLen elen;
3712
3713
4.23k
          len = node_min_byte_len(ND_BODY(node), env);
3714
4.23k
          if (IS_NOT_NULL(en->te.Then))
3715
2.88k
            len += node_min_byte_len(en->te.Then, env);
3716
4.23k
          if (IS_NOT_NULL(en->te.Else))
3717
1.92k
            elen = node_min_byte_len(en->te.Else, env);
3718
2.31k
          else elen = 0;
3719
3720
4.23k
          if (elen < len) len = elen;
3721
4.23k
        }
3722
4.23k
        break;
3723
57.2k
      }
3724
57.2k
    }
3725
57.2k
    break;
3726
3727
57.2k
  case ND_GIMMICK:
3728
18.8k
    {
3729
18.8k
      GimmickNode* g = GIMMICK_(node);
3730
18.8k
      if (g->type == GIMMICK_FAIL) {
3731
4.53k
        len = INFINITE_LEN;
3732
4.53k
        break;
3733
4.53k
      }
3734
18.8k
    }
3735
    /* fall */
3736
26.1k
  case ND_ANCHOR:
3737
26.1k
  default:
3738
26.1k
    break;
3739
476k
  }
3740
3741
476k
  return len;
3742
476k
}
3743
3744
static int
3745
check_backrefs(Node* node, ParseEnv* env)
3746
865k
{
3747
865k
  int r;
3748
3749
865k
  switch (ND_TYPE(node)) {
3750
188k
  case ND_LIST:
3751
204k
  case ND_ALT:
3752
679k
    do {
3753
679k
      r = check_backrefs(ND_CAR(node), env);
3754
679k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3755
204k
    break;
3756
3757
29.7k
  case ND_ANCHOR:
3758
29.7k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3759
22.8k
      r = 0;
3760
22.8k
      break;
3761
22.8k
    }
3762
    /* fall */
3763
85.0k
  case ND_QUANT:
3764
85.0k
    r = check_backrefs(ND_BODY(node), env);
3765
85.0k
    break;
3766
3767
55.0k
  case ND_BAG:
3768
55.0k
    r = check_backrefs(ND_BODY(node), env);
3769
55.0k
    {
3770
55.0k
      BagNode* en = BAG_(node);
3771
3772
55.0k
      if (en->type == BAG_IF_ELSE) {
3773
6.46k
        if (r != 0) return r;
3774
6.45k
        if (IS_NOT_NULL(en->te.Then)) {
3775
2.74k
          r = check_backrefs(en->te.Then, env);
3776
2.74k
          if (r != 0) return r;
3777
2.74k
        }
3778
6.44k
        if (IS_NOT_NULL(en->te.Else)) {
3779
4.20k
          r = check_backrefs(en->te.Else, env);
3780
4.20k
        }
3781
6.44k
      }
3782
55.0k
    }
3783
55.0k
    break;
3784
3785
55.0k
  case ND_BACKREF:
3786
10.4k
    {
3787
10.4k
      int i;
3788
10.4k
      BackRefNode* br = BACKREF_(node);
3789
10.4k
      int* backs = BACKREFS_P(br);
3790
10.4k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3791
3792
25.9k
      for (i = 0; i < br->back_num; i++) {
3793
15.8k
        if (backs[i] > env->num_mem)
3794
314
          return ONIGERR_INVALID_BACKREF;
3795
3796
15.5k
        ND_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
3797
15.5k
      }
3798
10.1k
      r = 0;
3799
10.1k
    }
3800
0
    break;
3801
3802
488k
  default:
3803
488k
    r = 0;
3804
488k
    break;
3805
865k
  }
3806
3807
865k
  return r;
3808
865k
}
3809
3810
static int
3811
set_empty_repeat_node_trav(Node* node, Node* empty, ParseEnv* env)
3812
160k
{
3813
160k
  int r;
3814
3815
160k
  switch (ND_TYPE(node)) {
3816
24.3k
  case ND_LIST:
3817
28.9k
  case ND_ALT:
3818
110k
    do {
3819
110k
      r = set_empty_repeat_node_trav(ND_CAR(node), empty, env);
3820
110k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3821
28.9k
    break;
3822
3823
6.13k
  case ND_ANCHOR:
3824
6.13k
    {
3825
6.13k
      AnchorNode* an = ANCHOR_(node);
3826
3827
6.13k
      if (! ANCHOR_HAS_BODY(an)) {
3828
2.51k
        r = 0;
3829
2.51k
        break;
3830
2.51k
      }
3831
3832
3.62k
      switch (an->type) {
3833
896
      case ANCR_PREC_READ:
3834
2.06k
      case ANCR_LOOK_BEHIND:
3835
2.06k
        empty = NULL_NODE;
3836
2.06k
        break;
3837
1.55k
      default:
3838
1.55k
        break;
3839
3.62k
      }
3840
3.62k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3841
3.62k
    }
3842
0
    break;
3843
3844
14.8k
  case ND_QUANT:
3845
14.8k
    {
3846
14.8k
      QuantNode* qn = QUANT_(node);
3847
3848
14.8k
      if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
3849
14.8k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3850
14.8k
    }
3851
14.8k
    break;
3852
3853
24.3k
  case ND_BAG:
3854
24.3k
    if (IS_NOT_NULL(ND_BODY(node))) {
3855
24.3k
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3856
24.3k
      if (r != 0) return r;
3857
24.3k
    }
3858
24.3k
    {
3859
24.3k
      BagNode* en = BAG_(node);
3860
3861
24.3k
      r = 0;
3862
24.3k
      if (en->type == BAG_MEMORY) {
3863
19.3k
        if (ND_IS_BACKREF(node)) {
3864
9.62k
          if (IS_NOT_NULL(empty))
3865
4.19k
            PARSEENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
3866
9.62k
        }
3867
19.3k
      }
3868
4.96k
      else if (en->type == BAG_IF_ELSE) {
3869
1.63k
        if (IS_NOT_NULL(en->te.Then)) {
3870
842
          r = set_empty_repeat_node_trav(en->te.Then, empty, env);
3871
842
          if (r != 0) return r;
3872
842
        }
3873
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3874
930
          r = set_empty_repeat_node_trav(en->te.Else, empty, env);
3875
930
        }
3876
1.63k
      }
3877
24.3k
    }
3878
24.3k
    break;
3879
3880
86.0k
  default:
3881
86.0k
    r = 0;
3882
86.0k
    break;
3883
160k
  }
3884
3885
160k
  return r;
3886
160k
}
3887
3888
static int
3889
is_ancestor_node(Node* node, Node* me)
3890
5.01k
{
3891
5.01k
  Node* parent;
3892
3893
11.9k
  while ((parent = ND_PARENT(me)) != NULL_NODE) {
3894
6.95k
    if (parent == node) return 1;
3895
6.89k
    me = parent;
3896
6.89k
  }
3897
4.94k
  return 0;
3898
5.01k
}
3899
3900
static void
3901
set_empty_status_check_trav(Node* node, ParseEnv* env)
3902
160k
{
3903
160k
  switch (ND_TYPE(node)) {
3904
24.3k
  case ND_LIST:
3905
28.9k
  case ND_ALT:
3906
110k
    do {
3907
110k
      set_empty_status_check_trav(ND_CAR(node), env);
3908
110k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3909
28.9k
    break;
3910
3911
6.13k
  case ND_ANCHOR:
3912
6.13k
    {
3913
6.13k
      AnchorNode* an = ANCHOR_(node);
3914
3915
6.13k
      if (! ANCHOR_HAS_BODY(an)) break;
3916
3.62k
      set_empty_status_check_trav(ND_BODY(node), env);
3917
3.62k
    }
3918
0
    break;
3919
3920
14.8k
  case ND_QUANT:
3921
14.8k
    set_empty_status_check_trav(ND_BODY(node), env);
3922
14.8k
    break;
3923
3924
24.3k
  case ND_BAG:
3925
24.3k
    if (IS_NOT_NULL(ND_BODY(node)))
3926
24.3k
      set_empty_status_check_trav(ND_BODY(node), env);
3927
24.3k
    {
3928
24.3k
      BagNode* en = BAG_(node);
3929
3930
24.3k
      if (en->type == BAG_IF_ELSE) {
3931
1.63k
        if (IS_NOT_NULL(en->te.Then)) {
3932
842
          set_empty_status_check_trav(en->te.Then, env);
3933
842
        }
3934
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3935
930
          set_empty_status_check_trav(en->te.Else, env);
3936
930
        }
3937
1.63k
      }
3938
24.3k
    }
3939
24.3k
    break;
3940
3941
9.84k
  case ND_BACKREF:
3942
9.84k
    {
3943
9.84k
      int i;
3944
9.84k
      int* backs;
3945
9.84k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3946
9.84k
      BackRefNode* br = BACKREF_(node);
3947
9.84k
      backs = BACKREFS_P(br);
3948
24.9k
      for (i = 0; i < br->back_num; i++) {
3949
15.1k
        Node* ernode = mem_env[backs[i]].empty_repeat_node;
3950
15.1k
        if (IS_NOT_NULL(ernode)) {
3951
5.01k
          if (! is_ancestor_node(ernode, node)) {
3952
4.94k
            MEM_STATUS_LIMIT_ON(QUANT_(ernode)->empty_status_mem, backs[i]);
3953
4.94k
            ND_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
3954
4.94k
            ND_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
3955
4.94k
          }
3956
5.01k
        }
3957
15.1k
      }
3958
9.84k
    }
3959
9.84k
    break;
3960
3961
76.1k
  default:
3962
76.1k
    break;
3963
160k
  }
3964
160k
}
3965
3966
static void
3967
set_parent_node_trav(Node* node, Node* parent)
3968
160k
{
3969
160k
  ND_PARENT(node) = parent;
3970
3971
160k
  switch (ND_TYPE(node)) {
3972
24.3k
  case ND_LIST:
3973
28.9k
  case ND_ALT:
3974
110k
    do {
3975
110k
      set_parent_node_trav(ND_CAR(node), node);
3976
110k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3977
28.9k
    break;
3978
3979
6.13k
  case ND_ANCHOR:
3980
6.13k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
3981
3.62k
    set_parent_node_trav(ND_BODY(node), node);
3982
3.62k
    break;
3983
3984
14.8k
  case ND_QUANT:
3985
14.8k
    set_parent_node_trav(ND_BODY(node), node);
3986
14.8k
    break;
3987
3988
24.3k
  case ND_BAG:
3989
24.3k
    if (IS_NOT_NULL(ND_BODY(node)))
3990
24.3k
      set_parent_node_trav(ND_BODY(node), node);
3991
24.3k
    {
3992
24.3k
      BagNode* en = BAG_(node);
3993
3994
24.3k
      if (en->type == BAG_IF_ELSE) {
3995
1.63k
        if (IS_NOT_NULL(en->te.Then))
3996
842
          set_parent_node_trav(en->te.Then, node);
3997
1.63k
        if (IS_NOT_NULL(en->te.Else)) {
3998
930
          set_parent_node_trav(en->te.Else, node);
3999
930
        }
4000
1.63k
      }
4001
24.3k
    }
4002
24.3k
    break;
4003
4004
86.0k
  default:
4005
86.0k
    break;
4006
160k
  }
4007
160k
}
4008
4009
4010
#ifdef USE_CALL
4011
4012
54.5k
#define RECURSION_EXIST        (1<<0)
4013
64.0k
#define RECURSION_MUST         (1<<1)
4014
215k
#define RECURSION_INFINITE     (1<<2)
4015
4016
static int
4017
infinite_recursive_call_check(Node* node, ParseEnv* env, int head)
4018
329k
{
4019
329k
  int ret;
4020
329k
  int r = 0;
4021
4022
329k
  switch (ND_TYPE(node)) {
4023
46.1k
  case ND_LIST:
4024
46.1k
    {
4025
46.1k
      Node *x;
4026
46.1k
      OnigLen min;
4027
4028
46.1k
      x = node;
4029
172k
      do {
4030
172k
        ret = infinite_recursive_call_check(ND_CAR(x), env, head);
4031
172k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4032
172k
        r |= ret;
4033
172k
        if (head != 0) {
4034
16.7k
          min = node_min_byte_len(ND_CAR(x), env);
4035
16.7k
          if (min != 0) head = 0;
4036
16.7k
        }
4037
172k
      } while (IS_NOT_NULL(x = ND_CDR(x)));
4038
46.1k
    }
4039
46.0k
    break;
4040
4041
46.0k
  case ND_ALT:
4042
11.5k
    {
4043
11.5k
      int must;
4044
4045
11.5k
      must = RECURSION_MUST;
4046
31.7k
      do {
4047
31.7k
        ret = infinite_recursive_call_check(ND_CAR(node), env, head);
4048
31.7k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4049
4050
31.7k
        r    |= (ret & RECURSION_EXIST);
4051
31.7k
        must &= ret;
4052
31.7k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4053
11.4k
      r |= must;
4054
11.4k
    }
4055
0
    break;
4056
4057
21.3k
  case ND_QUANT:
4058
21.3k
    if (QUANT_(node)->upper == 0) break;
4059
4060
20.8k
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4061
20.8k
    if (r < 0) return r;
4062
20.8k
    if ((r & RECURSION_MUST) != 0) {
4063
5.14k
      if (QUANT_(node)->lower == 0)
4064
2.41k
        r &= ~RECURSION_MUST;
4065
5.14k
    }
4066
20.8k
    break;
4067
4068
10.7k
  case ND_ANCHOR:
4069
10.7k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
4070
5.10k
      break;
4071
    /* fall */
4072
50.3k
  case ND_CALL:
4073
50.3k
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4074
50.3k
    break;
4075
4076
84.8k
  case ND_BAG:
4077
84.8k
    {
4078
84.8k
      BagNode* en = BAG_(node);
4079
4080
84.8k
      if (en->type == BAG_MEMORY) {
4081
78.0k
        if (ND_IS_MARK2(node))
4082
17.2k
          return 0;
4083
60.8k
        else if (ND_IS_MARK1(node))
4084
21.0k
          return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
4085
21.0k
                  : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
4086
39.7k
        else {
4087
39.7k
          ND_STATUS_ADD(node, MARK2);
4088
39.7k
          r = infinite_recursive_call_check(ND_BODY(node), env, head);
4089
39.7k
          ND_STATUS_REMOVE(node, MARK2);
4090
39.7k
        }
4091
78.0k
      }
4092
6.80k
      else if (en->type == BAG_IF_ELSE) {
4093
3.46k
        int eret;
4094
4095
3.46k
        ret = infinite_recursive_call_check(ND_BODY(node), env, head);
4096
3.46k
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4097
3.45k
        r |= ret;
4098
3.45k
        if (IS_NOT_NULL(en->te.Then)) {
4099
2.11k
          OnigLen min;
4100
2.11k
          if (head != 0) {
4101
1.01k
            min = node_min_byte_len(ND_BODY(node), env);
4102
1.01k
          }
4103
1.09k
          else min = 0;
4104
4105
2.11k
          ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
4106
2.11k
          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4107
2.10k
          r |= ret;
4108
2.10k
        }
4109
3.44k
        if (IS_NOT_NULL(en->te.Else)) {
4110
1.79k
          eret = infinite_recursive_call_check(en->te.Else, env, head);
4111
1.79k
          if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
4112
1.79k
          r |= (eret & RECURSION_EXIST);
4113
1.79k
          if ((eret & RECURSION_MUST) == 0)
4114
1.44k
            r &= ~RECURSION_MUST;
4115
1.79k
        }
4116
1.64k
        else {
4117
1.64k
          r &= ~RECURSION_MUST;
4118
1.64k
        }
4119
3.44k
      }
4120
3.34k
      else {
4121
3.34k
        r = infinite_recursive_call_check(ND_BODY(node), env, head);
4122
3.34k
      }
4123
84.8k
    }
4124
46.5k
    break;
4125
4126
110k
  default:
4127
110k
    break;
4128
329k
  }
4129
4130
291k
  return r;
4131
329k
}
4132
4133
static int
4134
infinite_recursive_call_check_trav(Node* node, ParseEnv* env)
4135
125k
{
4136
125k
  int r;
4137
4138
125k
  switch (ND_TYPE(node)) {
4139
18.8k
  case ND_LIST:
4140
23.4k
  case ND_ALT:
4141
86.8k
    do {
4142
86.8k
      r = infinite_recursive_call_check_trav(ND_CAR(node), env);
4143
86.8k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4144
23.4k
    break;
4145
4146
5.19k
  case ND_ANCHOR:
4147
5.19k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4148
2.72k
      r = 0;
4149
2.72k
      break;
4150
2.72k
    }
4151
    /* fall */
4152
15.9k
  case ND_QUANT:
4153
15.9k
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4154
15.9k
    break;
4155
4156
18.2k
  case ND_BAG:
4157
18.2k
    {
4158
18.2k
      BagNode* en = BAG_(node);
4159
4160
18.2k
      if (en->type == BAG_MEMORY) {
4161
13.8k
        if (ND_IS_RECURSION(node) && ND_IS_CALLED(node)) {
4162
3.28k
          int ret;
4163
4164
3.28k
          ND_STATUS_ADD(node, MARK1);
4165
4166
3.28k
          ret = infinite_recursive_call_check(ND_BODY(node), env, 1);
4167
3.28k
          if (ret < 0) return ret;
4168
3.28k
          else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
4169
228
            return ONIGERR_NEVER_ENDING_RECURSION;
4170
4171
3.05k
          ND_STATUS_REMOVE(node, MARK1);
4172
3.05k
        }
4173
13.8k
      }
4174
4.43k
      else if (en->type == BAG_IF_ELSE) {
4175
1.38k
        if (IS_NOT_NULL(en->te.Then)) {
4176
616
          r = infinite_recursive_call_check_trav(en->te.Then, env);
4177
616
          if (r != 0) return r;
4178
616
        }
4179
1.36k
        if (IS_NOT_NULL(en->te.Else)) {
4180
864
          r = infinite_recursive_call_check_trav(en->te.Else, env);
4181
864
          if (r != 0) return r;
4182
864
        }
4183
1.36k
      }
4184
18.2k
    }
4185
4186
18.0k
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4187
18.0k
    break;
4188
4189
64.9k
  default:
4190
64.9k
    r = 0;
4191
64.9k
    break;
4192
125k
  }
4193
4194
125k
  return r;
4195
125k
}
4196
4197
static int
4198
recursive_call_check(Node* node)
4199
477k
{
4200
477k
  int r;
4201
4202
477k
  switch (ND_TYPE(node)) {
4203
63.6k
  case ND_LIST:
4204
79.2k
  case ND_ALT:
4205
79.2k
    r = 0;
4206
291k
    do {
4207
291k
      r |= recursive_call_check(ND_CAR(node));
4208
291k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4209
79.2k
    break;
4210
4211
16.5k
  case ND_ANCHOR:
4212
16.5k
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4213
7.66k
      r = 0;
4214
7.66k
      break;
4215
7.66k
    }
4216
    /* fall */
4217
47.2k
  case ND_QUANT:
4218
47.2k
    r = recursive_call_check(ND_BODY(node));
4219
47.2k
    break;
4220
4221
55.2k
  case ND_CALL:
4222
55.2k
    r = recursive_call_check(ND_BODY(node));
4223
55.2k
    if (r != 0) {
4224
32.2k
      if (ND_IS_MARK1(ND_BODY(node)))
4225
13.4k
        ND_STATUS_ADD(node, RECURSION);
4226
32.2k
    }
4227
55.2k
    break;
4228
4229
115k
  case ND_BAG:
4230
115k
    {
4231
115k
      BagNode* en = BAG_(node);
4232
4233
115k
      if (en->type == BAG_MEMORY) {
4234
105k
        if (ND_IS_MARK2(node))
4235
22.7k
          return 0;
4236
82.5k
        else if (ND_IS_MARK1(node))
4237
24.4k
          return 1; /* recursion */
4238
58.0k
        else {
4239
58.0k
          ND_STATUS_ADD(node, MARK2);
4240
58.0k
          r = recursive_call_check(ND_BODY(node));
4241
58.0k
          ND_STATUS_REMOVE(node, MARK2);
4242
58.0k
        }
4243
105k
      }
4244
10.4k
      else if (en->type == BAG_IF_ELSE) {
4245
4.81k
        r = 0;
4246
4.81k
        if (IS_NOT_NULL(en->te.Then)) {
4247
2.71k
          r |= recursive_call_check(en->te.Then);
4248
2.71k
        }
4249
4.81k
        if (IS_NOT_NULL(en->te.Else)) {
4250
2.65k
          r |= recursive_call_check(en->te.Else);
4251
2.65k
        }
4252
4.81k
        r |= recursive_call_check(ND_BODY(node));
4253
4.81k
      }
4254
5.59k
      else {
4255
5.59k
        r = recursive_call_check(ND_BODY(node));
4256
5.59k
      }
4257
115k
    }
4258
68.4k
    break;
4259
4260
172k
  default:
4261
172k
    r = 0;
4262
172k
    break;
4263
477k
  }
4264
4265
429k
  return r;
4266
477k
}
4267
4268
13.9k
#define IN_RECURSION         (1<<0)
4269
124k
#define FOUND_CALLED_NODE    1
4270
4271
static int
4272
recursive_call_check_trav(Node* node, ParseEnv* env, int state)
4273
133k
{
4274
133k
  int r = 0;
4275
4276
133k
  switch (ND_TYPE(node)) {
4277
20.8k
  case ND_LIST:
4278
25.6k
  case ND_ALT:
4279
25.6k
    {
4280
25.6k
      int ret;
4281
93.5k
      do {
4282
93.5k
        ret = recursive_call_check_trav(ND_CAR(node), env, state);
4283
93.5k
        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
4284
89.7k
        else if (ret < 0) return ret;
4285
93.5k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4286
25.6k
    }
4287
25.6k
    break;
4288
4289
25.6k
  case ND_QUANT:
4290
13.6k
    r = recursive_call_check_trav(ND_BODY(node), env, state);
4291
13.6k
    if (QUANT_(node)->upper == 0) {
4292
706
      if (r == FOUND_CALLED_NODE)
4293
156
        QUANT_(node)->include_referred = 1;
4294
706
    }
4295
13.6k
    break;
4296
4297
5.27k
  case ND_ANCHOR:
4298
5.27k
    {
4299
5.27k
      AnchorNode* an = ANCHOR_(node);
4300
5.27k
      if (ANCHOR_HAS_BODY(an))
4301
2.46k
        r = recursive_call_check_trav(ND_ANCHOR_BODY(an), env, state);
4302
5.27k
    }
4303
5.27k
    break;
4304
4305
18.6k
  case ND_BAG:
4306
18.6k
    {
4307
18.6k
      int ret;
4308
18.6k
      int state1;
4309
18.6k
      BagNode* en = BAG_(node);
4310
4311
18.6k
      if (en->type == BAG_MEMORY) {
4312
13.9k
        if (ND_IS_CALLED(node)) {
4313
4.70k
          r = FOUND_CALLED_NODE;
4314
4.70k
          goto check_recursion;
4315
4.70k
        }
4316
9.28k
        else if ((state & IN_RECURSION) != 0) {
4317
9.29k
        check_recursion:
4318
9.29k
          if (! ND_IS_RECURSION(node)) {
4319
9.29k
            ND_STATUS_ADD(node, MARK1);
4320
9.29k
            ret = recursive_call_check(ND_BODY(node));
4321
9.29k
            if (ret != 0) {
4322
4.68k
              ND_STATUS_ADD(node, RECURSION);
4323
4.68k
              MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
4324
4.68k
            }
4325
9.29k
            ND_STATUS_REMOVE(node, MARK1);
4326
9.29k
          }
4327
9.29k
        }
4328
13.9k
      }
4329
4330
18.6k
      state1 = state;
4331
18.6k
      if (ND_IS_RECURSION(node))
4332
4.68k
        state1 |= IN_RECURSION;
4333
4334
18.6k
      ret = recursive_call_check_trav(ND_BODY(node), env, state1);
4335
18.6k
      if (ret == FOUND_CALLED_NODE)
4336
1.84k
        r = FOUND_CALLED_NODE;
4337
4338
18.6k
      if (en->type == BAG_IF_ELSE) {
4339
1.49k
        if (IS_NOT_NULL(en->te.Then)) {
4340
658
          ret = recursive_call_check_trav(en->te.Then, env, state1);
4341
658
          if (ret == FOUND_CALLED_NODE)
4342
24
            r = FOUND_CALLED_NODE;
4343
658
        }
4344
1.49k
        if (IS_NOT_NULL(en->te.Else)) {
4345
940
          ret = recursive_call_check_trav(en->te.Else, env, state1);
4346
940
          if (ret == FOUND_CALLED_NODE)
4347
34
            r = FOUND_CALLED_NODE;
4348
940
        }
4349
1.49k
      }
4350
18.6k
    }
4351
0
    break;
4352
4353
69.8k
  default:
4354
69.8k
    break;
4355
133k
  }
4356
4357
133k
  return r;
4358
133k
}
4359
4360
#endif
4361
4362
static void
4363
remove_from_list(Node* prev, Node* a)
4364
6.02k
{
4365
6.02k
  if (ND_CDR(prev) != a) return ;
4366
4367
6.02k
  ND_CDR(prev) = ND_CDR(a);
4368
6.02k
  ND_CDR(a) = NULL_NODE;
4369
6.02k
}
4370
4371
static int
4372
reduce_string_list(Node* node, OnigEncoding enc)
4373
776k
{
4374
776k
  int r = 0;
4375
4376
776k
  switch (ND_TYPE(node)) {
4377
188k
  case ND_LIST:
4378
188k
    {
4379
188k
      Node* prev;
4380
188k
      Node* curr;
4381
188k
      Node* prev_node;
4382
188k
      Node* next_node;
4383
4384
188k
      prev = NULL_NODE;
4385
519k
      do {
4386
519k
        next_node = ND_CDR(node);
4387
519k
        curr = ND_CAR(node);
4388
519k
        if (ND_TYPE(curr) == ND_STRING) {
4389
100k
          if (IS_NULL(prev)
4390
100k
              || STR_(curr)->flag  != STR_(prev)->flag
4391
100k
              || ND_STATUS(curr) != ND_STATUS(prev)) {
4392
94.0k
            prev = curr;
4393
94.0k
            prev_node = node;
4394
94.0k
          }
4395
6.02k
          else {
4396
6.02k
            r = node_str_node_cat(prev, curr);
4397
6.02k
            if (r != 0) return r;
4398
6.02k
            remove_from_list(prev_node, node);
4399
6.02k
            onig_node_free(node);
4400
6.02k
          }
4401
100k
        }
4402
419k
        else {
4403
419k
          if (IS_NOT_NULL(prev)) {
4404
42.8k
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4405
42.8k
            StrNode* sn = STR_(prev);
4406
42.8k
            if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4407
4
              return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4408
42.8k
#endif
4409
42.8k
            prev = NULL_NODE;
4410
42.8k
          }
4411
419k
          r = reduce_string_list(curr, enc);
4412
419k
          if (r != 0) return r;
4413
419k
          prev_node = node;
4414
419k
        }
4415
4416
519k
        node = next_node;
4417
519k
      } while (r == 0 && IS_NOT_NULL(node));
4418
4419
188k
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4420
188k
      if (IS_NOT_NULL(prev)) {
4421
51.1k
        StrNode* sn = STR_(prev);
4422
51.1k
        if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4423
30
          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4424
51.1k
      }
4425
188k
#endif
4426
188k
    }
4427
188k
    break;
4428
4429
188k
  case ND_ALT:
4430
169k
    do {
4431
169k
      r = reduce_string_list(ND_CAR(node), enc);
4432
169k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4433
16.0k
    break;
4434
4435
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4436
78.8k
  case ND_STRING:
4437
78.8k
    {
4438
78.8k
      StrNode* sn = STR_(node);
4439
78.8k
      if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4440
138
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4441
78.8k
    }
4442
78.7k
    break;
4443
78.7k
#endif
4444
4445
78.7k
  case ND_ANCHOR:
4446
29.8k
    if (IS_NULL(ND_BODY(node)))
4447
23.0k
      break;
4448
    /* fall */
4449
85.4k
  case ND_QUANT:
4450
85.4k
    r = reduce_string_list(ND_BODY(node), enc);
4451
85.4k
    break;
4452
4453
56.4k
  case ND_BAG:
4454
56.4k
    {
4455
56.4k
      BagNode* en = BAG_(node);
4456
4457
56.4k
      r = reduce_string_list(ND_BODY(node), enc);
4458
56.4k
      if (r != 0) return r;
4459
4460
56.3k
      if (en->type == BAG_IF_ELSE) {
4461
6.51k
        if (IS_NOT_NULL(en->te.Then)) {
4462
2.78k
          r = reduce_string_list(en->te.Then, enc);
4463
2.78k
          if (r != 0) return r;
4464
2.78k
        }
4465
6.50k
        if (IS_NOT_NULL(en->te.Else)) {
4466
4.22k
          r = reduce_string_list(en->te.Else, enc);
4467
4.22k
          if (r != 0) return r;
4468
4.22k
        }
4469
6.50k
      }
4470
56.3k
    }
4471
56.3k
    break;
4472
4473
328k
  default:
4474
328k
    break;
4475
776k
  }
4476
4477
776k
  return r;
4478
776k
}
4479
4480
4481
261k
#define IN_ALT          (1<<0)
4482
46.4k
#define IN_NOT          (1<<1)
4483
312k
#define IN_REAL_REPEAT  (1<<2)
4484
160k
#define IN_VAR_REPEAT   (1<<3)
4485
22.8k
#define IN_ZERO_REPEAT  (1<<4)
4486
124k
#define IN_MULTI_ENTRY  (1<<5)
4487
87.9k
#define IN_PREC_READ    (1<<6)
4488
164k
#define IN_LOOK_BEHIND  (1<<7)
4489
188k
#define IN_PEEK         (1<<8)
4490
4491
/* divide different length alternatives in look-behind.
4492
  (?<=A|B) ==> (?<=A)|(?<=B)
4493
  (?<!A|B) ==> (?<!A)(?<!B)
4494
*/
4495
static int
4496
divide_look_behind_alternatives(Node* node)
4497
1.38k
{
4498
1.38k
  int r;
4499
1.38k
  int anc_type;
4500
1.38k
  Node *head, *np, *insert_node;
4501
1.38k
  AnchorNode* an;
4502
4503
1.38k
  an = ANCHOR_(node);
4504
1.38k
  anc_type = an->type;
4505
4506
1.38k
  head = ND_ANCHOR_BODY(an);
4507
1.38k
  np = ND_CAR(head);
4508
1.38k
  node_swap(node, head);
4509
1.38k
  ND_CAR(node) = head;
4510
1.38k
  ND_BODY(head) = np;
4511
4512
1.38k
  np = node;
4513
11.2k
  while (IS_NOT_NULL(np = ND_CDR(np))) {
4514
9.86k
    r = onig_node_copy(&insert_node, head);
4515
9.86k
    if (r != 0) return r;
4516
9.86k
    CHECK_NULL_RETURN_MEMERR(insert_node);
4517
9.86k
    ND_BODY(insert_node) = ND_CAR(np);
4518
9.86k
    ND_CAR(np) = insert_node;
4519
9.86k
  }
4520
4521
1.38k
  if (anc_type == ANCR_LOOK_BEHIND_NOT) {
4522
420
    np = node;
4523
5.54k
    do {
4524
5.54k
      ND_SET_TYPE(np, ND_LIST);  /* alt -> list */
4525
5.54k
    } while (IS_NOT_NULL(np = ND_CDR(np)));
4526
420
  }
4527
1.38k
  return 0;
4528
1.38k
}
4529
4530
static int
4531
node_reduce_in_look_behind(Node* node)
4532
23.1k
{
4533
23.1k
  NodeType type;
4534
23.1k
  Node* body;
4535
4536
23.1k
  if (ND_TYPE(node) != ND_QUANT) return 0;
4537
4538
4.86k
  body = ND_BODY(node);
4539
4.86k
  type = ND_TYPE(body);
4540
4.86k
  if (type == ND_STRING || type == ND_CTYPE ||
4541
4.86k
      type == ND_CCLASS || type == ND_BACKREF) {
4542
3.86k
    QuantNode* qn = QUANT_(node);
4543
3.86k
    qn->upper = qn->lower;
4544
3.86k
    if (qn->upper == 0)
4545
1.61k
      return 1; /* removed */
4546
3.86k
  }
4547
4548
3.24k
  return 0;
4549
4.86k
}
4550
4551
static int
4552
list_reduce_in_look_behind(Node* node)
4553
29.7k
{
4554
29.7k
  int r;
4555
4556
29.7k
  switch (ND_TYPE(node)) {
4557
2.46k
  case ND_QUANT:
4558
2.46k
    r = node_reduce_in_look_behind(node);
4559
2.46k
    if (r > 0) r = 0;
4560
2.46k
    break;
4561
4562
19.0k
  case ND_LIST:
4563
20.6k
    do {
4564
20.6k
      r = node_reduce_in_look_behind(ND_CAR(node));
4565
20.6k
      if (r <= 0) break;
4566
20.6k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4567
0
    break;
4568
4569
8.24k
  default:
4570
8.24k
    r = 0;
4571
8.24k
    break;
4572
29.7k
  }
4573
4574
29.7k
  return r;
4575
29.7k
}
4576
4577
static int
4578
alt_reduce_in_look_behind(Node* node, regex_t* reg, ParseEnv* env)
4579
17.1k
{
4580
17.1k
  int r;
4581
4582
17.1k
  switch (ND_TYPE(node)) {
4583
2.06k
  case ND_ALT:
4584
14.7k
    do {
4585
14.7k
      r = list_reduce_in_look_behind(ND_CAR(node));
4586
14.7k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4587
2.06k
    break;
4588
4589
15.0k
  default:
4590
15.0k
    r = list_reduce_in_look_behind(node);
4591
15.0k
    break;
4592
17.1k
  }
4593
4594
17.1k
  return r;
4595
17.1k
}
4596
4597
static int tune_tree(Node* node, regex_t* reg, int state, ParseEnv* env);
4598
4599
static int
4600
tune_look_behind(Node* node, regex_t* reg, int state, ParseEnv* env)
4601
17.1k
{
4602
17.1k
  int r;
4603
17.1k
  int state1;
4604
17.1k
  int used;
4605
17.1k
  MinMaxCharLen ci;
4606
17.1k
  Node* body;
4607
17.1k
  AnchorNode* an = ANCHOR_(node);
4608
4609
17.1k
  used = FALSE;
4610
17.1k
  r = check_node_in_look_behind(ND_ANCHOR_BODY(an),
4611
17.1k
                                an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0,
4612
17.1k
                                &used);
4613
17.1k
  if (r < 0) return r;
4614
17.1k
  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4615
4616
17.1k
  if (an->type == ANCR_LOOK_BEHIND_NOT)
4617
7.18k
    state1 = state | IN_NOT | IN_LOOK_BEHIND;
4618
9.96k
  else
4619
9.96k
    state1 = state | IN_LOOK_BEHIND;
4620
4621
17.1k
  body = ND_ANCHOR_BODY(an);
4622
  /* Execute tune_tree(body) before call node_char_len().
4623
     Because case-fold expansion must be done before node_char_len().
4624
   */
4625
17.1k
  r = tune_tree(body, reg, state1, env);
4626
17.1k
  if (r != 0) return r;
4627
4628
17.1k
  r = alt_reduce_in_look_behind(body, reg, env);
4629
17.1k
  if (r != 0) return r;
4630
4631
17.1k
  r = node_char_len(body, reg, &ci, env);
4632
17.1k
  if (r >= 0) {
4633
    /* #177: overflow in onigenc_step_back() */
4634
17.0k
    if ((ci.max != INFINITE_LEN && ci.max > LOOK_BEHIND_MAX_CHAR_LEN)
4635
17.0k
      || ci.min > LOOK_BEHIND_MAX_CHAR_LEN) {
4636
430
      return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4637
430
    }
4638
4639
16.6k
    if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) {
4640
1.35k
      if (an->type == ANCR_LOOK_BEHIND_NOT)
4641
834
        r = onig_node_reset_fail(node);
4642
522
      else
4643
522
        r = onig_node_reset_empty(node);
4644
4645
1.35k
      return r;
4646
1.35k
    }
4647
4648
15.2k
    if (r == CHAR_LEN_TOP_ALT_FIXED) {
4649
1.40k
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) {
4650
1.38k
        r = divide_look_behind_alternatives(node);
4651
1.38k
        if (r == 0)
4652
1.38k
          r = tune_tree(node, reg, state, env);
4653
1.38k
      }
4654
16
      else if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND))
4655
0
        goto normal;
4656
16
      else
4657
16
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4658
1.40k
    }
4659
13.8k
    else { /* CHAR_LEN_NORMAL */
4660
13.8k
    normal:
4661
13.8k
      if (ci.min == INFINITE_LEN) {
4662
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4663
0
      }
4664
13.8k
      else {
4665
13.8k
        if (ci.min != ci.max &&
4666
13.8k
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND)) {
4667
52
          r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4668
52
        }
4669
13.8k
        else {
4670
13.8k
          Node* tail;
4671
4672
          /* check lead_node is already set by double call after
4673
             divide_look_behind_alternatives() */
4674
13.8k
          if (IS_NULL(an->lead_node)) {
4675
12.6k
            an->char_min_len = ci.min;
4676
12.6k
            an->char_max_len = ci.max;
4677
12.6k
            r = get_tree_tail_literal(body, &tail, reg, 0);
4678
12.6k
            if (r == GET_VALUE_FOUND) {
4679
9.87k
              r = onig_node_copy(&(an->lead_node), tail);
4680
9.87k
              if (r != 0) return r;
4681
9.87k
            }
4682
12.6k
          }
4683
13.8k
          r = ONIG_NORMAL;
4684
13.8k
        }
4685
13.8k
      }
4686
13.8k
    }
4687
15.2k
  }
4688
4689
15.3k
  return r;
4690
17.1k
}
4691
4692
static int
4693
tune_next(Node* node, Node* next_node, regex_t* reg)
4694
350k
{
4695
350k
  int called;
4696
350k
  NodeType type;
4697
4698
350k
  called = FALSE;
4699
4700
372k
 retry:
4701
372k
  type = ND_TYPE(node);
4702
372k
  if (type == ND_QUANT) {
4703
42.8k
    QuantNode* qn = QUANT_(node);
4704
42.8k
    if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
4705
28.9k
#ifdef USE_QUANT_PEEK_NEXT
4706
28.9k
      if (called == FALSE) {
4707
28.8k
        Node* n = get_tree_head_literal(next_node, 1, reg);
4708
        /* '\0': for UTF-16BE etc... */
4709
28.8k
        if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
4710
8.69k
          qn->next_head_exact = n;
4711
8.69k
        }
4712
28.8k
      }
4713
28.9k
#endif
4714
      /* automatic posseivation a*b ==> (?>a*)b */
4715
28.9k
      if (qn->lower <= 1) {
4716
28.2k
        if (is_strict_real_node(ND_BODY(node))) {
4717
20.2k
          Node *x, *y;
4718
20.2k
          x = get_tree_head_literal(ND_BODY(node), 0, reg);
4719
20.2k
          if (IS_NOT_NULL(x)) {
4720
16.9k
            y = get_tree_head_literal(next_node,  0, reg);
4721
16.9k
            if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
4722
6.86k
              Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
4723
6.86k
              CHECK_NULL_RETURN_MEMERR(en);
4724
6.86k
              ND_STATUS_ADD(en, STRICT_REAL_REPEAT);
4725
6.86k
              node_swap(node, en);
4726
6.86k
              ND_BODY(node) = en;
4727
6.86k
            }
4728
16.9k
          }
4729
20.2k
        }
4730
28.2k
      }
4731
28.9k
    }
4732
42.8k
  }
4733
329k
  else if (type == ND_BAG) {
4734
31.5k
    BagNode* en = BAG_(node);
4735
31.5k
    if (en->type == BAG_MEMORY) {
4736
21.3k
      if (ND_IS_CALLED(node))
4737
1.81k
        called = TRUE;
4738
21.3k
      node = ND_BODY(node);
4739
21.3k
      goto retry;
4740
21.3k
    }
4741
31.5k
  }
4742
350k
  return 0;
4743
372k
}
4744
4745
4746
static int
4747
is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])
4748
62.8k
{
4749
62.8k
  int i;
4750
4751
123k
  for (i = 0; i < n; i++) {
4752
70.0k
    OnigCaseFoldCodeItem* item = items + i;
4753
70.0k
    if (item->code_len != 1) return 0;
4754
70.0k
  }
4755
4756
53.7k
  return 1;
4757
62.8k
}
4758
4759
static int
4760
get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[],
4761
                                     OnigLen* rmin, OnigLen* rmax)
4762
62.8k
{
4763
62.8k
  int i;
4764
62.8k
  OnigLen len, minlen, maxlen;
4765
4766
62.8k
  minlen = INFINITE_LEN;
4767
62.8k
  maxlen = 0;
4768
171k
  for (i = 0; i < n; i++) {
4769
108k
    OnigCaseFoldCodeItem* item = items + i;
4770
4771
108k
    len = item->byte_len;
4772
108k
    if (len < minlen) minlen = len;
4773
108k
    if (len > maxlen) maxlen = len;
4774
108k
  }
4775
4776
62.8k
  *rmin = minlen;
4777
62.8k
  *rmax = maxlen;
4778
62.8k
  return 0;
4779
62.8k
}
4780
4781
static int
4782
make_code_list_to_string(Node** rnode, OnigEncoding enc,
4783
                         int n, OnigCodePoint codes[])
4784
53.6k
{
4785
53.6k
  int r, i, len;
4786
53.6k
  Node* node;
4787
53.6k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4788
4789
53.6k
  *rnode = NULL_NODE;
4790
53.6k
  node = onig_node_new_str(NULL, NULL);
4791
53.6k
  CHECK_NULL_RETURN_MEMERR(node);
4792
4793
163k
  for (i = 0; i < n; i++) {
4794
109k
    len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
4795
109k
    if (len < 0) {
4796
0
      r = len;
4797
0
      goto err;
4798
0
    }
4799
4800
109k
    r = onig_node_str_cat(node, buf, buf + len);
4801
109k
    if (r != 0) goto err;
4802
109k
  }
4803
4804
53.6k
  *rnode = node;
4805
53.6k
  return 0;
4806
4807
0
 err:
4808
0
  onig_node_free(node);
4809
0
  return r;
4810
53.6k
}
4811
4812
static int
4813
unravel_cf_node_add(Node** rlist, Node* add)
4814
123k
{
4815
123k
  Node *list;
4816
4817
123k
  list = *rlist;
4818
123k
  if (IS_NULL(list)) {
4819
62.5k
    list = onig_node_new_list(add, NULL);
4820
62.5k
    CHECK_NULL_RETURN_MEMERR(list);
4821
62.5k
    *rlist = list;
4822
62.5k
  }
4823
60.6k
  else {
4824
60.6k
    Node* r = node_list_add(list, add);
4825
60.6k
    CHECK_NULL_RETURN_MEMERR(r);
4826
60.6k
  }
4827
4828
123k
  return 0;
4829
123k
}
4830
4831
static int
4832
unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end,
4833
                      unsigned int flag)
4834
106k
{
4835
106k
  int r;
4836
106k
  Node *sn, *list;
4837
4838
106k
  list = *rlist;
4839
106k
  sn   = *rsn;
4840
4841
106k
  if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) {
4842
54.4k
    r = onig_node_str_cat(sn, s, end);
4843
54.4k
  }
4844
51.6k
  else {
4845
51.6k
    sn = onig_node_new_str(s, end);
4846
51.6k
    CHECK_NULL_RETURN_MEMERR(sn);
4847
4848
51.6k
    STR_(sn)->flag = flag;
4849
51.6k
    r = unravel_cf_node_add(&list, sn);
4850
51.6k
  }
4851
4852
106k
  if (r == 0) {
4853
106k
    *rlist = list;
4854
106k
    *rsn = sn;
4855
106k
  }
4856
106k
  return r;
4857
106k
}
4858
4859
static int
4860
unravel_cf_string_alt_or_cc_add(Node** rlist, int n,
4861
            OnigCaseFoldCodeItem items[], OnigEncoding enc,
4862
            OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
4863
62.8k
{
4864
62.8k
  int r, i;
4865
62.8k
  Node* node;
4866
4867
62.8k
  if (is_all_code_len_1_items(n, items)) {
4868
53.7k
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4869
4870
53.7k
    codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end);
4871
108k
    for (i = 0; i < n; i++) {
4872
55.0k
      OnigCaseFoldCodeItem* item = items + i;
4873
55.0k
      codes[i+1] = item->code[0];
4874
55.0k
    }
4875
53.7k
    r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes);
4876
53.7k
    if (r != 0) return r;
4877
53.7k
  }
4878
9.11k
  else {
4879
9.11k
    Node *snode, *alt, *curr;
4880
4881
9.11k
    snode = onig_node_new_str(s, end);
4882
9.11k
    CHECK_NULL_RETURN_MEMERR(snode);
4883
9.11k
    node = curr = onig_node_new_alt(snode, NULL_NODE);
4884
9.11k
    if (IS_NULL(curr)) {
4885
0
      onig_node_free(snode);
4886
0
      return ONIGERR_MEMORY;
4887
0
    }
4888
4889
9.11k
    r = 0;
4890
62.7k
    for (i = 0; i < n; i++) {
4891
53.6k
      OnigCaseFoldCodeItem* item = items + i;
4892
53.6k
      r = make_code_list_to_string(&snode, enc, item->code_len, item->code);
4893
53.6k
      if (r != 0) {
4894
0
        onig_node_free(node);
4895
0
        return r;
4896
0
      }
4897
4898
53.6k
      alt = onig_node_new_alt(snode, NULL_NODE);
4899
53.6k
      if (IS_NULL(alt)) {
4900
0
        onig_node_free(snode);
4901
0
        onig_node_free(node);
4902
0
        return ONIGERR_MEMORY;
4903
0
      }
4904
4905
53.6k
      ND_CDR(curr) = alt;
4906
53.6k
      curr = alt;
4907
53.6k
    }
4908
9.11k
  }
4909
4910
62.8k
  r = unravel_cf_node_add(rlist, node);
4911
62.8k
  if (r != 0) onig_node_free(node);
4912
62.8k
  return r;
4913
62.8k
}
4914
4915
static int
4916
unravel_cf_look_behind_add(Node** rlist, Node** rsn,
4917
                int n, OnigCaseFoldCodeItem items[], OnigEncoding enc,
4918
                UChar* s, OnigLen one_len)
4919
9.60k
{
4920
9.60k
  int r, i, found;
4921
4922
9.60k
  found = FALSE;
4923
12.9k
  for (i = 0; i < n; i++) {
4924
12.1k
    OnigCaseFoldCodeItem* item = items + i;
4925
12.1k
    if (item->byte_len == one_len) {
4926
12.1k
      if (item->code_len == 1) {
4927
8.72k
        found = TRUE;
4928
8.72k
        break;
4929
8.72k
      }
4930
12.1k
    }
4931
12.1k
  }
4932
4933
9.60k
  if (found == FALSE) {
4934
878
    r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */);
4935
878
  }
4936
8.72k
  else {
4937
8.72k
    Node* node;
4938
8.72k
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4939
4940
8.72k
    found = 0;
4941
8.72k
    codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len);
4942
18.0k
    for (i = 0; i < n; i++) {
4943
9.30k
      OnigCaseFoldCodeItem* item = items + i;
4944
9.30k
      if (item->byte_len == one_len) {
4945
9.30k
        if (item->code_len == 1) {
4946
9.23k
          codes[found++] = item->code[0];
4947
9.23k
        }
4948
9.30k
      }
4949
9.30k
    }
4950
8.72k
    r = onig_new_cclass_with_code_list(&node, enc, found, codes);
4951
8.72k
    if (r != 0) return r;
4952
4953
8.72k
    r = unravel_cf_node_add(rlist, node);
4954
8.72k
    if (r != 0) onig_node_free(node);
4955
4956
8.72k
    *rsn = NULL_NODE;
4957
8.72k
  }
4958
4959
9.60k
  return r;
4960
9.60k
}
4961
4962
static int
4963
unravel_case_fold_string(Node* node, regex_t* reg, int state)
4964
62.6k
{
4965
62.6k
  int r, n, in_look_behind;
4966
62.6k
  OnigLen min_len, max_len, one_len;
4967
62.6k
  UChar *start, *end, *p, *q;
4968
62.6k
  StrNode* snode;
4969
62.6k
  Node *sn, *list;
4970
62.6k
  OnigEncoding enc;
4971
62.6k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4972
4973
62.6k
  if (ND_STRING_IS_CASE_EXPANDED(node)) return 0;
4974
4975
62.6k
  ND_STATUS_REMOVE(node, IGNORECASE);
4976
62.6k
  snode = STR_(node);
4977
62.6k
  start = snode->s;
4978
62.6k
  end   = snode->end;
4979
62.6k
  if (start >= end) return 0;
4980
4981
62.5k
  in_look_behind = (state & IN_LOOK_BEHIND) != 0;
4982
62.5k
  enc = reg->enc;
4983
4984
62.5k
  list = sn = NULL_NODE;
4985
62.5k
  p = start;
4986
240k
  while (p < end) {
4987
177k
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end,
4988
177k
                                           items);
4989
177k
    if (n < 0) {
4990
0
      r = n;
4991
0
      goto err;
4992
0
    }
4993
4994
177k
    one_len = (OnigLen )enclen(enc, p);
4995
177k
    if (n == 0) {
4996
105k
      q = p + one_len;
4997
105k
      if (q > end) q = end;
4998
105k
      r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */);
4999
105k
      if (r != 0) goto err;
5000
105k
    }
5001
72.4k
    else {
5002
72.4k
      if (in_look_behind != 0) {
5003
9.60k
        q = p + one_len;
5004
9.60k
        if (items[0].byte_len != one_len) {
5005
1.86k
          r = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, q,
5006
1.86k
                                                 items);
5007
1.86k
          if (r < 0) goto err;
5008
1.86k
          n = r;
5009
1.86k
        }
5010
9.60k
        r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len);
5011
9.60k
        if (r != 0) goto err;
5012
9.60k
      }
5013
62.8k
      else {
5014
62.8k
        get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len);
5015
62.8k
        if (min_len != max_len) {
5016
0
          r = ONIGERR_PARSER_BUG;
5017
0
          goto err;
5018
0
        }
5019
5020
62.8k
        q = p + max_len;
5021
62.8k
        r = unravel_cf_string_alt_or_cc_add(&list, n, items, enc,
5022
62.8k
                                            reg->case_fold_flag, p, q);
5023
62.8k
        if (r != 0) goto err;
5024
62.8k
        sn = NULL_NODE;
5025
62.8k
      }
5026
72.4k
    }
5027
5028
177k
    p = q;
5029
177k
  }
5030
5031
62.5k
  if (IS_NOT_NULL(list)) {
5032
62.5k
    if (node_list_len(list) == 1) {
5033
46.9k
      node_swap(node, ND_CAR(list));
5034
46.9k
    }
5035
15.5k
    else {
5036
15.5k
      node_swap(node, list);
5037
15.5k
    }
5038
62.5k
    onig_node_free(list);
5039
62.5k
  }
5040
0
  else {
5041
0
    node_swap(node, sn);
5042
0
    onig_node_free(sn);
5043
0
  }
5044
62.5k
  return 0;
5045
5046
0
 err:
5047
0
  if (IS_NOT_NULL(list))
5048
0
    onig_node_free(list);
5049
0
  else if (IS_NOT_NULL(sn))
5050
0
    onig_node_free(sn);
5051
5052
0
  return r;
5053
62.5k
}
5054
5055
#ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
5056
static enum BodyEmptyType
5057
quantifiers_memory_node_info(Node* node)
5058
74.2k
{
5059
74.2k
  int r = BODY_MAY_BE_EMPTY;
5060
5061
74.2k
  switch (ND_TYPE(node)) {
5062
7.84k
  case ND_LIST:
5063
11.3k
  case ND_ALT:
5064
11.3k
    {
5065
11.3k
      int v;
5066
33.7k
      do {
5067
33.7k
        v = quantifiers_memory_node_info(ND_CAR(node));
5068
33.7k
        if (v > r) r = v;
5069
33.7k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
5070
11.3k
    }
5071
11.3k
    break;
5072
5073
0
#ifdef USE_CALL
5074
1.10k
  case ND_CALL:
5075
1.10k
    if (ND_IS_RECURSION(node)) {
5076
782
      return BODY_MAY_BE_EMPTY_REC; /* tiny version */
5077
782
    }
5078
320
    else
5079
320
      r = quantifiers_memory_node_info(ND_BODY(node));
5080
320
    break;
5081
320
#endif
5082
5083
16.9k
  case ND_QUANT:
5084
16.9k
    {
5085
16.9k
      QuantNode* qn = QUANT_(node);
5086
16.9k
      if (qn->upper != 0) {
5087
16.1k
        r = quantifiers_memory_node_info(ND_BODY(node));
5088
16.1k
      }
5089
16.9k
    }
5090
16.9k
    break;
5091
5092
14.2k
  case ND_BAG:
5093
14.2k
    {
5094
14.2k
      BagNode* en = BAG_(node);
5095
14.2k
      switch (en->type) {
5096
6.89k
      case BAG_MEMORY:
5097
6.89k
        if (ND_IS_RECURSION(node)) {
5098
1.02k
          return BODY_MAY_BE_EMPTY_REC;
5099
1.02k
        }
5100
5.87k
        return BODY_MAY_BE_EMPTY_MEM;
5101
0
        break;
5102
5103
354
      case BAG_OPTION:
5104
5.91k
      case BAG_STOP_BACKTRACK:
5105
5.91k
        r = quantifiers_memory_node_info(ND_BODY(node));
5106
5.91k
        break;
5107
1.40k
      case BAG_IF_ELSE:
5108
1.40k
        {
5109
1.40k
          int v;
5110
1.40k
          r = quantifiers_memory_node_info(ND_BODY(node));
5111
1.40k
          if (IS_NOT_NULL(en->te.Then)) {
5112
894
            v = quantifiers_memory_node_info(en->te.Then);
5113
894
            if (v > r) r = v;
5114
894
          }
5115
1.40k
          if (IS_NOT_NULL(en->te.Else)) {
5116
624
            v = quantifiers_memory_node_info(en->te.Else);
5117
624
            if (v > r) r = v;
5118
624
          }
5119
1.40k
        }
5120
1.40k
        break;
5121
14.2k
      }
5122
14.2k
    }
5123
7.32k
    break;
5124
5125
7.32k
  case ND_BACKREF:
5126
12.4k
  case ND_STRING:
5127
16.9k
  case ND_CTYPE:
5128
19.0k
  case ND_CCLASS:
5129
20.0k
  case ND_ANCHOR:
5130
30.7k
  case ND_GIMMICK:
5131
30.7k
  default:
5132
30.7k
    break;
5133
74.2k
  }
5134
5135
66.6k
  return r;
5136
74.2k
}
5137
#endif /* USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT */
5138
5139
5140
#ifdef USE_CALL
5141
5142
#ifdef __GNUC__
5143
__inline
5144
#endif
5145
static int
5146
check_call_reference(CallNode* cn, ParseEnv* env, int state)
5147
8.11k
{
5148
8.11k
  MemEnv* mem_env = PARSEENV_MEMENV(env);
5149
5150
8.11k
  if (cn->by_number != 0) {
5151
8.02k
    int gnum = cn->called_gnum;
5152
5153
8.02k
    if (env->num_named > 0 &&
5154
8.02k
        IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5155
8.02k
        ! OPTON_CAPTURE_GROUP(env->options)) {
5156
8
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
5157
8
    }
5158
5159
8.01k
    if (gnum > env->num_mem) {
5160
322
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
5161
322
                                     cn->name, cn->name_end);
5162
322
      return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5163
322
    }
5164
5165
7.73k
  set_call_attr:
5166
7.73k
    ND_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node;
5167
7.73k
    if (IS_NULL(ND_CALL_BODY(cn))) {
5168
0
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5169
0
                                     cn->name, cn->name_end);
5170
0
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5171
0
    }
5172
5173
7.73k
    ND_STATUS_ADD(ND_CALL_BODY(cn), REFERENCED);
5174
7.73k
  }
5175
88
  else {
5176
88
    int *refs;
5177
5178
88
    int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
5179
88
    if (n <= 0) {
5180
38
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5181
38
                                     cn->name, cn->name_end);
5182
38
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5183
38
    }
5184
50
    else if (n > 1) {
5185
4
      onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
5186
4
                                     cn->name, cn->name_end);
5187
4
      return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
5188
4
    }
5189
46
    else {
5190
46
      cn->called_gnum = refs[0];
5191
46
      goto set_call_attr;
5192
46
    }
5193
88
  }
5194
5195
7.73k
  return 0;
5196
8.11k
}
5197
5198
#ifdef USE_WHOLE_OPTIONS
5199
static int
5200
check_whole_options_position(Node* node /* root */)
5201
20
{
5202
20
  int is_list;
5203
5204
20
  is_list = FALSE;
5205
5206
30
 start:
5207
30
  switch (ND_TYPE(node)) {
5208
10
  case ND_LIST:
5209
10
    if (IS_NOT_NULL(ND_CDR(node)))
5210
10
      is_list = TRUE;
5211
5212
10
    node = ND_CAR(node);
5213
10
    goto start;
5214
0
    break;
5215
5216
16
  case ND_BAG:
5217
16
    {
5218
16
      BagNode* en = BAG_(node);
5219
5220
16
      if (en->type == BAG_OPTION) {
5221
14
        if (ND_IS_WHOLE_OPTIONS(node)) {
5222
12
          if (is_list == TRUE && IS_NOT_NULL(ND_BODY(node)))
5223
2
            break;
5224
5225
10
          return 0;
5226
12
        }
5227
14
      }
5228
16
    }
5229
4
    break;
5230
5231
4
  default:
5232
4
    break;
5233
30
  }
5234
5235
10
  return ONIGERR_INVALID_GROUP_OPTION;
5236
30
}
5237
#endif
5238
5239
static void
5240
tune_call2_call(Node* node)
5241
675k
{
5242
675k
  switch (ND_TYPE(node)) {
5243
81.7k
  case ND_LIST:
5244
104k
  case ND_ALT:
5245
417k
    do {
5246
417k
      tune_call2_call(ND_CAR(node));
5247
417k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5248
104k
    break;
5249
5250
43.2k
  case ND_QUANT:
5251
43.2k
    tune_call2_call(ND_BODY(node));
5252
43.2k
    break;
5253
5254
20.5k
  case ND_ANCHOR:
5255
20.5k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5256
10.7k
      tune_call2_call(ND_BODY(node));
5257
20.5k
    break;
5258
5259
171k
  case ND_BAG:
5260
171k
    {
5261
171k
      BagNode* en = BAG_(node);
5262
5263
171k
      if (en->type == BAG_MEMORY) {
5264
157k
        if (! ND_IS_MARK1(node)) {
5265
78.6k
          ND_STATUS_ADD(node, MARK1);
5266
78.6k
          tune_call2_call(ND_BODY(node));
5267
78.6k
          ND_STATUS_REMOVE(node, MARK1);
5268
78.6k
        }
5269
157k
      }
5270
13.8k
      else if (en->type == BAG_IF_ELSE) {
5271
6.42k
        tune_call2_call(ND_BODY(node));
5272
6.42k
        if (IS_NOT_NULL(en->te.Then))
5273
3.49k
          tune_call2_call(en->te.Then);
5274
6.42k
        if (IS_NOT_NULL(en->te.Else))
5275
3.69k
          tune_call2_call(en->te.Else);
5276
6.42k
      }
5277
7.46k
      else {
5278
7.46k
        tune_call2_call(ND_BODY(node));
5279
7.46k
      }
5280
171k
    }
5281
171k
    break;
5282
5283
110k
  case ND_CALL:
5284
110k
    if (! ND_IS_MARK1(node)) {
5285
97.3k
      ND_STATUS_ADD(node, MARK1);
5286
97.3k
      {
5287
97.3k
        CallNode* cn = CALL_(node);
5288
97.3k
        Node* called = ND_CALL_BODY(cn);
5289
5290
97.3k
        cn->entry_count++;
5291
5292
97.3k
        ND_STATUS_ADD(called, CALLED);
5293
97.3k
        BAG_(called)->m.entry_count++;
5294
97.3k
        tune_call2_call(called);
5295
97.3k
      }
5296
97.3k
      ND_STATUS_REMOVE(node, MARK1);
5297
97.3k
    }
5298
110k
    break;
5299
5300
224k
  default:
5301
224k
    break;
5302
675k
  }
5303
675k
}
5304
5305
static int
5306
tune_call(Node* node, ParseEnv* env, int state)
5307
134k
{
5308
134k
  int r;
5309
5310
134k
  switch (ND_TYPE(node)) {
5311
21.1k
  case ND_LIST:
5312
26.0k
  case ND_ALT:
5313
94.6k
    do {
5314
94.6k
      r = tune_call(ND_CAR(node), env, state);
5315
94.6k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5316
26.0k
    break;
5317
5318
13.7k
  case ND_QUANT:
5319
13.7k
    if (QUANT_(node)->upper == 0)
5320
706
      state |= IN_ZERO_REPEAT;
5321
5322
13.7k
    r = tune_call(ND_BODY(node), env, state);
5323
13.7k
    break;
5324
5325
5.28k
  case ND_ANCHOR:
5326
5.28k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5327
2.46k
      r = tune_call(ND_BODY(node), env, state);
5328
2.81k
    else
5329
2.81k
      r = 0;
5330
5.28k
    break;
5331
5332
18.7k
  case ND_BAG:
5333
18.7k
    {
5334
18.7k
      BagNode* en = BAG_(node);
5335
5336
18.7k
      if (en->type == BAG_MEMORY) {
5337
14.0k
        if ((state & IN_ZERO_REPEAT) != 0) {
5338
396
          ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5339
396
          BAG_(node)->m.entry_count--;
5340
396
        }
5341
14.0k
        r = tune_call(ND_BODY(node), env, state);
5342
14.0k
      }
5343
4.70k
      else if (en->type == BAG_IF_ELSE) {
5344
1.51k
        r = tune_call(ND_BODY(node), env, state);
5345
1.51k
        if (r != 0) return r;
5346
1.50k
        if (IS_NOT_NULL(en->te.Then)) {
5347
666
          r = tune_call(en->te.Then, env, state);
5348
666
          if (r != 0) return r;
5349
666
        }
5350
1.49k
        if (IS_NOT_NULL(en->te.Else))
5351
940
          r = tune_call(en->te.Else, env, state);
5352
1.49k
      }
5353
3.18k
      else
5354
3.18k
        r = tune_call(ND_BODY(node), env, state);
5355
18.7k
    }
5356
18.6k
    break;
5357
5358
18.6k
  case ND_CALL:
5359
8.11k
    if ((state & IN_ZERO_REPEAT) != 0) {
5360
494
      ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5361
494
      CALL_(node)->entry_count--;
5362
494
    }
5363
5364
8.11k
    r = check_call_reference(CALL_(node), env, state);
5365
8.11k
    break;
5366
5367
62.7k
  default:
5368
62.7k
    r = 0;
5369
62.7k
    break;
5370
134k
  }
5371
5372
134k
  return r;
5373
134k
}
5374
5375
static int
5376
tune_call2(Node* node)
5377
129k
{
5378
129k
  int r = 0;
5379
5380
129k
  switch (ND_TYPE(node)) {
5381
20.4k
  case ND_LIST:
5382
25.1k
  case ND_ALT:
5383
91.8k
    do {
5384
91.8k
      r = tune_call2(ND_CAR(node));
5385
91.8k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5386
25.1k
    break;
5387
5388
13.2k
  case ND_QUANT:
5389
13.2k
    if (QUANT_(node)->upper != 0)
5390
12.6k
      r = tune_call2(ND_BODY(node));
5391
13.2k
    break;
5392
5393
5.05k
  case ND_ANCHOR:
5394
5.05k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5395
2.32k
      r = tune_call2(ND_BODY(node));
5396
5.05k
    break;
5397
5398
18.0k
  case ND_BAG:
5399
18.0k
    if (! ND_IS_IN_ZERO_REPEAT(node))
5400
18.0k
      r = tune_call2(ND_BODY(node));
5401
5402
18.0k
    {
5403
18.0k
      BagNode* en = BAG_(node);
5404
5405
18.0k
      if (r != 0) return r;
5406
18.0k
      if (en->type == BAG_IF_ELSE) {
5407
1.45k
        if (IS_NOT_NULL(en->te.Then)) {
5408
624
          r = tune_call2(en->te.Then);
5409
624
          if (r != 0) return r;
5410
624
        }
5411
1.45k
        if (IS_NOT_NULL(en->te.Else))
5412
936
          r = tune_call2(en->te.Else);
5413
1.45k
      }
5414
18.0k
    }
5415
18.0k
    break;
5416
5417
18.0k
  case ND_CALL:
5418
7.23k
    if (! ND_IS_IN_ZERO_REPEAT(node)) {
5419
7.23k
      tune_call2_call(node);
5420
7.23k
    }
5421
7.23k
    break;
5422
5423
60.8k
  default:
5424
60.8k
    break;
5425
129k
  }
5426
5427
129k
  return r;
5428
129k
}
5429
5430
5431
static void
5432
tune_called_state_call(Node* node, int state)
5433
682k
{
5434
682k
  switch (ND_TYPE(node)) {
5435
22.4k
  case ND_ALT:
5436
22.4k
    state |= IN_ALT;
5437
    /* fall */
5438
102k
  case ND_LIST:
5439
410k
    do {
5440
410k
      tune_called_state_call(ND_CAR(node), state);
5441
410k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5442
102k
    break;
5443
5444
45.7k
  case ND_QUANT:
5445
45.7k
    {
5446
45.7k
      QuantNode* qn = QUANT_(node);
5447
5448
45.7k
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5449
31.5k
        state |= IN_REAL_REPEAT;
5450
45.7k
      if (qn->lower != qn->upper)
5451
41.9k
        state |= IN_VAR_REPEAT;
5452
45.7k
      if ((state & IN_PEEK) != 0)
5453
14.7k
        ND_STATUS_ADD(node, INPEEK);
5454
5455
45.7k
      tune_called_state_call(ND_QUANT_BODY(qn), state);
5456
45.7k
    }
5457
45.7k
    break;
5458
5459
21.1k
  case ND_ANCHOR:
5460
21.1k
    {
5461
21.1k
      AnchorNode* an = ANCHOR_(node);
5462
5463
21.1k
      switch (an->type) {
5464
2.25k
      case ANCR_PREC_READ_NOT:
5465
3.12k
      case ANCR_LOOK_BEHIND_NOT:
5466
3.12k
        state |= (IN_NOT | IN_PEEK);
5467
3.12k
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5468
3.12k
        break;
5469
6.51k
      case ANCR_PREC_READ:
5470
7.77k
      case ANCR_LOOK_BEHIND:
5471
7.77k
        state |= IN_PEEK;
5472
7.77k
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5473
7.77k
        break;
5474
10.2k
      default:
5475
10.2k
        break;
5476
21.1k
      }
5477
21.1k
    }
5478
21.1k
    break;
5479
5480
182k
  case ND_BAG:
5481
182k
    {
5482
182k
      BagNode* en = BAG_(node);
5483
5484
182k
      if (en->type == BAG_MEMORY) {
5485
167k
        if (ND_IS_MARK1(node)) {
5486
90.5k
          if ((~en->m.called_state & state) != 0) {
5487
442
            en->m.called_state |= state;
5488
442
            tune_called_state_call(ND_BODY(node), state);
5489
442
          }
5490
90.5k
        }
5491
77.3k
        else {
5492
77.3k
          ND_STATUS_ADD(node, MARK1);
5493
77.3k
          en->m.called_state |= state;
5494
77.3k
          tune_called_state_call(ND_BODY(node), state);
5495
77.3k
          ND_STATUS_REMOVE(node, MARK1);
5496
77.3k
        }
5497
167k
      }
5498
14.4k
      else if (en->type == BAG_IF_ELSE) {
5499
6.26k
        state |= IN_ALT;
5500
6.26k
        tune_called_state_call(ND_BODY(node), state);
5501
6.26k
        if (IS_NOT_NULL(en->te.Then)) {
5502
3.45k
          tune_called_state_call(en->te.Then, state);
5503
3.45k
        }
5504
6.26k
        if (IS_NOT_NULL(en->te.Else))
5505
3.57k
          tune_called_state_call(en->te.Else, state);
5506
6.26k
      }
5507
8.18k
      else {
5508
8.18k
        tune_called_state_call(ND_BODY(node), state);
5509
8.18k
      }
5510
182k
    }
5511
182k
    break;
5512
5513
108k
  case ND_CALL:
5514
108k
    if ((state & IN_PEEK) != 0)
5515
28.5k
      ND_STATUS_ADD(node, INPEEK);
5516
108k
    if ((state & IN_REAL_REPEAT) != 0)
5517
66.6k
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5518
5519
108k
    tune_called_state_call(ND_BODY(node), state);
5520
108k
    break;
5521
5522
221k
  default:
5523
221k
    break;
5524
682k
  }
5525
682k
}
5526
5527
static void
5528
tune_called_state(Node* node, int state)
5529
124k
{
5530
124k
  switch (ND_TYPE(node)) {
5531
4.59k
  case ND_ALT:
5532
4.59k
    state |= IN_ALT;
5533
    /* fall */
5534
23.2k
  case ND_LIST:
5535
86.4k
    do {
5536
86.4k
      tune_called_state(ND_CAR(node), state);
5537
86.4k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5538
23.2k
    break;
5539
5540
0
#ifdef USE_CALL
5541
7.31k
  case ND_CALL:
5542
7.31k
    if ((state & IN_PEEK) != 0)
5543
1.46k
      ND_STATUS_ADD(node, INPEEK);
5544
7.31k
    if ((state & IN_REAL_REPEAT) != 0)
5545
3.26k
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5546
5547
7.31k
    tune_called_state_call(node, state);
5548
7.31k
    break;
5549
0
#endif
5550
5551
17.8k
  case ND_BAG:
5552
17.8k
    {
5553
17.8k
      BagNode* en = BAG_(node);
5554
5555
17.8k
      switch (en->type) {
5556
13.5k
      case BAG_MEMORY:
5557
13.5k
        if (en->m.entry_count > 1)
5558
4.35k
          state |= IN_MULTI_ENTRY;
5559
5560
13.5k
        en->m.called_state |= state;
5561
        /* fall */
5562
13.6k
      case BAG_OPTION:
5563
16.5k
      case BAG_STOP_BACKTRACK:
5564
16.5k
        tune_called_state(ND_BODY(node), state);
5565
16.5k
        break;
5566
1.35k
      case BAG_IF_ELSE:
5567
1.35k
        state |= IN_ALT;
5568
1.35k
        tune_called_state(ND_BODY(node), state);
5569
1.35k
        if (IS_NOT_NULL(en->te.Then))
5570
594
          tune_called_state(en->te.Then, state);
5571
1.35k
        if (IS_NOT_NULL(en->te.Else))
5572
858
          tune_called_state(en->te.Else, state);
5573
1.35k
        break;
5574
17.8k
      }
5575
17.8k
    }
5576
17.8k
    break;
5577
5578
17.8k
  case ND_QUANT:
5579
13.3k
    {
5580
13.3k
      QuantNode* qn = QUANT_(node);
5581
5582
13.3k
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5583
9.51k
        state |= IN_REAL_REPEAT;
5584
13.3k
      if (qn->lower != qn->upper)
5585
11.7k
        state |= IN_VAR_REPEAT;
5586
13.3k
      if ((state & IN_PEEK) != 0)
5587
2.28k
        ND_STATUS_ADD(node, INPEEK);
5588
5589
13.3k
      tune_called_state(ND_QUANT_BODY(qn), state);
5590
13.3k
    }
5591
13.3k
    break;
5592
5593
5.18k
  case ND_ANCHOR:
5594
5.18k
    {
5595
5.18k
      AnchorNode* an = ANCHOR_(node);
5596
5597
5.18k
      switch (an->type) {
5598
264
      case ANCR_PREC_READ_NOT:
5599
686
      case ANCR_LOOK_BEHIND_NOT:
5600
686
        state |= (IN_NOT | IN_PEEK);
5601
686
        tune_called_state(ND_ANCHOR_BODY(an), state);
5602
686
        break;
5603
1.44k
      case ANCR_PREC_READ:
5604
1.77k
      case ANCR_LOOK_BEHIND:
5605
1.77k
        state |= IN_PEEK;
5606
1.77k
        tune_called_state(ND_ANCHOR_BODY(an), state);
5607
1.77k
        break;
5608
2.72k
      default:
5609
2.72k
        break;
5610
5.18k
      }
5611
5.18k
    }
5612
5.18k
    break;
5613
5614
5.18k
  case ND_BACKREF:
5615
33.4k
  case ND_STRING:
5616
41.6k
  case ND_CTYPE:
5617
51.4k
  case ND_CCLASS:
5618
57.4k
  case ND_GIMMICK:
5619
57.4k
  default:
5620
57.4k
    break;
5621
124k
  }
5622
124k
}
5623
5624
#endif  /* USE_CALL */
5625
5626
5627
#ifdef __GNUC__
5628
__inline
5629
#endif
5630
static int
5631
tune_anchor(Node* node, regex_t* reg, int state, ParseEnv* env)
5632
44.2k
{
5633
44.2k
  int r;
5634
44.2k
  AnchorNode* an = ANCHOR_(node);
5635
5636
44.2k
  switch (an->type) {
5637
2.13k
  case ANCR_PREC_READ:
5638
2.13k
    r = tune_tree(ND_ANCHOR_BODY(an), reg, (state | IN_PREC_READ), env);
5639
2.13k
    break;
5640
616
  case ANCR_PREC_READ_NOT:
5641
616
    r = tune_tree(ND_ANCHOR_BODY(an), reg, (state | IN_PREC_READ | IN_NOT),
5642
616
                  env);
5643
616
    break;
5644
5645
9.98k
  case ANCR_LOOK_BEHIND:
5646
17.1k
  case ANCR_LOOK_BEHIND_NOT:
5647
17.1k
    r = tune_look_behind(node, reg, state, env);
5648
17.1k
    break;
5649
5650
24.2k
  default:
5651
24.2k
    r = 0;
5652
24.2k
    break;
5653
44.2k
  }
5654
5655
44.2k
  return r;
5656
44.2k
}
5657
5658
#ifdef __GNUC__
5659
__inline
5660
#endif
5661
static int
5662
tune_quant(Node* node, regex_t* reg, int state, ParseEnv* env)
5663
85.2k
{
5664
85.2k
  int r;
5665
85.2k
  QuantNode* qn = QUANT_(node);
5666
85.2k
  Node* body = ND_BODY(node);
5667
5668
85.2k
  if ((state & IN_REAL_REPEAT) != 0) {
5669
26.3k
    ND_STATUS_ADD(node, IN_REAL_REPEAT);
5670
26.3k
  }
5671
85.2k
  if ((state & IN_MULTI_ENTRY) != 0) {
5672
10.8k
    ND_STATUS_ADD(node, IN_MULTI_ENTRY);
5673
10.8k
  }
5674
5675
85.2k
  if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
5676
83.1k
    OnigLen d = node_min_byte_len(body, env);
5677
83.1k
    if (d == 0) {
5678
15.1k
#ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
5679
15.1k
      qn->emptiness = quantifiers_memory_node_info(body);
5680
#else
5681
      qn->emptiness = BODY_MAY_BE_EMPTY;
5682
#endif
5683
15.1k
    }
5684
83.1k
  }
5685
5686
85.2k
  if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5687
70.3k
    state |= IN_REAL_REPEAT;
5688
85.2k
  if (qn->lower != qn->upper)
5689
71.6k
    state |= IN_VAR_REPEAT;
5690
5691
85.2k
  r = tune_tree(body, reg, state, env);
5692
85.2k
  if (r != 0) return r;
5693
5694
  /* expand string */
5695
85.0k
#define EXPAND_STRING_MAX_LENGTH  100
5696
85.0k
  if (ND_TYPE(body) == ND_STRING) {
5697
23.5k
    if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
5698
23.5k
        qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
5699
1.00k
      int len = ND_STRING_LEN(body);
5700
5701
1.00k
      if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
5702
902
        int i, n = qn->lower;
5703
902
        node_conv_to_str_node(node, body);
5704
25.8k
        for (i = 0; i < n; i++) {
5705
24.9k
          r = node_str_node_cat(node, body);
5706
24.9k
          if (r != 0) return r;
5707
24.9k
        }
5708
902
        onig_node_free(body);
5709
902
        return r;
5710
902
      }
5711
1.00k
    }
5712
23.5k
  }
5713
5714
84.1k
  if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
5715
65.5k
    if (ND_TYPE(body) == ND_QUANT) {
5716
4.12k
      QuantNode* tqn = QUANT_(body);
5717
4.12k
      if (IS_NOT_NULL(tqn->head_exact)) {
5718
1.62k
        qn->head_exact  = tqn->head_exact;
5719
1.62k
        tqn->head_exact = NULL;
5720
1.62k
      }
5721
4.12k
    }
5722
61.3k
    else {
5723
61.3k
      qn->head_exact = get_tree_head_literal(ND_BODY(node), 1, reg);
5724
61.3k
    }
5725
65.5k
  }
5726
5727
84.1k
  return r;
5728
85.0k
}
5729
5730
/* tune_tree does the following work.
5731
 1. check empty loop. (set qn->emptiness)
5732
 2. expand ignore-case in char class.
5733
 3. set memory status bit flags. (reg->mem_stats)
5734
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
5735
 5. find invalid patterns in look-behind.
5736
 6. expand repeated string.
5737
 */
5738
static int
5739
tune_tree(Node* node, regex_t* reg, int state, ParseEnv* env)
5740
939k
{
5741
939k
  int r = 0;
5742
5743
939k
  switch (ND_TYPE(node)) {
5744
200k
  case ND_LIST:
5745
200k
    {
5746
200k
      Node* prev = NULL_NODE;
5747
551k
      do {
5748
551k
        r = tune_tree(ND_CAR(node), reg, state, env);
5749
551k
        if (IS_NOT_NULL(prev) && r == 0) {
5750
350k
          r = tune_next(prev, ND_CAR(node), reg);
5751
350k
        }
5752
551k
        prev = ND_CAR(node);
5753
551k
      } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5754
200k
    }
5755
200k
    break;
5756
5757
17.3k
  case ND_ALT:
5758
177k
    do {
5759
177k
      r = tune_tree(ND_CAR(node), reg, (state | IN_ALT), env);
5760
177k
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5761
17.3k
    break;
5762
5763
183k
  case ND_STRING:
5764
183k
    if (ND_IS_REAL_IGNORECASE(node)) {
5765
62.6k
      r = unravel_case_fold_string(node, reg, state);
5766
62.6k
    }
5767
183k
    break;
5768
5769
11.3k
  case ND_BACKREF:
5770
11.3k
    {
5771
11.3k
      int i;
5772
11.3k
      int* p;
5773
11.3k
      BackRefNode* br = BACKREF_(node);
5774
11.3k
      p = BACKREFS_P(br);
5775
28.9k
      for (i = 0; i < br->back_num; i++) {
5776
17.5k
        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
5777
17.5k
        MEM_STATUS_ON(env->backrefed_mem, p[i]);
5778
#if 0
5779
#ifdef USE_BACKREF_WITH_LEVEL
5780
        if (ND_IS_NEST_LEVEL(node)) {
5781
          MEM_STATUS_ON(env->backtrack_mem, p[i]);
5782
        }
5783
#endif
5784
#else
5785
        /* More precisely, it should be checked whether alt/repeat exists before
5786
           the subject capture node, and then this backreference position
5787
           exists before (or in) the capture node. */
5788
17.5k
        MEM_STATUS_ON(env->backtrack_mem, p[i]);
5789
17.5k
#endif
5790
17.5k
      }
5791
11.3k
    }
5792
11.3k
    break;
5793
5794
58.1k
  case ND_BAG:
5795
58.1k
    {
5796
58.1k
      BagNode* en = BAG_(node);
5797
5798
58.1k
      switch (en->type) {
5799
506
      case BAG_OPTION:
5800
506
        {
5801
506
          OnigOptionType options = reg->options;
5802
506
          reg->options = BAG_(node)->o.options;
5803
506
          r = tune_tree(ND_BODY(node), reg, state, env);
5804
506
          reg->options = options;
5805
506
        }
5806
506
        break;
5807
5808
34.8k
      case BAG_MEMORY:
5809
34.8k
#ifdef USE_CALL
5810
34.8k
        state |= en->m.called_state;
5811
34.8k
#endif
5812
5813
34.8k
        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
5814
34.8k
            || ND_IS_RECURSION(node)) {
5815
24.5k
          MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
5816
24.5k
        }
5817
34.8k
        r = tune_tree(ND_BODY(node), reg, state, env);
5818
34.8k
        break;
5819
5820
15.9k
      case BAG_STOP_BACKTRACK:
5821
15.9k
        {
5822
15.9k
          Node* target = ND_BODY(node);
5823
15.9k
          r = tune_tree(target, reg, state, env);
5824
15.9k
          if (ND_TYPE(target) == ND_QUANT) {
5825
9.22k
            QuantNode* tqn = QUANT_(target);
5826
9.22k
            if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
5827
9.22k
                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
5828
6.94k
              if (is_strict_real_node(ND_BODY(target)))
5829
1.92k
                ND_STATUS_ADD(node, STRICT_REAL_REPEAT);
5830
6.94k
            }
5831
9.22k
          }
5832
15.9k
        }
5833
15.9k
        break;
5834
5835
6.79k
      case BAG_IF_ELSE:
5836
6.79k
        r = tune_tree(ND_BODY(node), reg, (state | IN_ALT), env);
5837
6.79k
        if (r != 0) return r;
5838
6.78k
        if (IS_NOT_NULL(en->te.Then)) {
5839
2.94k
          r = tune_tree(en->te.Then, reg, (state | IN_ALT), env);
5840
2.94k
          if (r != 0) return r;
5841
2.94k
        }
5842
6.77k
        if (IS_NOT_NULL(en->te.Else))
5843
4.48k
          r = tune_tree(en->te.Else, reg, (state | IN_ALT), env);
5844
6.77k
        break;
5845
58.1k
      }
5846
58.1k
    }
5847
58.1k
    break;
5848
5849
85.2k
  case ND_QUANT:
5850
85.2k
    if ((state & (IN_PREC_READ | IN_LOOK_BEHIND)) != 0)
5851
18.2k
      ND_STATUS_ADD(node, INPEEK);
5852
5853
85.2k
    r = tune_quant(node, reg, state, env);
5854
85.2k
    break;
5855
5856
44.2k
  case ND_ANCHOR:
5857
44.2k
    r = tune_anchor(node, reg, state, env);
5858
44.2k
    break;
5859
5860
0
#ifdef USE_CALL
5861
7.71k
  case ND_CALL:
5862
7.71k
#endif
5863
50.9k
  case ND_CTYPE:
5864
313k
  case ND_CCLASS:
5865
339k
  case ND_GIMMICK:
5866
339k
  default:
5867
339k
    break;
5868
939k
  }
5869
5870
939k
  return r;
5871
939k
}
5872
5873
#ifndef ONIG_DONT_OPTIMIZE
5874
static int
5875
set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,
5876
                                          UChar* s, UChar* end,
5877
                                          UChar skip[], int* roffset)
5878
7.38k
{
5879
7.38k
  int i, j, k, len, offset;
5880
7.38k
  int n, clen;
5881
7.38k
  UChar* p;
5882
7.38k
  OnigEncoding enc;
5883
7.38k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
5884
7.38k
  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
5885
5886
7.38k
  enc = reg->enc;
5887
7.38k
  offset = ENC_GET_SKIP_OFFSET(enc);
5888
7.38k
  if (offset == ENC_SKIP_OFFSET_1_OR_0) {
5889
2.16k
    UChar* p = s;
5890
7.95k
    while (1) {
5891
7.95k
      len = enclen(enc, p);
5892
7.95k
      if (p + len >= end) {
5893
2.16k
        if (len == 1) offset = 1;
5894
463
        else          offset = 0;
5895
2.16k
        break;
5896
2.16k
      }
5897
5.78k
      p += len;
5898
5.78k
    }
5899
2.16k
  }
5900
5901
7.38k
  len = (int )(end - s);
5902
7.38k
  if (len + offset >= UCHAR_MAX)
5903
0
    return ONIGERR_PARSER_BUG;
5904
5905
7.38k
  *roffset = offset;
5906
5907
1.89M
  for (i = 0; i < CHAR_MAP_SIZE; i++) {
5908
1.89M
    skip[i] = (UChar )(len + offset);
5909
1.89M
  }
5910
5911
38.2k
  for (p = s; p < end; ) {
5912
30.8k
    int z;
5913
5914
30.8k
    clen = enclen(enc, p);
5915
30.8k
    if (p + clen > end) clen = (int )(end - p);
5916
5917
30.8k
    len = (int )(end - p);
5918
67.2k
    for (j = 0; j < clen; j++) {
5919
36.8k
      z = len - j + (offset - 1);
5920
36.8k
      if (z <= 0) break;
5921
36.4k
      skip[p[j]] = z;
5922
36.4k
    }
5923
5924
30.8k
    if (case_expand != 0) {
5925
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
5926
0
                                             p, end, items);
5927
0
      for (k = 0; k < n; k++) {
5928
0
        ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
5929
0
        for (j = 0; j < clen; j++) {
5930
0
          z = len - j + (offset - 1);
5931
0
          if (z <= 0) break;
5932
0
          if (skip[buf[j]] > z)
5933
0
            skip[buf[j]] = z;
5934
0
        }
5935
0
      }
5936
0
    }
5937
5938
30.8k
    p += clen;
5939
30.8k
  }
5940
5941
7.38k
  return 0;
5942
7.38k
}
5943
#endif
5944
5945
5946
427k
#define OPT_EXACT_MAXLEN   24
5947
5948
#if OPT_EXACT_MAXLEN >= UCHAR_MAX
5949
#error Too big OPT_EXACT_MAXLEN
5950
#endif
5951
5952
typedef struct {
5953
  MinMaxLen        mm;
5954
  OnigEncoding     enc;
5955
  OnigCaseFoldType case_fold_flag;
5956
  ParseEnv*        scan_env;
5957
} OptEnv;
5958
5959
typedef struct {
5960
  int left;
5961
  int right;
5962
} OptAnc;
5963
5964
typedef struct {
5965
  MinMaxLen  mm;   /* position */
5966
  OptAnc     anc;
5967
  int        reach_end;
5968
  int        len;
5969
  UChar      s[OPT_EXACT_MAXLEN];
5970
} OptStr;
5971
5972
typedef struct {
5973
  MinMaxLen mm;     /* position */
5974
  OptAnc    anc;
5975
  int       value;  /* weighted value */
5976
  UChar     map[CHAR_MAP_SIZE];
5977
} OptMap;
5978
5979
typedef struct {
5980
  MinMaxLen len;
5981
  OptAnc  anc;
5982
  OptStr  sb;     /* boundary */
5983
  OptStr  sm;     /* middle */
5984
  OptStr  spr;    /* prec read (?=...) */
5985
  OptMap  map;    /* boundary */
5986
} OptNode;
5987
5988
5989
#ifndef ONIG_DONT_OPTIMIZE
5990
5991
static int
5992
map_position_value(OnigEncoding enc, int i)
5993
876k
{
5994
876k
  static const short int Vals[] = {
5995
876k
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
5996
876k
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
5997
876k
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
5998
876k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
5999
876k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
6000
876k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
6001
876k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
6002
876k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
6003
876k
  };
6004
6005
876k
  if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
6006
504k
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
6007
4.45k
      return 20;
6008
499k
    else
6009
499k
      return (int )Vals[i];
6010
504k
  }
6011
371k
  else
6012
371k
    return 4;   /* Take it easy. */
6013
876k
}
6014
6015
static int
6016
distance_value(MinMaxLen* mm)
6017
204k
{
6018
  /* 1000 / (min-max-dist + 1) */
6019
204k
  static const short int dist_vals[] = {
6020
204k
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
6021
204k
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
6022
204k
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
6023
204k
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
6024
204k
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
6025
204k
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
6026
204k
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
6027
204k
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
6028
204k
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
6029
204k
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
6030
204k
  };
6031
6032
204k
  OnigLen d;
6033
6034
204k
  if (mm->max == INFINITE_LEN) return 0;
6035
6036
137k
  d = mm->max - mm->min;
6037
137k
  if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
6038
    /* return dist_vals[d] * 16 / (mm->min + 12); */
6039
131k
    return (int )dist_vals[d];
6040
6.68k
  else
6041
6.68k
    return 1;
6042
137k
}
6043
6044
static int
6045
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
6046
102k
{
6047
102k
  if (v2 <= 0) return -1;
6048
102k
  if (v1 <= 0) return  1;
6049
6050
102k
  v1 *= distance_value(d1);
6051
102k
  v2 *= distance_value(d2);
6052
6053
102k
  if (v2 > v1) return  1;
6054
91.9k
  if (v2 < v1) return -1;
6055
6056
38.3k
  if (d2->min < d1->min) return  1;
6057
38.1k
  if (d2->min > d1->min) return -1;
6058
3.82k
  return 0;
6059
38.1k
}
6060
6061
static void
6062
copy_opt_env(OptEnv* to, OptEnv* from)
6063
96.8k
{
6064
96.8k
  *to = *from;
6065
96.8k
}
6066
6067
static void
6068
clear_opt_anc_info(OptAnc* a)
6069
2.53M
{
6070
2.53M
  a->left  = 0;
6071
2.53M
  a->right = 0;
6072
2.53M
}
6073
6074
static void
6075
copy_opt_anc_info(OptAnc* to, OptAnc* from)
6076
321k
{
6077
321k
  *to = *from;
6078
321k
}
6079
6080
static void
6081
concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
6082
                    OnigLen left_len, OnigLen right_len)
6083
321k
{
6084
321k
  clear_opt_anc_info(to);
6085
6086
321k
  to->left = left->left;
6087
321k
  if (left_len == 0) {
6088
126k
    to->left |= right->left;
6089
126k
  }
6090
6091
321k
  to->right = right->right;
6092
321k
  if (right_len == 0) {
6093
37.9k
    to->right |= left->right;
6094
37.9k
  }
6095
283k
  else {
6096
283k
    to->right |= (left->right & ANCR_PREC_READ_NOT);
6097
283k
  }
6098
321k
}
6099
6100
static int
6101
is_left(int a)
6102
11.3k
{
6103
11.3k
  if (a == ANCR_END_BUF  || a == ANCR_SEMI_END_BUF ||
6104
11.3k
      a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)
6105
3.52k
    return 0;
6106
6107
7.79k
  return 1;
6108
11.3k
}
6109
6110
static int
6111
is_set_opt_anc_info(OptAnc* to, int anc)
6112
17.4k
{
6113
17.4k
  if ((to->left & anc) != 0) return 1;
6114
6115
16.9k
  return ((to->right & anc) != 0 ? 1 : 0);
6116
17.4k
}
6117
6118
static void
6119
add_opt_anc_info(OptAnc* to, int anc)
6120
11.0k
{
6121
11.0k
  if (is_left(anc))
6122
7.50k
    to->left |= anc;
6123
3.52k
  else
6124
3.52k
    to->right |= anc;
6125
11.0k
}
6126
6127
static void
6128
remove_opt_anc_info(OptAnc* to, int anc)
6129
287
{
6130
287
  if (is_left(anc))
6131
287
    to->left &= ~anc;
6132
0
  else
6133
0
    to->right &= ~anc;
6134
287
}
6135
6136
static void
6137
alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
6138
132k
{
6139
132k
  to->left  &= add->left;
6140
132k
  to->right &= add->right;
6141
132k
}
6142
6143
static int
6144
is_full_opt_exact(OptStr* e)
6145
21.4k
{
6146
21.4k
  return e->len >= OPT_EXACT_MAXLEN;
6147
21.4k
}
6148
6149
static void
6150
clear_opt_exact(OptStr* e)
6151
1.73M
{
6152
1.73M
  mml_clear(&e->mm);
6153
1.73M
  clear_opt_anc_info(&e->anc);
6154
1.73M
  e->reach_end = 0;
6155
1.73M
  e->len       = 0;
6156
1.73M
  e->s[0]      = '\0';
6157
1.73M
}
6158
6159
static void
6160
copy_opt_exact(OptStr* to, OptStr* from)
6161
49.8k
{
6162
49.8k
  *to = *from;
6163
49.8k
}
6164
6165
static int
6166
concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
6167
22.6k
{
6168
22.6k
  int i, j, len, r;
6169
22.6k
  UChar *p, *end;
6170
22.6k
  OptAnc tanc;
6171
6172
22.6k
  r = 0;
6173
22.6k
  p = add->s;
6174
22.6k
  end = p + add->len;
6175
56.3k
  for (i = to->len; p < end; ) {
6176
34.1k
    len = enclen(enc, p);
6177
34.1k
    if (i + len > OPT_EXACT_MAXLEN) {
6178
449
      r = 1; /* 1:full */
6179
449
      break;
6180
449
    }
6181
68.1k
    for (j = 0; j < len && p < end; j++) {
6182
      /* coverity[overrun-local] */
6183
34.4k
      to->s[i++] = *p++;
6184
34.4k
    }
6185
33.7k
  }
6186
6187
22.6k
  to->len = i;
6188
22.6k
  to->reach_end = (p == end ? add->reach_end : 0);
6189
6190
22.6k
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
6191
22.6k
  if (! to->reach_end) tanc.right = 0;
6192
22.6k
  copy_opt_anc_info(&to->anc, &tanc);
6193
6194
22.6k
  return r;
6195
22.6k
}
6196
6197
static void
6198
concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
6199
106k
{
6200
106k
  int i, j, len;
6201
106k
  UChar *p;
6202
6203
291k
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
6204
185k
    len = enclen(enc, p);
6205
185k
    if (i + len > OPT_EXACT_MAXLEN) break;
6206
433k
    for (j = 0; j < len && p < end; j++) {
6207
      /* coverity[overrun-local] */
6208
247k
      to->s[i++] = *p++;
6209
247k
    }
6210
185k
  }
6211
6212
106k
  to->len = i;
6213
6214
106k
  if (p >= end)
6215
105k
    to->reach_end = 1;
6216
106k
}
6217
6218
static void
6219
alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
6220
293k
{
6221
293k
  int i, j, len;
6222
6223
293k
  if (add->len == 0 || to->len == 0) {
6224
287k
    clear_opt_exact(to);
6225
287k
    return ;
6226
287k
  }
6227
6228
5.99k
  if (! mml_is_equal(&to->mm, &add->mm)) {
6229
681
    clear_opt_exact(to);
6230
681
    return ;
6231
681
  }
6232
6233
6.47k
  for (i = 0; i < to->len && i < add->len; ) {
6234
5.98k
    if (to->s[i] != add->s[i]) break;
6235
1.65k
    len = enclen(env->enc, to->s + i);
6236
6237
2.32k
    for (j = 1; j < len; j++) {
6238
1.16k
      if (to->s[i+j] != add->s[i+j]) break;
6239
1.16k
    }
6240
1.65k
    if (j < len) break;
6241
1.15k
    i += len;
6242
1.15k
  }
6243
6244
5.31k
  if (! add->reach_end || i < add->len || i < to->len) {
6245
5.08k
    to->reach_end = 0;
6246
5.08k
  }
6247
5.31k
  to->len = i;
6248
6249
5.31k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6250
5.31k
  if (! to->reach_end) to->anc.right = 0;
6251
5.31k
}
6252
6253
static void
6254
select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
6255
583k
{
6256
583k
  int vn, va;
6257
6258
583k
  vn = now->len;
6259
583k
  va = alt->len;
6260
6261
583k
  if (va == 0) {
6262
513k
    return ;
6263
513k
  }
6264
70.9k
  else if (vn == 0) {
6265
46.9k
    copy_opt_exact(now, alt);
6266
46.9k
    return ;
6267
46.9k
  }
6268
23.9k
  else if (vn <= 2 && va <= 2) {
6269
    /* ByteValTable[x] is big value --> low price */
6270
11.6k
    va = map_position_value(enc, now->s[0]);
6271
11.6k
    vn = map_position_value(enc, alt->s[0]);
6272
6273
11.6k
    if (now->len > 1) vn += 5;
6274
11.6k
    if (alt->len > 1) va += 5;
6275
11.6k
  }
6276
6277
23.9k
  vn *= 2;
6278
23.9k
  va *= 2;
6279
6280
23.9k
  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
6281
1.91k
    copy_opt_exact(now, alt);
6282
23.9k
}
6283
6284
static void
6285
clear_opt_map(OptMap* map)
6286
483k
{
6287
483k
  static const OptMap clean_info = {
6288
483k
    {0, 0}, {0, 0}, 0,
6289
483k
    {
6290
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6291
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6292
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6293
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6294
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6295
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6296
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6297
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6298
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6299
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6300
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6301
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6302
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6303
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6304
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6305
483k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6306
483k
    }
6307
483k
  };
6308
6309
483k
  xmemcpy(map, &clean_info, sizeof(OptMap));
6310
483k
}
6311
6312
static void
6313
copy_opt_map(OptMap* to, OptMap* from)
6314
54.9k
{
6315
54.9k
  *to = *from;
6316
54.9k
}
6317
6318
static void
6319
add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
6320
736k
{
6321
736k
  if (m->map[c] == 0) {
6322
736k
    m->map[c] = 1;
6323
736k
    m->value += map_position_value(enc, c);
6324
736k
  }
6325
736k
}
6326
6327
static void
6328
select_opt_map(OptMap* now, OptMap* alt)
6329
286k
{
6330
286k
  static int z = 1<<15; /* 32768: something big value */
6331
6332
286k
  int vn, va;
6333
6334
286k
  if (alt->value == 0) return ;
6335
116k
  if (now->value == 0) {
6336
46.7k
    copy_opt_map(now, alt);
6337
46.7k
    return ;
6338
46.7k
  }
6339
6340
69.7k
  vn = z / now->value;
6341
69.7k
  va = z / alt->value;
6342
69.7k
  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
6343
7.67k
    copy_opt_map(now, alt);
6344
69.7k
}
6345
6346
static int
6347
comp_opt_exact_or_map(OptStr* e, OptMap* m)
6348
8.66k
{
6349
17.3k
#define COMP_EM_BASE  20
6350
8.66k
  int ae, am;
6351
8.66k
  int case_value;
6352
6353
8.66k
  if (m->value <= 0) return -1;
6354
6355
8.66k
  case_value = 3;
6356
8.66k
  ae = COMP_EM_BASE * e->len * case_value;
6357
8.66k
  am = COMP_EM_BASE * 5 * 2 / m->value;
6358
8.66k
  return comp_distance_value(&e->mm, &m->mm, ae, am);
6359
8.66k
}
6360
6361
static void
6362
alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
6363
97.9k
{
6364
97.9k
  int i, val;
6365
6366
  /* if (! mml_is_equal(&to->mm, &add->mm)) return ; */
6367
97.9k
  if (to->value == 0) return ;
6368
31.6k
  if (add->value == 0 || to->mm.max < add->mm.min) {
6369
2.29k
    clear_opt_map(to);
6370
2.29k
    return ;
6371
2.29k
  }
6372
6373
29.3k
  mml_alt_merge(&to->mm, &add->mm);
6374
6375
29.3k
  val = 0;
6376
7.54M
  for (i = 0; i < CHAR_MAP_SIZE; i++) {
6377
7.51M
    if (add->map[i])
6378
41.0k
      to->map[i] = 1;
6379
6380
7.51M
    if (to->map[i])
6381
116k
      val += map_position_value(enc, i);
6382
7.51M
  }
6383
29.3k
  to->value = val;
6384
6385
29.3k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6386
29.3k
}
6387
6388
static void
6389
set_bound_node_opt_info(OptNode* opt, MinMaxLen* plen)
6390
480k
{
6391
480k
  mml_copy(&(opt->sb.mm),  plen);
6392
480k
  mml_copy(&(opt->spr.mm), plen);
6393
480k
  mml_copy(&(opt->map.mm), plen);
6394
480k
}
6395
6396
static void
6397
clear_node_opt_info(OptNode* opt)
6398
480k
{
6399
480k
  mml_clear(&opt->len);
6400
480k
  clear_opt_anc_info(&opt->anc);
6401
480k
  clear_opt_exact(&opt->sb);
6402
480k
  clear_opt_exact(&opt->sm);
6403
480k
  clear_opt_exact(&opt->spr);
6404
480k
  clear_opt_map(&opt->map);
6405
480k
}
6406
6407
static void
6408
copy_node_opt_info(OptNode* to, OptNode* from)
6409
29.8k
{
6410
29.8k
  *to = *from;
6411
29.8k
}
6412
6413
static void
6414
concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
6415
286k
{
6416
286k
  int sb_reach, sm_reach;
6417
286k
  OptAnc tanc;
6418
6419
286k
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
6420
286k
  copy_opt_anc_info(&to->anc, &tanc);
6421
6422
286k
  if (add->sb.len > 0 && to->len.max == 0) {
6423
12.6k
    concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);
6424
12.6k
    copy_opt_anc_info(&add->sb.anc, &tanc);
6425
12.6k
  }
6426
6427
286k
  if (add->map.value > 0 && to->len.max == 0) {
6428
29.3k
    if (add->map.mm.max == 0)
6429
10.1k
      add->map.anc.left |= to->anc.left;
6430
29.3k
  }
6431
6432
286k
  sb_reach = to->sb.reach_end;
6433
286k
  sm_reach = to->sm.reach_end;
6434
6435
286k
  if (add->len.max != 0)
6436
248k
    to->sb.reach_end = to->sm.reach_end = 0;
6437
6438
286k
  if (add->sb.len > 0) {
6439
54.1k
    if (sb_reach) {
6440
0
      concat_opt_exact(&to->sb, &add->sb, enc);
6441
0
      clear_opt_exact(&add->sb);
6442
0
    }
6443
54.1k
    else if (sm_reach) {
6444
2.16k
      concat_opt_exact(&to->sm, &add->sb, enc);
6445
2.16k
      clear_opt_exact(&add->sb);
6446
2.16k
    }
6447
54.1k
  }
6448
286k
  select_opt_exact(enc, &to->sm, &add->sb);
6449
286k
  select_opt_exact(enc, &to->sm, &add->sm);
6450
6451
286k
  if (to->spr.len > 0) {
6452
3.20k
    if (add->len.max > 0) {
6453
2.56k
      if (to->spr.mm.max == 0)
6454
374
        select_opt_exact(enc, &to->sb, &to->spr);
6455
2.19k
      else
6456
2.19k
        select_opt_exact(enc, &to->sm, &to->spr);
6457
2.56k
    }
6458
3.20k
  }
6459
283k
  else if (add->spr.len > 0) {
6460
479
    copy_opt_exact(&to->spr, &add->spr);
6461
479
  }
6462
6463
286k
  select_opt_map(&to->map, &add->map);
6464
286k
  mml_add(&to->len, &add->len);
6465
286k
}
6466
6467
static void
6468
alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)
6469
97.9k
{
6470
97.9k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6471
97.9k
  alt_merge_opt_exact(&to->sb,  &add->sb, env);
6472
97.9k
  alt_merge_opt_exact(&to->sm,  &add->sm, env);
6473
97.9k
  alt_merge_opt_exact(&to->spr, &add->spr, env);
6474
97.9k
  alt_merge_opt_map(env->enc, &to->map, &add->map);
6475
6476
97.9k
  mml_alt_merge(&to->len, &add->len);
6477
97.9k
}
6478
6479
static OnigLen
6480
node_max_byte_len(Node* node, ParseEnv* env)
6481
40.2k
{
6482
40.2k
  OnigLen len;
6483
40.2k
  OnigLen tmax;
6484
6485
40.2k
  len = 0;
6486
40.2k
  switch (ND_TYPE(node)) {
6487
3.51k
  case ND_LIST:
6488
14.6k
    do {
6489
14.6k
      tmax = node_max_byte_len(ND_CAR(node), env);
6490
14.6k
      len = distance_add(len, tmax);
6491
14.6k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
6492
3.51k
    break;
6493
6494
994
  case ND_ALT:
6495
4.97k
    do {
6496
4.97k
      tmax = node_max_byte_len(ND_CAR(node), env);
6497
4.97k
      if (len < tmax) len = tmax;
6498
4.97k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
6499
994
    break;
6500
6501
9.06k
  case ND_STRING:
6502
9.06k
    {
6503
9.06k
      StrNode* sn = STR_(node);
6504
9.06k
      len = (OnigLen )(sn->end - sn->s);
6505
9.06k
    }
6506
9.06k
    break;
6507
6508
1.81k
  case ND_CTYPE:
6509
5.41k
  case ND_CCLASS:
6510
5.41k
    len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
6511
5.41k
    break;
6512
6513
5.37k
  case ND_BACKREF:
6514
5.37k
    if (! ND_IS_CHECKER(node)) {
6515
5.32k
      int i;
6516
5.32k
      int* backs;
6517
5.32k
      MemEnv* mem_env = PARSEENV_MEMENV(env);
6518
5.32k
      BackRefNode* br = BACKREF_(node);
6519
5.32k
      if (ND_IS_RECURSION(node)) {
6520
1.03k
#ifdef USE_BACKREF_WITH_LEVEL
6521
1.03k
        if (ND_IS_NEST_LEVEL(node)) {
6522
128
          len = INFINITE_LEN;
6523
128
        }
6524
1.03k
#endif
6525
1.03k
        break;
6526
1.03k
      }
6527
4.29k
      backs = BACKREFS_P(br);
6528
12.0k
      for (i = 0; i < br->back_num; i++) {
6529
7.70k
        tmax = node_max_byte_len(mem_env[backs[i]].mem_node, env);
6530
7.70k
        if (len < tmax) len = tmax;
6531
7.70k
      }
6532
4.29k
    }
6533
4.34k
    break;
6534
6535
4.34k
#ifdef USE_CALL
6536
4.34k
  case ND_CALL:
6537
311
    if (! ND_IS_RECURSION(node))
6538
129
      len = node_max_byte_len(ND_BODY(node), env);
6539
182
    else
6540
182
      len = INFINITE_LEN;
6541
311
    break;
6542
0
#endif
6543
6544
2.05k
  case ND_QUANT:
6545
2.05k
    {
6546
2.05k
      QuantNode* qn = QUANT_(node);
6547
6548
2.05k
      if (qn->upper != 0) {
6549
1.96k
        len = node_max_byte_len(ND_BODY(node), env);
6550
1.96k
        if (len != 0) {
6551
1.79k
          if (! IS_INFINITE_REPEAT(qn->upper))
6552
803
            len = distance_multiply(len, qn->upper);
6553
992
          else
6554
992
            len = INFINITE_LEN;
6555
1.79k
        }
6556
1.96k
      }
6557
2.05k
    }
6558
2.05k
    break;
6559
6560
10.9k
  case ND_BAG:
6561
10.9k
    {
6562
10.9k
      BagNode* en = BAG_(node);
6563
10.9k
      switch (en->type) {
6564
10.0k
      case BAG_MEMORY:
6565
10.0k
        if (ND_IS_FIXED_MAX(node))
6566
5.20k
          len = en->max_len;
6567
4.79k
        else {
6568
4.79k
          if (ND_IS_MARK1(node))
6569
138
            len = INFINITE_LEN;
6570
4.65k
          else {
6571
4.65k
            ND_STATUS_ADD(node, MARK1);
6572
4.65k
            len = node_max_byte_len(ND_BODY(node), env);
6573
4.65k
            ND_STATUS_REMOVE(node, MARK1);
6574
6575
4.65k
            en->max_len = len;
6576
4.65k
            ND_STATUS_ADD(node, FIXED_MAX);
6577
4.65k
          }
6578
4.79k
        }
6579
10.0k
        break;
6580
6581
49
      case BAG_OPTION:
6582
628
      case BAG_STOP_BACKTRACK:
6583
628
        len = node_max_byte_len(ND_BODY(node), env);
6584
628
        break;
6585
359
      case BAG_IF_ELSE:
6586
359
        {
6587
359
          OnigLen tlen, elen;
6588
6589
359
          len = node_max_byte_len(ND_BODY(node), env);
6590
359
          if (IS_NOT_NULL(en->te.Then)) {
6591
188
            tlen = node_max_byte_len(en->te.Then, env);
6592
188
            len = distance_add(len, tlen);
6593
188
          }
6594
359
          if (IS_NOT_NULL(en->te.Else))
6595
189
            elen = node_max_byte_len(en->te.Else, env);
6596
170
          else elen = 0;
6597
6598
359
          if (elen > len) len = elen;
6599
359
        }
6600
359
        break;
6601
10.9k
      }
6602
10.9k
    }
6603
10.9k
    break;
6604
6605
10.9k
  case ND_ANCHOR:
6606
2.53k
  case ND_GIMMICK:
6607
2.53k
  default:
6608
2.53k
    break;
6609
40.2k
  }
6610
6611
40.2k
  return len;
6612
40.2k
}
6613
6614
17.7k
#define MAX_ND_OPT_INFO_REF_COUNT    5
6615
6616
static int
6617
optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
6618
480k
{
6619
480k
  int i;
6620
480k
  int r;
6621
480k
  OptNode xo;
6622
480k
  OnigEncoding enc;
6623
6624
480k
  r = 0;
6625
480k
  enc = env->enc;
6626
480k
  clear_node_opt_info(opt);
6627
480k
  set_bound_node_opt_info(opt, &env->mm);
6628
6629
480k
  switch (ND_TYPE(node)) {
6630
94.5k
  case ND_LIST:
6631
94.5k
    {
6632
94.5k
      OptEnv nenv;
6633
94.5k
      Node* nd = node;
6634
6635
94.5k
      copy_opt_env(&nenv, env);
6636
283k
      do {
6637
283k
        r = optimize_nodes(ND_CAR(nd), &xo, &nenv);
6638
283k
        if (r == 0) {
6639
283k
          mml_add(&nenv.mm, &xo.len);
6640
283k
          concat_left_node_opt_info(enc, opt, &xo);
6641
283k
        }
6642
283k
      } while (r == 0 && IS_NOT_NULL(nd = ND_CDR(nd)));
6643
94.5k
    }
6644
94.5k
    break;
6645
6646
11.4k
  case ND_ALT:
6647
11.4k
    {
6648
11.4k
      Node* nd = node;
6649
6650
107k
      do {
6651
107k
        r = optimize_nodes(ND_CAR(nd), &xo, env);
6652
107k
        if (r == 0) {
6653
107k
          if (nd == node) copy_node_opt_info(opt, &xo);
6654
95.5k
          else            alt_merge_node_opt_info(opt, &xo, env);
6655
107k
        }
6656
107k
      } while ((r == 0) && IS_NOT_NULL(nd = ND_CDR(nd)));
6657
11.4k
    }
6658
11.4k
    break;
6659
6660
106k
  case ND_STRING:
6661
106k
    {
6662
106k
      StrNode* sn = STR_(node);
6663
106k
      int slen = (int )(sn->end - sn->s);
6664
6665
106k
      concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
6666
106k
      if (slen > 0) {
6667
92.9k
        add_char_opt_map(&opt->map, *(sn->s), enc);
6668
92.9k
      }
6669
106k
      mml_set_min_max(&opt->len, slen, slen);
6670
106k
    }
6671
106k
    break;
6672
6673
141k
  case ND_CCLASS:
6674
141k
    {
6675
141k
      int z;
6676
141k
      CClassNode* cc = CCLASS_(node);
6677
6678
      /* no need to check ignore case. (set in tune_tree()) */
6679
6680
141k
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
6681
97.2k
        OnigLen min = ONIGENC_MBC_MINLEN(enc);
6682
97.2k
        OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
6683
6684
97.2k
        mml_set_min_max(&opt->len, min, max);
6685
97.2k
      }
6686
44.6k
      else {
6687
11.4M
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
6688
11.4M
          z = BITSET_AT(cc->bs, i);
6689
11.4M
          if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
6690
159k
            add_char_opt_map(&opt->map, (UChar )i, enc);
6691
159k
          }
6692
11.4M
        }
6693
44.6k
        mml_set_min_max(&opt->len, 1, 1);
6694
44.6k
      }
6695
141k
    }
6696
141k
    break;
6697
6698
20.9k
  case ND_CTYPE:
6699
20.9k
    {
6700
20.9k
      int min, max;
6701
20.9k
      int range;
6702
6703
20.9k
      max = ONIGENC_MBC_MAXLEN_DIST(enc);
6704
6705
20.9k
      if (max == 1) {
6706
13.2k
        min = 1;
6707
6708
13.2k
        switch (CTYPE_(node)->ctype) {
6709
9.39k
        case CTYPE_ANYCHAR:
6710
9.39k
          break;
6711
6712
3.87k
        case ONIGENC_CTYPE_WORD:
6713
3.87k
          range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
6714
3.87k
          if (CTYPE_(node)->not != 0) {
6715
319k
            for (i = 0; i < range; i++) {
6716
318k
              if (! ONIGENC_IS_CODE_WORD(enc, i)) {
6717
154k
                add_char_opt_map(&opt->map, (UChar )i, enc);
6718
154k
              }
6719
318k
            }
6720
144k
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
6721
142k
              add_char_opt_map(&opt->map, (UChar )i, enc);
6722
142k
            }
6723
1.79k
          }
6724
2.07k
          else {
6725
368k
            for (i = 0; i < range; i++) {
6726
366k
              if (ONIGENC_IS_CODE_WORD(enc, i)) {
6727
187k
                add_char_opt_map(&opt->map, (UChar )i, enc);
6728
187k
              }
6729
366k
            }
6730
2.07k
          }
6731
3.87k
          break;
6732
13.2k
        }
6733
13.2k
      }
6734
7.65k
      else {
6735
7.65k
        min = ONIGENC_MBC_MINLEN(enc);
6736
7.65k
      }
6737
20.9k
      mml_set_min_max(&opt->len, min, max);
6738
20.9k
    }
6739
0
    break;
6740
6741
18.1k
  case ND_ANCHOR:
6742
18.1k
    switch (ANCHOR_(node)->type) {
6743
1.56k
    case ANCR_BEGIN_BUF:
6744
2.00k
    case ANCR_BEGIN_POSITION:
6745
3.19k
    case ANCR_BEGIN_LINE:
6746
3.64k
    case ANCR_END_BUF:
6747
5.38k
    case ANCR_SEMI_END_BUF:
6748
6.40k
    case ANCR_END_LINE:
6749
6.72k
    case ANCR_PREC_READ_NOT:
6750
9.00k
    case ANCR_LOOK_BEHIND:
6751
9.00k
      add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
6752
9.00k
      break;
6753
6754
1.33k
    case ANCR_PREC_READ:
6755
1.33k
      {
6756
1.33k
        r = optimize_nodes(ND_BODY(node), &xo, env);
6757
1.33k
        if (r == 0) {
6758
1.33k
          if (xo.sb.len > 0)
6759
246
            copy_opt_exact(&opt->spr, &xo.sb);
6760
1.08k
          else if (xo.sm.len > 0)
6761
248
            copy_opt_exact(&opt->spr, &xo.sm);
6762
6763
1.33k
          opt->spr.reach_end = 0;
6764
6765
1.33k
          if (xo.map.value > 0)
6766
511
            copy_opt_map(&opt->map, &xo.map);
6767
1.33k
        }
6768
1.33k
      }
6769
1.33k
      break;
6770
6771
2.54k
    case ANCR_LOOK_BEHIND_NOT:
6772
2.54k
      break;
6773
18.1k
    }
6774
18.1k
    break;
6775
6776
18.1k
  case ND_BACKREF:
6777
4.93k
    if (! ND_IS_CHECKER(node)) {
6778
4.76k
      OnigLen min, max;
6779
6780
4.76k
      min = node_min_byte_len(node, env->scan_env);
6781
4.76k
      max = node_max_byte_len(node, env->scan_env);
6782
4.76k
      mml_set_min_max(&opt->len, min, max);
6783
4.76k
    }
6784
4.93k
    break;
6785
6786
0
#ifdef USE_CALL
6787
3.50k
  case ND_CALL:
6788
3.50k
    if (ND_IS_RECURSION(node))
6789
1.98k
      mml_set_min_max(&opt->len, 0, INFINITE_LEN);
6790
1.51k
    else {
6791
1.51k
      r = optimize_nodes(ND_BODY(node), opt, env);
6792
1.51k
    }
6793
3.50k
    break;
6794
0
#endif
6795
6796
35.8k
  case ND_QUANT:
6797
35.8k
    {
6798
35.8k
      OnigLen min, max;
6799
35.8k
      QuantNode* qn = QUANT_(node);
6800
6801
      /* Issue #175
6802
         ex. /\g<1>{0}(?<=|())/
6803
6804
         Empty and unused nodes in look-behind is removed in
6805
         tune_look_behind().
6806
         Called group nodes are assigned to be not called if the caller side is
6807
         inside of zero-repetition.
6808
         As a result, the nodes are considered unused.
6809
       */
6810
35.8k
      if (qn->upper == 0) {
6811
503
        mml_set_min_max(&opt->len, 0, 0);
6812
503
        break;
6813
503
      }
6814
6815
35.3k
      r = optimize_nodes(ND_BODY(node), &xo, env);
6816
35.3k
      if (r != 0) break;
6817
6818
35.3k
      if (qn->lower > 0) {
6819
18.3k
        copy_node_opt_info(opt, &xo);
6820
18.3k
        if (xo.sb.len > 0) {
6821
5.48k
          if (xo.sb.reach_end) {
6822
24.8k
            for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {
6823
20.5k
              int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);
6824
20.5k
              if (rc > 0) break;
6825
20.5k
            }
6826
4.42k
            if (i < qn->lower) opt->sb.reach_end = 0;
6827
4.42k
          }
6828
5.48k
        }
6829
6830
18.3k
        if (qn->lower != qn->upper) {
6831
14.1k
          opt->sb.reach_end = 0;
6832
14.1k
          opt->sm.reach_end = 0;
6833
14.1k
        }
6834
18.3k
        if (qn->lower > 1)
6835
5.28k
          opt->sm.reach_end = 0;
6836
18.3k
      }
6837
6838
35.3k
      if (IS_INFINITE_REPEAT(qn->upper)) {
6839
25.3k
        if (env->mm.max == 0 &&
6840
25.3k
            ND_IS_ANYCHAR(ND_BODY(node)) && qn->greedy != 0) {
6841
2.03k
          if (ND_IS_MULTILINE(ND_QUANT_BODY(qn)))
6842
984
            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
6843
1.05k
          else
6844
1.05k
            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);
6845
2.03k
        }
6846
6847
25.3k
        max = (xo.len.max > 0 ? INFINITE_LEN : 0);
6848
25.3k
      }
6849
10.0k
      else {
6850
10.0k
        max = distance_multiply(xo.len.max, qn->upper);
6851
10.0k
      }
6852
6853
35.3k
      min = distance_multiply(xo.len.min, qn->lower);
6854
35.3k
      mml_set_min_max(&opt->len, min, max);
6855
35.3k
    }
6856
0
    break;
6857
6858
31.8k
  case ND_BAG:
6859
31.8k
    {
6860
31.8k
      BagNode* en = BAG_(node);
6861
6862
31.8k
      switch (en->type) {
6863
10.7k
      case BAG_STOP_BACKTRACK:
6864
10.9k
      case BAG_OPTION:
6865
10.9k
        r = optimize_nodes(ND_BODY(node), opt, env);
6866
10.9k
        break;
6867
6868
17.7k
      case BAG_MEMORY:
6869
17.7k
#ifdef USE_CALL
6870
17.7k
        en->opt_count++;
6871
17.7k
        if (en->opt_count > MAX_ND_OPT_INFO_REF_COUNT) {
6872
276
          OnigLen min, max;
6873
6874
276
          min = 0;
6875
276
          max = INFINITE_LEN;
6876
276
          if (ND_IS_FIXED_MIN(node)) min = en->min_len;
6877
276
          if (ND_IS_FIXED_MAX(node)) max = en->max_len;
6878
276
          mml_set_min_max(&opt->len, min, max);
6879
276
        }
6880
17.4k
        else
6881
17.4k
#endif
6882
17.4k
          {
6883
17.4k
            r = optimize_nodes(ND_BODY(node), opt, env);
6884
17.4k
            if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {
6885
484
              if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
6886
287
                remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);
6887
484
            }
6888
17.4k
          }
6889
17.7k
        break;
6890
6891
3.24k
      case BAG_IF_ELSE:
6892
3.24k
        {
6893
3.24k
          OptEnv nenv;
6894
6895
3.24k
          if (IS_NOT_NULL(en->te.Else)) {
6896
2.35k
            copy_opt_env(&nenv, env);
6897
2.35k
            r = optimize_nodes(ND_BAG_BODY(en), &xo, &nenv);
6898
2.35k
            if (r == 0) {
6899
2.35k
              mml_add(&nenv.mm, &xo.len);
6900
2.35k
              concat_left_node_opt_info(enc, opt, &xo);
6901
2.35k
              if (IS_NOT_NULL(en->te.Then)) {
6902
252
                r = optimize_nodes(en->te.Then, &xo, &nenv);
6903
252
                if (r == 0) {
6904
252
                  concat_left_node_opt_info(enc, opt, &xo);
6905
252
                }
6906
252
              }
6907
6908
2.35k
                r = optimize_nodes(en->te.Else, &xo, env);
6909
2.35k
                if (r == 0)
6910
2.35k
                  alt_merge_node_opt_info(opt, &xo, env);
6911
2.35k
            }
6912
2.35k
          }
6913
3.24k
        }
6914
3.24k
        break;
6915
31.8k
      }
6916
31.8k
    }
6917
31.8k
    break;
6918
6919
31.8k
  case ND_GIMMICK:
6920
11.7k
    break;
6921
6922
0
  default:
6923
#ifdef ONIG_DEBUG
6924
    fprintf(DBGFP, "optimize_nodes: undefined node type %d\n", ND_TYPE(node));
6925
#endif
6926
0
    r = ONIGERR_TYPE_BUG;
6927
0
    break;
6928
480k
  }
6929
6930
480k
  return r;
6931
480k
}
6932
6933
static int
6934
set_optimize_exact(regex_t* reg, OptStr* e)
6935
7.74k
{
6936
7.74k
  int r;
6937
7.74k
  int allow_reverse;
6938
6939
7.74k
  if (e->len == 0) return 0;
6940
6941
7.74k
  reg->exact = (UChar* )xmalloc(e->len);
6942
7.74k
  CHECK_NULL_RETURN_MEMERR(reg->exact);
6943
7.74k
  xmemcpy(reg->exact, e->s, e->len);
6944
7.74k
  reg->exact_end = reg->exact + e->len;
6945
6946
7.74k
  allow_reverse =
6947
7.74k
    ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
6948
6949
7.74k
  if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {
6950
7.38k
    r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,
6951
7.38k
                                                  reg->exact, reg->exact_end,
6952
7.38k
                                                  reg->map, &(reg->map_offset));
6953
7.38k
    if (r != 0) return r;
6954
6955
7.38k
    reg->optimize = (allow_reverse != 0
6956
7.38k
                     ? OPTIMIZE_STR_FAST
6957
7.38k
                     : OPTIMIZE_STR_FAST_STEP_FORWARD);
6958
7.38k
  }
6959
355
  else {
6960
355
    reg->optimize = OPTIMIZE_STR;
6961
355
  }
6962
6963
7.74k
  reg->dist_min = e->mm.min;
6964
7.74k
  reg->dist_max = e->mm.max;
6965
6966
7.74k
  if (reg->dist_min != INFINITE_LEN) {
6967
7.72k
    int n = (int )(reg->exact_end - reg->exact);
6968
7.72k
    reg->threshold_len = reg->dist_min + n;
6969
7.72k
  }
6970
6971
7.74k
  return 0;
6972
7.74k
}
6973
6974
static void
6975
set_optimize_map(regex_t* reg, OptMap* m)
6976
3.14k
{
6977
3.14k
  int i;
6978
6979
808k
  for (i = 0; i < CHAR_MAP_SIZE; i++)
6980
805k
    reg->map[i] = m->map[i];
6981
6982
3.14k
  reg->optimize   = OPTIMIZE_MAP;
6983
3.14k
  reg->dist_min   = m->mm.min;
6984
3.14k
  reg->dist_max   = m->mm.max;
6985
6986
3.14k
  if (reg->dist_min != INFINITE_LEN) {
6987
3.13k
    reg->threshold_len = reg->dist_min + ONIGENC_MBC_MINLEN(reg->enc);
6988
3.13k
  }
6989
3.14k
}
6990
6991
static void
6992
set_sub_anchor(regex_t* reg, OptAnc* anc)
6993
10.8k
{
6994
10.8k
  reg->sub_anchor |= anc->left  & ANCR_BEGIN_LINE;
6995
10.8k
  reg->sub_anchor |= anc->right & ANCR_END_LINE;
6996
10.8k
}
6997
6998
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
6999
static void print_optimize_info(FILE* f, regex_t* reg);
7000
#endif
7001
7002
static int
7003
set_optimize_info_from_tree(Node* node, regex_t* reg, ParseEnv* scan_env)
7004
18.5k
{
7005
18.5k
  int r;
7006
18.5k
  OptNode opt;
7007
18.5k
  OptEnv env;
7008
7009
18.5k
  env.enc            = reg->enc;
7010
18.5k
  env.case_fold_flag = reg->case_fold_flag;
7011
18.5k
  env.scan_env       = scan_env;
7012
18.5k
  mml_clear(&env.mm);
7013
7014
18.5k
  r = optimize_nodes(node, &opt, &env);
7015
18.5k
  if (r != 0) return r;
7016
7017
18.5k
  reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |
7018
18.5k
        ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |
7019
18.5k
        ANCR_LOOK_BEHIND);
7020
7021
18.5k
  if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)
7022
390
    reg->anchor &= ~ANCR_ANYCHAR_INF_ML;
7023
7024
18.5k
  reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |
7025
18.5k
                                  ANCR_PREC_READ_NOT);
7026
7027
18.5k
  if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
7028
724
    reg->anc_dist_min = opt.len.min;
7029
724
    reg->anc_dist_max = opt.len.max;
7030
724
  }
7031
7032
18.5k
  if (opt.sb.len > 0 || opt.sm.len > 0) {
7033
8.66k
    select_opt_exact(reg->enc, &opt.sb, &opt.sm);
7034
8.66k
    if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {
7035
925
      goto set_map;
7036
925
    }
7037
7.74k
    else {
7038
7.74k
      r = set_optimize_exact(reg, &opt.sb);
7039
7.74k
      set_sub_anchor(reg, &opt.sb.anc);
7040
7.74k
    }
7041
8.66k
  }
7042
9.88k
  else if (opt.map.value > 0) {
7043
3.14k
  set_map:
7044
3.14k
    set_optimize_map(reg, &opt.map);
7045
3.14k
    set_sub_anchor(reg, &opt.map.anc);
7046
3.14k
  }
7047
7.66k
  else {
7048
7.66k
    reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
7049
7.66k
    if (opt.len.max == 0)
7050
3.37k
      reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;
7051
7.66k
  }
7052
7053
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
7054
  print_optimize_info(DBGFP, reg);
7055
#endif
7056
18.5k
  return r;
7057
18.5k
}
7058
#endif /* ONIG_DONT_OPTIMIZE */
7059
7060
static void
7061
clear_optimize_info(regex_t* reg)
7062
18.5k
{
7063
18.5k
  reg->optimize      = OPTIMIZE_NONE;
7064
18.5k
  reg->anchor        = 0;
7065
18.5k
  reg->anc_dist_min  = 0;
7066
18.5k
  reg->anc_dist_max  = 0;
7067
18.5k
  reg->sub_anchor    = 0;
7068
18.5k
  reg->exact_end     = (UChar* )NULL;
7069
18.5k
  reg->map_offset    = 0;
7070
18.5k
  reg->threshold_len = 0;
7071
18.5k
  if (IS_NOT_NULL(reg->exact)) {
7072
0
    xfree(reg->exact);
7073
0
    reg->exact = (UChar* )NULL;
7074
0
  }
7075
18.5k
}
7076
7077
#if defined(ONIG_DEBUG_PARSE)  || defined(ONIG_DEBUG_MATCH) || \
7078
    defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE)
7079
7080
static void
7081
print_enc_string(FILE* fp, OnigEncoding enc,
7082
                 const UChar *s, const UChar *end)
7083
{
7084
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
7085
    const UChar *p;
7086
    OnigCodePoint code;
7087
7088
    p = s;
7089
    while (p < end) {
7090
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
7091
      if (code >= 0x80) {
7092
        fprintf(fp, " 0x%04x ", (int )code);
7093
      }
7094
      else {
7095
        fputc((int )code, fp);
7096
      }
7097
7098
      p += enclen(enc, p);
7099
    }
7100
  }
7101
  else {
7102
    while (s < end) {
7103
      if (ONIGENC_MBC_MAXLEN(enc) == 1) {
7104
        if (*s >= 0x80) {
7105
          fprintf(fp, "\\x%02x", (unsigned int )*s);
7106
        }
7107
        else {
7108
          fputc((int )*s, fp);
7109
        }
7110
      }
7111
      else { /* for UTF-8 */
7112
        fputc((int )*s, fp);
7113
      }
7114
      s++;
7115
    }
7116
  }
7117
}
7118
7119
static void
7120
print_options(FILE* fp, OnigOptionType o)
7121
{
7122
  if ((o & ONIG_OPTION_IGNORECASE) != 0)      fprintf(fp, " IGNORECASE");
7123
  if ((o & ONIG_OPTION_EXTEND) != 0)          fprintf(fp, " EXTEND");
7124
  if ((o & ONIG_OPTION_MULTILINE) != 0)       fprintf(fp, " MULTILINE");
7125
  if ((o & ONIG_OPTION_SINGLELINE) != 0)      fprintf(fp, " SINGLELINE");
7126
  if ((o & ONIG_OPTION_FIND_LONGEST) != 0)    fprintf(fp, " FIND_LONGEST");
7127
  if ((o & ONIG_OPTION_FIND_NOT_EMPTY) != 0)  fprintf(fp, " FIND_NOT_EMPTY");
7128
  if ((o & ONIG_OPTION_NEGATE_SINGLELINE) != 0)  fprintf(fp, " NEGATE_SINGLELINE");
7129
  if ((o & ONIG_OPTION_DONT_CAPTURE_GROUP) != 0) fprintf(fp, " DONT_CAPTURE_GROUP");
7130
  if ((o & ONIG_OPTION_CAPTURE_GROUP) != 0)   fprintf(fp, " CAPTURE_GROUP");
7131
  if ((o & ONIG_OPTION_NOTBOL) != 0)          fprintf(fp, " NOTBOL");
7132
  if ((o & ONIG_OPTION_NOTEOL) != 0)          fprintf(fp, " NOTEOL");
7133
  if ((o & ONIG_OPTION_POSIX_REGION) != 0)    fprintf(fp, " POSIX_REGION");
7134
  if ((o & ONIG_OPTION_CHECK_VALIDITY_OF_STRING) != 0) fprintf(fp, " CHECK_VALIDITY_OF_STRING");
7135
  if ((o & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) fprintf(fp, " IGNORECASE_IS_ASCII");
7136
  if ((o & ONIG_OPTION_WORD_IS_ASCII) != 0)   fprintf(fp, " WORD_IS_ASCII");
7137
  if ((o & ONIG_OPTION_DIGIT_IS_ASCII) != 0)  fprintf(fp, " DIGIT_IS_ASCII");
7138
  if ((o & ONIG_OPTION_SPACE_IS_ASCII) != 0)  fprintf(fp, " SPACE_IS_ASCII");
7139
  if ((o & ONIG_OPTION_POSIX_IS_ASCII) != 0)  fprintf(fp, " POSIX_IS_ASCII");
7140
  if ((o & ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER) != 0) fprintf(fp, " TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER");
7141
  if ((o & ONIG_OPTION_TEXT_SEGMENT_WORD) != 0) fprintf(fp, " TEXT_SEGMENT_WORD");
7142
  if ((o & ONIG_OPTION_NOT_BEGIN_STRING) != 0) fprintf(fp, " NOT_BIGIN_STRING");
7143
  if ((o & ONIG_OPTION_NOT_END_STRING) != 0)   fprintf(fp, " NOT_END_STRING");
7144
  if ((o & ONIG_OPTION_NOT_BEGIN_POSITION) != 0) fprintf(fp, " NOT_BEGIN_POSITION");
7145
  if ((o & ONIG_OPTION_CALLBACK_EACH_MATCH) != 0) fprintf(fp, " CALLBACK_EACH_MATCH");
7146
}
7147
7148
#endif
7149
7150
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
7151
7152
#ifndef ONIG_DONT_OPTIMIZE
7153
7154
static void
7155
print_distance_range(FILE* f, OnigLen a, OnigLen b)
7156
{
7157
  if (a == INFINITE_LEN)
7158
    fputs("inf", f);
7159
  else
7160
    fprintf(f, "(%u)", a);
7161
7162
  fputs("-", f);
7163
7164
  if (b == INFINITE_LEN)
7165
    fputs("inf", f);
7166
  else
7167
    fprintf(f, "(%u)", b);
7168
}
7169
7170
static void
7171
print_anchor(FILE* f, int anchor)
7172
{
7173
  int q = 0;
7174
7175
  fprintf(f, "[");
7176
7177
  if (anchor & ANCR_BEGIN_BUF) {
7178
    fprintf(f, "begin-buf");
7179
    q = 1;
7180
  }
7181
  if (anchor & ANCR_BEGIN_LINE) {
7182
    if (q) fprintf(f, ", ");
7183
    q = 1;
7184
    fprintf(f, "begin-line");
7185
  }
7186
  if (anchor & ANCR_BEGIN_POSITION) {
7187
    if (q) fprintf(f, ", ");
7188
    q = 1;
7189
    fprintf(f, "begin-pos");
7190
  }
7191
  if (anchor & ANCR_END_BUF) {
7192
    if (q) fprintf(f, ", ");
7193
    q = 1;
7194
    fprintf(f, "end-buf");
7195
  }
7196
  if (anchor & ANCR_SEMI_END_BUF) {
7197
    if (q) fprintf(f, ", ");
7198
    q = 1;
7199
    fprintf(f, "semi-end-buf");
7200
  }
7201
  if (anchor & ANCR_END_LINE) {
7202
    if (q) fprintf(f, ", ");
7203
    q = 1;
7204
    fprintf(f, "end-line");
7205
  }
7206
  if (anchor & ANCR_ANYCHAR_INF) {
7207
    if (q) fprintf(f, ", ");
7208
    q = 1;
7209
    fprintf(f, "anychar-inf");
7210
  }
7211
  if (anchor & ANCR_ANYCHAR_INF_ML) {
7212
    if (q) fprintf(f, ", ");
7213
    fprintf(f, "anychar-inf-ml");
7214
  }
7215
7216
  fprintf(f, "]");
7217
}
7218
7219
static void
7220
print_optimize_info(FILE* f, regex_t* reg)
7221
{
7222
  static const char* on[] =
7223
    { "NONE", "STR", "STR_FAST", "STR_FAST_STEP_FORWARD", "MAP" };
7224
7225
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
7226
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
7227
  if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
7228
    print_distance_range(f, reg->anc_dist_min, reg->anc_dist_max);
7229
  fprintf(f, "\n");
7230
7231
  if (reg->optimize) {
7232
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
7233
    fprintf(f, "\n");
7234
  }
7235
  fprintf(f, "\n");
7236
7237
  if (reg->exact) {
7238
    UChar *p;
7239
    fprintf(f, "exact: [");
7240
    for (p = reg->exact; p < reg->exact_end; p++) {
7241
      fputc(*p, f);
7242
    }
7243
    fprintf(f, "]: length: %ld, dmin: %u, ",
7244
            (reg->exact_end - reg->exact), reg->dist_min);
7245
    if (reg->dist_max == INFINITE_LEN)
7246
      fprintf(f, "dmax: inf.\n");
7247
    else
7248
      fprintf(f, "dmax: %u\n", reg->dist_max);
7249
  }
7250
  else if (reg->optimize & OPTIMIZE_MAP) {
7251
    int c, i, n = 0;
7252
7253
    for (i = 0; i < CHAR_MAP_SIZE; i++)
7254
      if (reg->map[i]) n++;
7255
7256
    fprintf(f, "map: n=%d, dmin: %u, dmax: %u\n",
7257
            n, reg->dist_min, reg->dist_max);
7258
    if (n > 0) {
7259
      c = 0;
7260
      fputc('[', f);
7261
      for (i = 0; i < CHAR_MAP_SIZE; i++) {
7262
        if (reg->map[i] != 0) {
7263
          if (c > 0)  fputs(", ", f);
7264
          c++;
7265
          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
7266
              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
7267
            fputc(i, f);
7268
          else
7269
            fprintf(f, "0x%02x", i);
7270
        }
7271
      }
7272
      fprintf(f, "]\n");
7273
    }
7274
  }
7275
}
7276
#endif /* ONIG_DONT_OPTIMIZE */
7277
#endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
7278
7279
7280
extern RegexExt*
7281
onig_get_regex_ext(regex_t* reg)
7282
10.3k
{
7283
10.3k
  if (IS_NULL(reg->extp)) {
7284
2.34k
    RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
7285
2.34k
    if (IS_NULL(ext)) return 0;
7286
7287
2.34k
    ext->pattern      = 0;
7288
2.34k
    ext->pattern_end  = 0;
7289
2.34k
#ifdef USE_CALLOUT
7290
2.34k
    ext->tag_table    = 0;
7291
2.34k
    ext->callout_num  = 0;
7292
2.34k
    ext->callout_list_alloc = 0;
7293
2.34k
    ext->callout_list = 0;
7294
2.34k
#endif
7295
7296
2.34k
    reg->extp = ext;
7297
2.34k
  }
7298
7299
10.3k
  return reg->extp;
7300
10.3k
}
7301
7302
static void
7303
free_regex_ext(RegexExt* ext)
7304
2.34k
{
7305
2.34k
  if (IS_NOT_NULL(ext)) {
7306
2.34k
    if (IS_NOT_NULL(ext->pattern))
7307
2.34k
      xfree((void* )ext->pattern);
7308
7309
2.34k
#ifdef USE_CALLOUT
7310
2.34k
    if (IS_NOT_NULL(ext->tag_table))
7311
430
      onig_callout_tag_table_free(ext->tag_table);
7312
7313
2.34k
    if (IS_NOT_NULL(ext->callout_list))
7314
2.34k
      onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
7315
2.34k
#endif
7316
7317
2.34k
    xfree(ext);
7318
2.34k
  }
7319
2.34k
}
7320
7321
extern int
7322
onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
7323
2.34k
{
7324
2.34k
  RegexExt* ext;
7325
2.34k
  UChar* s;
7326
7327
2.34k
  ext = onig_get_regex_ext(reg);
7328
2.34k
  CHECK_NULL_RETURN_MEMERR(ext);
7329
7330
2.34k
  s = onigenc_strdup(reg->enc, pattern, pattern_end);
7331
2.34k
  CHECK_NULL_RETURN_MEMERR(s);
7332
7333
2.34k
  ext->pattern     = s;
7334
2.34k
  ext->pattern_end = s + (pattern_end - pattern);
7335
7336
2.34k
  return ONIG_NORMAL;
7337
2.34k
}
7338
7339
extern void
7340
onig_free_body(regex_t* reg)
7341
51.3k
{
7342
51.3k
  if (IS_NOT_NULL(reg)) {
7343
51.3k
    ops_free(reg);
7344
51.3k
    if (IS_NOT_NULL(reg->string_pool)) {
7345
2.96k
      xfree(reg->string_pool);
7346
2.96k
      reg->string_pool_end = reg->string_pool = 0;
7347
2.96k
    }
7348
51.3k
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
7349
51.3k
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
7350
51.3k
    if (IS_NOT_NULL(reg->extp)) {
7351
2.34k
      free_regex_ext(reg->extp);
7352
2.34k
      reg->extp = 0;
7353
2.34k
    }
7354
7355
51.3k
    onig_names_free(reg);
7356
51.3k
  }
7357
51.3k
}
7358
7359
extern void
7360
onig_free(regex_t* reg)
7361
51.3k
{
7362
51.3k
  if (IS_NOT_NULL(reg)) {
7363
51.3k
    onig_free_body(reg);
7364
51.3k
    xfree(reg);
7365
51.3k
  }
7366
51.3k
}
7367
7368
7369
#ifdef ONIG_DEBUG_PARSE
7370
static void print_tree P_((FILE* f, Node* node));
7371
#endif
7372
7373
extern int onig_init_for_match_at(regex_t* reg);
7374
7375
static int parse_and_tune(regex_t* reg, const UChar* pattern,
7376
  const UChar* pattern_end, ParseEnv *scan_env, Node** rroot,
7377
  OnigErrorInfo* einfo
7378
#ifdef USE_CALL
7379
  , UnsetAddrList* uslist
7380
#endif
7381
)
7382
51.3k
{
7383
51.3k
  int r;
7384
51.3k
  Node* root;
7385
7386
51.3k
  root = NULL_NODE;
7387
51.3k
  if (IS_NOT_NULL(einfo)) {
7388
25.6k
    einfo->enc = reg->enc;
7389
25.6k
    einfo->par = (UChar* )NULL;
7390
25.6k
  }
7391
7392
51.3k
  r = onig_parse_tree(&root, pattern, pattern_end, reg, scan_env);
7393
51.3k
  if (r != 0) goto err;
7394
7395
38.8k
#ifdef USE_WHOLE_OPTIONS
7396
38.8k
  if ((scan_env->flags & PE_FLAG_HAS_WHOLE_OPTIONS) != 0) {
7397
20
    r = check_whole_options_position(root);
7398
20
    if (r != 0) goto err;
7399
20
  }
7400
38.8k
#endif
7401
7402
38.8k
  r = reduce_string_list(root, reg->enc);
7403
38.8k
  if (r != 0) goto err;
7404
7405
  /* mixed use named group and no-named group */
7406
38.7k
  if (scan_env->num_named > 0 &&
7407
38.7k
      IS_SYNTAX_BV(scan_env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
7408
38.7k
      ! OPTON_CAPTURE_GROUP(reg->options)) {
7409
950
    if (scan_env->num_named != scan_env->num_mem)
7410
372
      r = disable_noname_group_capture(&root, reg, scan_env);
7411
578
    else
7412
578
      r = numbered_ref_check(root);
7413
7414
950
    if (r != 0) goto err;
7415
950
  }
7416
7417
38.6k
  r = check_backrefs(root, scan_env);
7418
38.6k
  if (r != 0) goto err;
7419
7420
38.3k
#ifdef USE_CALL
7421
38.3k
  if (scan_env->num_call > 0) {
7422
3.47k
    r = unset_addr_list_init(uslist, scan_env->num_call);
7423
3.47k
    if (r != 0) goto err;
7424
3.47k
    scan_env->unset_addr_list = uslist;
7425
3.47k
    r = tune_call(root, scan_env, 0);
7426
3.47k
    if (r != 0) goto err_unset;
7427
3.10k
    r = tune_call2(root);
7428
3.10k
    if (r != 0) goto err_unset;
7429
3.10k
    r = recursive_call_check_trav(root, scan_env, 0);
7430
3.10k
    if (r  < 0) goto err_unset;
7431
3.10k
    r = infinite_recursive_call_check_trav(root, scan_env);
7432
3.10k
    if (r != 0) goto err_unset;
7433
7434
2.87k
    tune_called_state(root, 0);
7435
2.87k
  }
7436
7437
37.7k
  reg->num_call = scan_env->num_call;
7438
37.7k
#endif
7439
7440
#ifdef ONIG_DEBUG_PARSE
7441
  fprintf(DBGFP, "MAX PARSE DEPTH: %d\n", scan_env->max_parse_depth);
7442
#endif
7443
7444
37.7k
  r = tune_tree(root, reg, 0, scan_env);
7445
37.7k
  if (r != 0) {
7446
#ifdef ONIG_DEBUG_PARSE
7447
    fprintf(DBGFP, "TREE (error in tune)\n");
7448
    print_tree(DBGFP, root);
7449
    fprintf(DBGFP, "\n");
7450
#endif
7451
624
    goto err_unset;
7452
624
  }
7453
7454
37.0k
  if (scan_env->backref_num != 0) {
7455
5.45k
    set_parent_node_trav(root, NULL_NODE);
7456
5.45k
    r = set_empty_repeat_node_trav(root, NULL_NODE, scan_env);
7457
5.45k
    if (r != 0) goto err_unset;
7458
5.45k
    set_empty_status_check_trav(root, scan_env);
7459
5.45k
  }
7460
7461
37.0k
  *rroot = root;
7462
37.0k
  return r;
7463
7464
1.22k
 err_unset:
7465
1.22k
#ifdef USE_CALL
7466
1.22k
  if (scan_env->num_call > 0) {
7467
682
    unset_addr_list_end(uslist);
7468
682
  }
7469
1.22k
#endif
7470
14.2k
 err:
7471
14.2k
  if (IS_NOT_NULL(scan_env->error)) {
7472
2.40k
    if (IS_NOT_NULL(einfo)) {
7473
1.20k
      einfo->par     = scan_env->error;
7474
1.20k
      einfo->par_end = scan_env->error_end;
7475
1.20k
    }
7476
2.40k
  }
7477
7478
14.2k
  onig_node_free(root);
7479
14.2k
  if (IS_NOT_NULL(scan_env->mem_env_dynamic))
7480
84
    xfree(scan_env->mem_env_dynamic);
7481
7482
14.2k
  *rroot = NULL_NODE;
7483
14.2k
  return r;
7484
1.22k
}
7485
7486
extern int
7487
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
7488
             OnigErrorInfo* einfo)
7489
25.6k
{
7490
25.6k
  int r;
7491
25.6k
  Node* root;
7492
25.6k
  ParseEnv scan_env;
7493
25.6k
#ifdef USE_CALL
7494
25.6k
  UnsetAddrList uslist = {0};
7495
25.6k
#endif
7496
7497
#if defined(ONIG_DEBUG_PARSE)  || defined(ONIG_DEBUG_MATCH) || \
7498
    defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE)
7499
  fprintf(DBGFP, "\nPATTERN: /");
7500
  print_enc_string(DBGFP, reg->enc, pattern, pattern_end);
7501
  fprintf(DBGFP, "/\n");
7502
  fprintf(DBGFP, "OPTIONS:");
7503
  print_options(DBGFP, reg->options);
7504
  fprintf(DBGFP, "\n");
7505
#endif
7506
7507
25.6k
  if (reg->ops_alloc == 0) {
7508
25.6k
    r = ops_init(reg, OPS_INIT_SIZE);
7509
25.6k
    if (r != 0) {
7510
0
      if (IS_NOT_NULL(einfo)) {
7511
0
        einfo->enc = reg->enc;
7512
0
        einfo->par = (UChar* )NULL;
7513
0
      }
7514
0
      return r;
7515
0
    }
7516
25.6k
  }
7517
0
  else
7518
0
    reg->ops_used = 0;
7519
7520
25.6k
  r = parse_and_tune(reg, pattern, pattern_end, &scan_env, &root, einfo
7521
25.6k
#ifdef USE_CALL
7522
25.6k
                     , &uslist
7523
25.6k
#endif
7524
25.6k
                    );
7525
25.6k
  if (r != 0) return r;
7526
7527
#ifdef ONIG_DEBUG_PARSE
7528
  fprintf(DBGFP, "TREE (after tune)\n");
7529
  print_tree(DBGFP, root);
7530
  fprintf(DBGFP, "\n");
7531
#endif
7532
7533
18.5k
  reg->capture_history = scan_env.cap_history;
7534
18.5k
  reg->push_mem_start  = scan_env.backtrack_mem | scan_env.cap_history;
7535
7536
18.5k
#ifdef USE_CALLOUT
7537
18.5k
  if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) {
7538
1.11k
    reg->push_mem_end = reg->push_mem_start;
7539
1.11k
  }
7540
17.4k
  else {
7541
17.4k
    if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
7542
136
      reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
7543
17.3k
    else
7544
17.3k
      reg->push_mem_end = reg->push_mem_start &
7545
17.3k
                        (scan_env.backrefed_mem | scan_env.cap_history);
7546
17.4k
  }
7547
#else
7548
  if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
7549
    reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
7550
  else
7551
    reg->push_mem_end = reg->push_mem_start &
7552
                      (scan_env.backrefed_mem | scan_env.cap_history);
7553
#endif
7554
7555
18.5k
  clear_optimize_info(reg);
7556
18.5k
#ifndef ONIG_DONT_OPTIMIZE
7557
18.5k
  r = set_optimize_info_from_tree(root, reg, &scan_env);
7558
18.5k
  if (r != 0)  {
7559
0
#ifdef USE_CALL
7560
0
    if (scan_env.num_call > 0) {
7561
0
      unset_addr_list_end(&uslist);
7562
0
    }
7563
0
#endif
7564
0
    goto err;
7565
0
  }
7566
18.5k
#endif
7567
7568
18.5k
  if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
7569
380
    xfree(scan_env.mem_env_dynamic);
7570
380
    scan_env.mem_env_dynamic = (MemEnv* )NULL;
7571
380
  }
7572
7573
18.5k
  r = compile_tree(root, reg, &scan_env);
7574
18.5k
  if (r == 0) {
7575
18.5k
    if (scan_env.keep_num > 0) {
7576
208
      r = add_op(reg, OP_UPDATE_VAR);
7577
208
      if (r != 0) goto err;
7578
7579
208
      COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;
7580
208
      COP(reg)->update_var.id   = 0; /* not used */
7581
208
      COP(reg)->update_var.clear = FALSE;
7582
208
    }
7583
7584
18.5k
    r = add_op(reg, OP_END);
7585
18.5k
    if (r != 0) goto err;
7586
7587
18.5k
#ifdef USE_CALL
7588
18.5k
    if (scan_env.num_call > 0) {
7589
1.39k
      r = fix_unset_addr_list(&uslist, reg);
7590
1.39k
      unset_addr_list_end(&uslist);
7591
1.39k
      if (r != 0) goto err;
7592
1.39k
    }
7593
18.5k
#endif
7594
7595
18.5k
    r = ops_resize(reg, reg->ops_used);
7596
18.5k
    if (r != ONIG_NORMAL) goto err;
7597
7598
18.5k
    set_addr_in_repeat_range(reg);
7599
7600
18.5k
    if ((reg->push_mem_end != 0)
7601
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
7602
        || (reg->num_repeat      != 0)
7603
        || (reg->num_empty_check != 0)
7604
#endif
7605
18.5k
#ifdef USE_CALLOUT
7606
18.5k
        || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
7607
18.5k
#endif
7608
18.5k
#ifdef USE_CALL
7609
18.5k
        || scan_env.num_call > 0
7610
18.5k
#endif
7611
18.5k
        )
7612
4.61k
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
7613
13.9k
    else {
7614
13.9k
      if (reg->push_mem_start != 0)
7615
521
        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
7616
13.4k
      else
7617
13.4k
        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
7618
13.9k
    }
7619
7620
18.5k
    r = ops_make_string_pool(reg);
7621
18.5k
    if (r != 0) goto err;
7622
18.5k
  }
7623
0
#ifdef USE_CALL
7624
0
  else if (scan_env.num_call > 0) {
7625
0
    unset_addr_list_end(&uslist);
7626
0
  }
7627
18.5k
#endif
7628
18.5k
  onig_node_free(root);
7629
7630
#ifdef ONIG_DEBUG_COMPILE
7631
  onig_print_names(DBGFP, reg);
7632
  onig_print_compiled_byte_code_list(DBGFP, reg);
7633
#endif
7634
7635
18.5k
#ifdef USE_DIRECT_THREADED_CODE
7636
  /* opcode -> opaddr */
7637
18.5k
  onig_init_for_match_at(reg);
7638
18.5k
#endif
7639
7640
18.5k
  return r;
7641
7642
0
 err:
7643
0
  if (IS_NOT_NULL(scan_env.error)) {
7644
0
    if (IS_NOT_NULL(einfo)) {
7645
0
      einfo->par     = scan_env.error;
7646
0
      einfo->par_end = scan_env.error_end;
7647
0
    }
7648
0
  }
7649
7650
0
  onig_node_free(root);
7651
0
  if (IS_NOT_NULL(scan_env.mem_env_dynamic))
7652
0
      xfree(scan_env.mem_env_dynamic);
7653
0
  return r;
7654
18.5k
}
7655
7656
7657
static int onig_inited = 0;
7658
7659
extern int
7660
onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
7661
              OnigEncoding enc, OnigSyntaxType* syntax)
7662
51.3k
{
7663
51.3k
  int r;
7664
7665
51.3k
  xmemset(reg, 0, sizeof(*reg));
7666
7667
51.3k
  if (onig_inited == 0) {
7668
#if 0
7669
    return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
7670
#else
7671
25.6k
    r = onig_initialize(&enc, 1);
7672
25.6k
    if (r != 0)
7673
0
      return ONIGERR_FAIL_TO_INITIALIZE;
7674
7675
25.6k
    onig_warning("You didn't call onig_initialize() explicitly");
7676
25.6k
#endif
7677
25.6k
  }
7678
7679
51.3k
  if (IS_NULL(reg))
7680
0
    return ONIGERR_INVALID_ARGUMENT;
7681
7682
51.3k
  if (ONIGENC_IS_UNDEF(enc))
7683
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
7684
7685
51.3k
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
7686
51.3k
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
7687
10
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
7688
10
  }
7689
7690
51.3k
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
7691
11.4k
    option |= syntax->options;
7692
11.4k
    option &= ~ONIG_OPTION_SINGLELINE;
7693
11.4k
  }
7694
39.9k
  else
7695
39.9k
    option |= syntax->options;
7696
7697
51.3k
  if ((option & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) {
7698
5.63k
    case_fold_flag &= ~(INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR |
7699
5.63k
                        ONIGENC_CASE_FOLD_TURKISH_AZERI);
7700
5.63k
    case_fold_flag |= ONIGENC_CASE_FOLD_ASCII_ONLY;
7701
5.63k
  }
7702
7703
51.3k
  (reg)->enc            = enc;
7704
51.3k
  (reg)->options        = option;
7705
51.3k
  (reg)->syntax         = syntax;
7706
51.3k
  (reg)->optimize       = 0;
7707
51.3k
  (reg)->exact          = (UChar* )NULL;
7708
51.3k
  (reg)->extp           = (RegexExt* )NULL;
7709
51.3k
  (reg)->ops            = (Operation* )NULL;
7710
51.3k
  (reg)->ops_curr       = (Operation* )NULL;
7711
51.3k
  (reg)->ops_used       = 0;
7712
51.3k
  (reg)->ops_alloc      = 0;
7713
51.3k
  (reg)->name_table     = (void* )NULL;
7714
51.3k
  (reg)->case_fold_flag = case_fold_flag;
7715
51.3k
  return 0;
7716
51.3k
}
7717
7718
extern int
7719
onig_new_without_alloc(regex_t* reg,
7720
                       const UChar* pattern, const UChar* pattern_end,
7721
                       OnigOptionType option, OnigEncoding enc,
7722
                       OnigSyntaxType* syntax, OnigErrorInfo* einfo)
7723
0
{
7724
0
  int r;
7725
7726
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
7727
0
  if (r != 0) return r;
7728
7729
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
7730
0
  return r;
7731
0
}
7732
7733
extern int
7734
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
7735
         OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
7736
         OnigErrorInfo* einfo)
7737
25.6k
{
7738
25.6k
  int r;
7739
7740
25.6k
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
7741
25.6k
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
7742
7743
25.6k
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
7744
25.6k
  if (r != 0) {
7745
5
    xfree(*reg);
7746
5
    *reg = NULL;
7747
5
    return r;
7748
5
  }
7749
7750
25.6k
  r = onig_compile(*reg, pattern, pattern_end, einfo);
7751
25.6k
  if (r != 0) {
7752
7.14k
    onig_free(*reg);
7753
7.14k
    *reg = NULL;
7754
7.14k
  }
7755
25.6k
  return r;
7756
25.6k
}
7757
7758
extern int
7759
onig_initialize(OnigEncoding encodings[], int n)
7760
51.3k
{
7761
51.3k
  int i;
7762
51.3k
  int r;
7763
7764
51.3k
  if (onig_inited != 0)
7765
25.6k
    return 0;
7766
7767
25.6k
  onigenc_init();
7768
7769
25.6k
  onig_inited = 1;
7770
7771
51.3k
  for (i = 0; i < n; i++) {
7772
25.6k
    OnigEncoding enc = encodings[i];
7773
25.6k
    r = onig_initialize_encoding(enc);
7774
25.6k
    if (r != 0)
7775
0
      return r;
7776
25.6k
  }
7777
7778
25.6k
  return ONIG_NORMAL;
7779
25.6k
}
7780
7781
typedef struct EndCallListItem {
7782
  struct EndCallListItem* next;
7783
  void (*func)(void);
7784
} EndCallListItemType;
7785
7786
static EndCallListItemType* EndCallTop;
7787
7788
extern void onig_add_end_call(void (*func)(void))
7789
0
{
7790
0
  EndCallListItemType* item;
7791
7792
0
  item = (EndCallListItemType* )xmalloc(sizeof(*item));
7793
0
  if (item == 0) return ;
7794
7795
0
  item->next = EndCallTop;
7796
0
  item->func = func;
7797
7798
0
  EndCallTop = item;
7799
0
}
7800
7801
static void
7802
exec_end_call_list(void)
7803
25.6k
{
7804
25.6k
  EndCallListItemType* prev;
7805
25.6k
  void (*func)(void);
7806
7807
25.6k
  while (EndCallTop != 0) {
7808
0
    func = EndCallTop->func;
7809
0
    (*func)();
7810
7811
0
    prev = EndCallTop;
7812
0
    EndCallTop = EndCallTop->next;
7813
0
    xfree(prev);
7814
0
  }
7815
25.6k
}
7816
7817
extern int
7818
onig_end(void)
7819
25.6k
{
7820
25.6k
  exec_end_call_list();
7821
7822
25.6k
#ifdef USE_CALLOUT
7823
25.6k
  onig_global_callout_names_free();
7824
25.6k
#endif
7825
7826
25.6k
  onigenc_end();
7827
7828
25.6k
  onig_inited = 0;
7829
7830
25.6k
  return 0;
7831
25.6k
}
7832
7833
extern int
7834
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
7835
5.47M
{
7836
5.47M
  OnigCodePoint n, *data;
7837
5.47M
  OnigCodePoint low, high, x;
7838
7839
5.47M
  GET_CODE_POINT(n, p);
7840
5.47M
  data = (OnigCodePoint* )p;
7841
5.47M
  data++;
7842
7843
26.5M
  for (low = 0, high = n; low < high; ) {
7844
21.1M
    x = (low + high) >> 1;
7845
21.1M
    if (code > data[x * 2 + 1])
7846
9.83M
      low = x + 1;
7847
11.2M
    else
7848
11.2M
      high = x;
7849
21.1M
  }
7850
7851
5.47M
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
7852
5.47M
}
7853
7854
extern int
7855
onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
7856
9.74M
{
7857
9.74M
  int found;
7858
9.74M
  CClassNode* cc = (CClassNode* )cc_arg;
7859
7860
9.74M
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
7861
9.01M
    if (IS_NULL(cc->mbuf)) {
7862
3.59M
      found = 0;
7863
3.59M
    }
7864
5.41M
    else {
7865
5.41M
      found = onig_is_in_code_range(cc->mbuf->p, code) != 0;
7866
5.41M
    }
7867
9.01M
  }
7868
730k
  else {
7869
730k
    found = BITSET_AT(cc->bs, code) != 0;
7870
730k
  }
7871
7872
9.74M
  if (IS_NCCLASS_NOT(cc))
7873
668k
    return !found;
7874
9.07M
  else
7875
9.07M
    return found;
7876
9.74M
}
7877
7878
extern int
7879
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
7880
9.74M
{
7881
9.74M
  int len;
7882
7883
9.74M
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
7884
420k
    len = 2;
7885
420k
  }
7886
9.32M
  else {
7887
9.32M
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
7888
9.32M
    if (len < 0) return 0;
7889
9.32M
  }
7890
9.74M
  return onig_is_code_in_cc_len(len, code, cc);
7891
9.74M
}
7892
7893
7894
4.94k
#define MANY_REPEAT_OF_ANYCHAR   20
7895
7896
typedef enum {
7897
  MJ_NO     = 0,
7898
  MJ_YES    = 1,
7899
  MJ_IGNORE = 2,
7900
} MJ_RESULT;
7901
7902
static MJ_RESULT
7903
mostly_just_anychar(Node* node, int in_reluctant)
7904
497k
{
7905
497k
  MJ_RESULT r;
7906
7907
497k
  r = MJ_NO;
7908
497k
  switch (ND_TYPE(node)) {
7909
94.2k
  case ND_LIST:
7910
94.2k
    {
7911
94.2k
      int found = FALSE;
7912
197k
      do {
7913
197k
        r = mostly_just_anychar(ND_CAR(node), in_reluctant);
7914
197k
        if (r == MJ_NO) break;
7915
106k
        if (r == MJ_YES) found = TRUE;
7916
106k
      } while (IS_NOT_NULL(node = ND_CDR(node)));
7917
94.2k
      if (r == MJ_IGNORE) {
7918
3.10k
        if (found == TRUE) r = MJ_YES;
7919
3.10k
      }
7920
94.2k
    }
7921
0
    break;
7922
7923
13.9k
  case ND_ALT:
7924
13.9k
    r = MJ_IGNORE;
7925
32.4k
    do {
7926
32.4k
      r = mostly_just_anychar(ND_CAR(node), in_reluctant);
7927
32.4k
      if (r == MJ_YES) break;
7928
32.4k
    } while (IS_NOT_NULL(node = ND_CDR(node)));
7929
0
    break;
7930
7931
16.2k
  case ND_QUANT:
7932
16.2k
    {
7933
16.2k
      QuantNode* qn = QUANT_(node);
7934
7935
16.2k
      if (qn->upper == 0)
7936
582
        r = MJ_IGNORE;
7937
15.6k
      else {
7938
15.6k
        if (in_reluctant == FALSE) {
7939
13.7k
          if (qn->greedy != 0 &&
7940
13.7k
              (! IS_INFINITE_REPEAT(qn->upper) &&
7941
12.9k
               qn->upper <= MANY_REPEAT_OF_ANYCHAR)) {
7942
3.78k
            in_reluctant = TRUE;
7943
3.78k
          }
7944
13.7k
        }
7945
15.6k
        r = mostly_just_anychar(ND_BODY(node), in_reluctant);
7946
15.6k
      }
7947
16.2k
    }
7948
16.2k
    break;
7949
7950
50.3k
  case ND_ANCHOR:
7951
50.3k
    switch (ANCHOR_(node)->type) {
7952
28.5k
    case ANCR_PREC_READ:
7953
29.9k
    case ANCR_PREC_READ_NOT:
7954
30.5k
    case ANCR_LOOK_BEHIND:
7955
30.9k
    case ANCR_LOOK_BEHIND_NOT:
7956
40.8k
    case ANCR_TEXT_SEGMENT_BOUNDARY: /* \y */
7957
40.8k
      r = MJ_IGNORE;
7958
40.8k
      break;
7959
9.52k
    default:
7960
9.52k
      break;
7961
50.3k
    }
7962
50.3k
    break;
7963
7964
120k
  case ND_BAG:
7965
120k
    {
7966
120k
      BagNode* en = BAG_(node);
7967
7968
120k
      if (en->type == BAG_IF_ELSE) {
7969
3.32k
        if (IS_NOT_NULL(en->te.Then)) {
7970
2.71k
          r = mostly_just_anychar(en->te.Then, in_reluctant);
7971
2.71k
          if (r == MJ_YES) break;
7972
2.71k
        }
7973
2.75k
        if (IS_NOT_NULL(en->te.Else)) {
7974
1.25k
          r = mostly_just_anychar(en->te.Else, in_reluctant);
7975
1.25k
        }
7976
2.75k
      }
7977
117k
      else {
7978
117k
        r = mostly_just_anychar(ND_BODY(node), in_reluctant);
7979
117k
      }
7980
120k
    }
7981
120k
    break;
7982
7983
120k
  case ND_CTYPE:
7984
12.9k
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
7985
11.1k
      r = MJ_YES;
7986
1.83k
    else
7987
1.83k
      r = MJ_NO;
7988
12.9k
    break;
7989
7990
117k
  case ND_STRING:
7991
117k
    if (ND_STRING_LEN(node) == 0) {
7992
10.2k
      r = MJ_IGNORE;
7993
10.2k
      break;
7994
10.2k
    }
7995
    /* fall */
7996
123k
  case ND_CCLASS:
7997
123k
    r = MJ_NO;
7998
123k
    break;
7999
8000
0
#ifdef USE_CALL
8001
46.6k
  case ND_CALL:
8002
    /* ignore call */
8003
46.6k
#endif
8004
47.8k
  case ND_BACKREF:
8005
55.8k
  case ND_GIMMICK:
8006
55.8k
    r = MJ_IGNORE;
8007
55.8k
    break;
8008
8009
0
  default:
8010
0
    break;
8011
497k
  }
8012
8013
497k
  return r;
8014
497k
}
8015
8016
488k
#define MAX_CALLS_IN_DETECT   10
8017
8018
typedef struct {
8019
  int prec_read;
8020
  int look_behind;
8021
  int backref;
8022
  int backref_with_level;
8023
  int call;
8024
  int is_keep;
8025
  int anychar_reluctant_many;
8026
  int empty_check_nest_level;
8027
  int max_empty_check_nest_level;
8028
  int heavy_element;
8029
} SlowElementCount;
8030
8031
static int
8032
detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
8033
11.5M
{
8034
11.5M
  int r;
8035
8036
11.5M
  r = 0;
8037
11.5M
  switch (ND_TYPE(node)) {
8038
1.48M
  case ND_LIST:
8039
1.97M
  case ND_ALT:
8040
9.36M
    do {
8041
9.36M
      r = detect_can_be_slow(ND_CAR(node), ct, ncall, calls);
8042
9.36M
      if (r != 0) return r;
8043
9.36M
    } while (IS_NOT_NULL(node = ND_CDR(node)));
8044
1.97M
    break;
8045
8046
1.97M
  case ND_QUANT:
8047
151k
    {
8048
151k
      QuantNode* qn;
8049
151k
      Node* body;
8050
151k
      int prev_heavy_element = 0;
8051
8052
151k
      qn = QUANT_(node);
8053
151k
      body = ND_BODY(node);
8054
8055
151k
      if (qn->emptiness != BODY_IS_NOT_EMPTY) {
8056
17.4k
        prev_heavy_element = ct->heavy_element;
8057
17.4k
        ct->empty_check_nest_level++;
8058
17.4k
        if (ct->empty_check_nest_level > ct->max_empty_check_nest_level)
8059
4.90k
          ct->max_empty_check_nest_level = ct->empty_check_nest_level;
8060
17.4k
      }
8061
134k
      else if (IS_INFINITE_REPEAT(qn->upper) || qn->upper > 0) {
8062
130k
        MJ_RESULT mr = mostly_just_anychar(body, (qn->greedy == 0));
8063
130k
        if (mr == MJ_YES)
8064
9.11k
          ct->anychar_reluctant_many++;
8065
130k
      }
8066
8067
151k
      r = detect_can_be_slow(body, ct, ncall, calls);
8068
8069
151k
      if (qn->emptiness != BODY_IS_NOT_EMPTY) {
8070
17.4k
        if (ND_IS_INPEEK(node)) {
8071
6.62k
          if (ct->empty_check_nest_level > 2) {
8072
1.90k
            if (prev_heavy_element == ct->heavy_element)
8073
1.15k
              ct->heavy_element++;
8074
1.90k
          }
8075
6.62k
        }
8076
17.4k
        ct->empty_check_nest_level--;
8077
17.4k
      }
8078
151k
    }
8079
151k
    break;
8080
8081
366k
  case ND_ANCHOR:
8082
366k
    switch (ANCHOR_(node)->type) {
8083
254k
    case ANCR_PREC_READ:
8084
287k
    case ANCR_PREC_READ_NOT:
8085
287k
      ct->prec_read++;
8086
287k
      break;
8087
3.99k
    case ANCR_LOOK_BEHIND:
8088
7.28k
    case ANCR_LOOK_BEHIND_NOT:
8089
7.28k
      ct->look_behind++;
8090
7.28k
      break;
8091
71.7k
    default:
8092
71.7k
      break;
8093
366k
    }
8094
8095
366k
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
8096
295k
      r = detect_can_be_slow(ND_BODY(node), ct, ncall, calls);
8097
366k
    break;
8098
8099
1.20M
  case ND_BAG:
8100
1.20M
    {
8101
1.20M
      BagNode* en = BAG_(node);
8102
8103
1.20M
      r = detect_can_be_slow(ND_BODY(node), ct, ncall, calls);
8104
1.20M
      if (r != 0) return r;
8105
8106
1.20M
      if (en->type == BAG_IF_ELSE) {
8107
10.5k
        if (IS_NOT_NULL(en->te.Then)) {
8108
6.43k
          r = detect_can_be_slow(en->te.Then, ct, ncall, calls);
8109
6.43k
          if (r != 0) return r;
8110
6.43k
        }
8111
10.5k
        if (IS_NOT_NULL(en->te.Else)) {
8112
5.53k
          r = detect_can_be_slow(en->te.Else, ct, ncall, calls);
8113
5.53k
          if (r != 0) return r;
8114
5.53k
        }
8115
10.5k
      }
8116
1.20M
    }
8117
1.20M
    break;
8118
8119
1.20M
#ifdef USE_BACKREF_WITH_LEVEL
8120
1.20M
  case ND_BACKREF:
8121
78.7k
    if (ND_IS_NEST_LEVEL(node))
8122
788
      ct->heavy_element++;
8123
77.9k
    else
8124
77.9k
      ct->backref++;
8125
78.7k
    break;
8126
0
#endif
8127
8128
0
#ifdef USE_CALL
8129
3.72M
  case ND_CALL:
8130
3.72M
    {
8131
3.72M
      int i;
8132
3.72M
      int found;
8133
3.72M
      int gnum;
8134
8135
3.72M
      gnum = CALL_(node)->called_gnum;
8136
3.72M
      ct->call++;
8137
8138
3.72M
      if (ND_IS_RECURSION(node) && ND_IS_INPEEK(node) &&
8139
3.72M
          ND_IS_IN_REAL_REPEAT(node)) {
8140
3.30M
         ct->heavy_element += 10;
8141
3.30M
      }
8142
8143
3.72M
      found = FALSE;
8144
13.7M
      for (i = 0; i < ncall; i++) {
8145
13.2M
        if (gnum == calls[i]) {
8146
3.23M
          found = TRUE;
8147
3.23M
          break;
8148
3.23M
        }
8149
13.2M
      }
8150
8151
3.72M
      if (! found) {
8152
488k
        if (ncall + 1 < MAX_CALLS_IN_DETECT) {
8153
483k
          calls[ncall] = gnum;
8154
483k
          r = detect_can_be_slow(ND_BODY(node), ct, ncall + 1, calls);
8155
483k
        }
8156
4.77k
        else {
8157
4.77k
          ct->heavy_element++;
8158
4.77k
        }
8159
488k
      }
8160
3.72M
    }
8161
3.72M
    break;
8162
0
#endif
8163
28.0k
  case ND_GIMMICK:
8164
28.0k
    {
8165
28.0k
      GimmickNode* g = GIMMICK_(node);
8166
28.0k
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
8167
2.02k
        ct->is_keep = TRUE;
8168
28.0k
    }
8169
28.0k
    break;
8170
8171
4.00M
  default:
8172
4.00M
    break;
8173
11.5M
  }
8174
8175
11.5M
  return r;
8176
11.5M
}
8177
8178
extern int
8179
onig_detect_can_be_slow_pattern(const UChar* pattern,
8180
  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
8181
  OnigSyntaxType* syntax)
8182
25.6k
{
8183
25.6k
  int r;
8184
25.6k
  regex_t* reg;
8185
25.6k
  Node* root;
8186
25.6k
  ParseEnv scan_env;
8187
25.6k
  SlowElementCount count;
8188
25.6k
  int calls[MAX_CALLS_IN_DETECT];
8189
25.6k
#ifdef USE_CALL
8190
25.6k
  UnsetAddrList  uslist = {0};
8191
25.6k
#endif
8192
8193
25.6k
  reg = (regex_t* )xmalloc(sizeof(regex_t));
8194
25.6k
  if (IS_NULL(reg)) return ONIGERR_MEMORY;
8195
8196
25.6k
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
8197
25.6k
  if (r != 0) {
8198
5
    xfree(reg);
8199
5
    return r;
8200
5
  }
8201
8202
25.6k
  r = parse_and_tune(reg, pattern, pattern_end, &scan_env, &root, NULL
8203
25.6k
#ifdef USE_CALL
8204
25.6k
                     , &uslist
8205
25.6k
#endif
8206
25.6k
                    );
8207
25.6k
  if (r != 0) goto err;
8208
8209
18.5k
#ifdef USE_CALL
8210
18.5k
  if (scan_env.num_call > 0) {
8211
1.39k
    unset_addr_list_end(&uslist);
8212
1.39k
  }
8213
18.5k
#endif
8214
8215
18.5k
  count.prec_read          = 0;
8216
18.5k
  count.look_behind        = 0;
8217
18.5k
  count.backref            = 0;
8218
18.5k
  count.backref_with_level = 0;
8219
18.5k
  count.call               = 0;
8220
18.5k
  count.is_keep            = FALSE;
8221
18.5k
  count.anychar_reluctant_many     = 0;
8222
18.5k
  count.empty_check_nest_level     = 0;
8223
18.5k
  count.max_empty_check_nest_level = 0;
8224
18.5k
  count.heavy_element = 0;
8225
8226
18.5k
  r = detect_can_be_slow(root, &count, 0, calls);
8227
18.5k
  if (r == 0) {
8228
18.5k
    int n;
8229
8230
18.5k
    n = count.prec_read + count.look_behind
8231
18.5k
      + count.backref + count.backref_with_level + count.call
8232
18.5k
      + count.anychar_reluctant_many;
8233
8234
18.5k
    if (count.is_keep) count.max_empty_check_nest_level++;
8235
8236
18.5k
    if (count.max_empty_check_nest_level > 2)
8237
456
      n += count.max_empty_check_nest_level - 2;
8238
18.5k
    if (count.heavy_element != 0) {
8239
503
      if (count.heavy_element < 0x10000)
8240
491
        n += count.heavy_element << 8;
8241
12
      else
8242
12
        n += count.heavy_element;
8243
503
    }
8244
8245
18.5k
    r = n;
8246
8247
#ifdef ONIG_DEBUG_PARSE
8248
    fprintf(DBGFP, "-- detect can be slow --\n");
8249
    fprintf(DBGFP, "  prec_read:          %d\n", count.prec_read);
8250
    fprintf(DBGFP, "  look_behind:        %d\n", count.look_behind);
8251
    fprintf(DBGFP, "  backref:            %d\n", count.backref);
8252
    fprintf(DBGFP, "  backref_with_level: %d\n", count.backref_with_level);
8253
    fprintf(DBGFP, "  call:               %d\n", count.call);
8254
    fprintf(DBGFP, "  is_keep:            %d\n", count.is_keep);
8255
    fprintf(DBGFP, "  any_reluctant_many: %d\n", count.anychar_reluctant_many);
8256
    fprintf(DBGFP, "  max_empty_check_nest_level: %d\n", count.max_empty_check_nest_level);
8257
    fprintf(DBGFP, "  heavy_element:      %d\n", count.heavy_element);
8258
    fprintf(DBGFP, "  r:                  %d\n", r);
8259
    fprintf(DBGFP, "\n");
8260
#endif
8261
18.5k
  }
8262
8263
18.5k
  if (IS_NOT_NULL(scan_env.mem_env_dynamic))
8264
380
    xfree(scan_env.mem_env_dynamic);
8265
8266
25.6k
 err:
8267
25.6k
  onig_node_free(root);
8268
25.6k
  onig_free(reg);
8269
25.6k
  return r;
8270
18.5k
}
8271
8272
8273
#ifdef ONIG_DEBUG_PARSE
8274
8275
#ifdef USE_CALL
8276
static void
8277
p_string(FILE* f, int len, UChar* s)
8278
{
8279
  fputs(":", f);
8280
  while (len-- > 0) { fputc(*s++, f); }
8281
}
8282
#endif
8283
8284
static void
8285
Indent(FILE* f, int indent)
8286
{
8287
  int i;
8288
  for (i = 0; i < indent; i++) putc(' ', f);
8289
}
8290
8291
static void
8292
print_indent_tree(FILE* f, Node* node, int indent)
8293
{
8294
  static char* emptiness_name[] = { "", " empty", " empty_mem", " empty_rec" };
8295
8296
  int i;
8297
  NodeType type;
8298
  UChar* p;
8299
  int add = 3;
8300
8301
  Indent(f, indent);
8302
  if (IS_NULL(node)) {
8303
    fprintf(f, "ERROR: null node!!!\n");
8304
    exit(0);
8305
  }
8306
8307
  type = ND_TYPE(node);
8308
  switch (type) {
8309
  case ND_LIST:
8310
  case ND_ALT:
8311
    if (type == ND_LIST)
8312
      fprintf(f, "<list:%p>\n", node);
8313
    else
8314
      fprintf(f, "<alt:%p>\n", node);
8315
8316
    print_indent_tree(f, ND_CAR(node), indent + add);
8317
    while (IS_NOT_NULL(node = ND_CDR(node))) {
8318
      if (ND_TYPE(node) != type) {
8319
        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", ND_TYPE(node));
8320
        exit(0);
8321
      }
8322
      print_indent_tree(f, ND_CAR(node), indent + add);
8323
    }
8324
    break;
8325
8326
  case ND_STRING:
8327
    {
8328
      char* str;
8329
      char* mode;
8330
8331
      if (ND_STRING_IS_CRUDE(node))
8332
        mode = "-crude";
8333
      else if (ND_IS_IGNORECASE(node))
8334
        mode = "-ignorecase";
8335
      else
8336
        mode = "";
8337
8338
      if (STR_(node)->s == STR_(node)->end)
8339
        str = "empty-string";
8340
      else
8341
        str = "string";
8342
8343
      fprintf(f, "<%s%s:%p>", str, mode, node);
8344
      for (p = STR_(node)->s; p < STR_(node)->end; p++) {
8345
        if (*p >= 0x20 && *p < 0x7f)
8346
          fputc(*p, f);
8347
        else {
8348
          fprintf(f, " 0x%02x", *p);
8349
        }
8350
      }
8351
    }
8352
    break;
8353
8354
  case ND_CCLASS:
8355
#define CCLASS_MBUF_MAX_OUTPUT_NUM   10
8356
8357
    fprintf(f, "<cclass:%p>", node);
8358
    if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
8359
    if (CCLASS_(node)->mbuf) {
8360
      BBuf* bbuf = CCLASS_(node)->mbuf;
8361
      fprintf(f, " mbuf(%u) ", bbuf->used);
8362
      for (i = 0; i < bbuf->used && i < CCLASS_MBUF_MAX_OUTPUT_NUM; i++) {
8363
        if (i > 0) fprintf(f, ",");
8364
        fprintf(f, "%0x", bbuf->p[i]);
8365
      }
8366
      if (i < bbuf->used) fprintf(f, "...");
8367
    }
8368
    break;
8369
8370
  case ND_CTYPE:
8371
    fprintf(f, "<ctype:%p> ", node);
8372
    switch (CTYPE_(node)->ctype) {
8373
    case CTYPE_ANYCHAR:
8374
      fprintf(f, "anychar");
8375
      break;
8376
8377
    case ONIGENC_CTYPE_WORD:
8378
      if (CTYPE_(node)->not != 0)
8379
        fputs("not word", f);
8380
      else
8381
        fputs("word",     f);
8382
8383
      if (CTYPE_(node)->ascii_mode != 0)
8384
        fputs(" (ascii)", f);
8385
8386
      break;
8387
8388
    default:
8389
      fprintf(f, "ERROR: undefined ctype.\n");
8390
      exit(0);
8391
    }
8392
    break;
8393
8394
  case ND_ANCHOR:
8395
    fprintf(f, "<anchor:%p> ", node);
8396
    switch (ANCHOR_(node)->type) {
8397
    case ANCR_BEGIN_BUF:        fputs("begin buf",      f); break;
8398
    case ANCR_END_BUF:          fputs("end buf",        f); break;
8399
    case ANCR_BEGIN_LINE:       fputs("begin line",     f); break;
8400
    case ANCR_END_LINE:         fputs("end line",       f); break;
8401
    case ANCR_SEMI_END_BUF:     fputs("semi end buf",   f); break;
8402
    case ANCR_BEGIN_POSITION:   fputs("begin position", f); break;
8403
8404
    case ANCR_WORD_BOUNDARY:    fputs("word boundary",     f); break;
8405
    case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
8406
#ifdef USE_WORD_BEGIN_END
8407
    case ANCR_WORD_BEGIN:       fputs("word begin", f);     break;
8408
    case ANCR_WORD_END:         fputs("word end", f);       break;
8409
#endif
8410
    case ANCR_TEXT_SEGMENT_BOUNDARY:
8411
      fputs("text-segment boundary", f); break;
8412
    case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
8413
      fputs("no text-segment boundary", f); break;
8414
    case ANCR_PREC_READ:
8415
      fprintf(f, "prec read\n");
8416
      print_indent_tree(f, ND_BODY(node), indent + add);
8417
      break;
8418
    case ANCR_PREC_READ_NOT:
8419
      fprintf(f, "prec read not\n");
8420
      print_indent_tree(f, ND_BODY(node), indent + add);
8421
      break;
8422
    case ANCR_LOOK_BEHIND:
8423
      fprintf(f, "look behind\n");
8424
      print_indent_tree(f, ND_BODY(node), indent + add);
8425
      break;
8426
    case ANCR_LOOK_BEHIND_NOT:
8427
      fprintf(f, "look behind not\n");
8428
      print_indent_tree(f, ND_BODY(node), indent + add);
8429
      break;
8430
8431
    default:
8432
      fprintf(f, "ERROR: undefined anchor type.\n");
8433
      break;
8434
    }
8435
    break;
8436
8437
  case ND_BACKREF:
8438
    {
8439
      int* p;
8440
      BackRefNode* br = BACKREF_(node);
8441
      p = BACKREFS_P(br);
8442
      fprintf(f, "<backref%s:%p>", ND_IS_CHECKER(node) ? "-checker" : "", node);
8443
      for (i = 0; i < br->back_num; i++) {
8444
        if (i > 0) fputs(", ", f);
8445
        fprintf(f, "%d", p[i]);
8446
      }
8447
#ifdef USE_BACKREF_WITH_LEVEL
8448
      if (ND_IS_NEST_LEVEL(node)) {
8449
        fprintf(f, ", level: %d", br->nest_level);
8450
      }
8451
#endif
8452
    }
8453
    break;
8454
8455
#ifdef USE_CALL
8456
  case ND_CALL:
8457
    {
8458
      CallNode* cn = CALL_(node);
8459
      fprintf(f, "<call:%p>", node);
8460
      fprintf(f, " num: %d, name", cn->called_gnum);
8461
      p_string(f, cn->name_end - cn->name, cn->name);
8462
      if (ND_IS_RECURSION(node)) fprintf(f, ", recursion");
8463
      if (ND_IS_INPEEK(node))    fprintf(f, ", in-peek");
8464
      if (ND_IS_IN_REAL_REPEAT(node)) fprintf(f, ", in-real-repeat");
8465
    }
8466
    break;
8467
#endif
8468
8469
  case ND_QUANT:
8470
    {
8471
      fprintf(f, "<quantifier:%p>{%d,%d}%s%s%s", node,
8472
              QUANT_(node)->lower, QUANT_(node)->upper,
8473
              (QUANT_(node)->greedy ? "" : "?"),
8474
              QUANT_(node)->include_referred == 0 ? "" : " referred",
8475
              emptiness_name[QUANT_(node)->emptiness]);
8476
      if (ND_IS_INPEEK(node)) fprintf(f, ", in-peek");
8477
      fprintf(f, "\n");
8478
      print_indent_tree(f, ND_BODY(node), indent + add);
8479
    }
8480
    break;
8481
8482
  case ND_BAG:
8483
    {
8484
      BagNode* bn = BAG_(node);
8485
      fprintf(f, "<bag:%p> ", node);
8486
      if (bn->type == BAG_IF_ELSE) {
8487
        Node* Then;
8488
        Node* Else;
8489
8490
        fprintf(f, "if-else\n");
8491
        print_indent_tree(f, ND_BODY(node), indent + add);
8492
8493
        Then = bn->te.Then;
8494
        Else = bn->te.Else;
8495
        if (IS_NULL(Then)) {
8496
          Indent(f, indent + add);
8497
          fprintf(f, "THEN empty\n");
8498
        }
8499
        else
8500
          print_indent_tree(f, Then, indent + add);
8501
8502
        if (IS_NULL(Else)) {
8503
          Indent(f, indent + add);
8504
          fprintf(f, "ELSE empty\n");
8505
        }
8506
        else
8507
          print_indent_tree(f, Else, indent + add);
8508
      }
8509
      else {
8510
        switch (bn->type) {
8511
        case BAG_OPTION:
8512
          fprintf(f, "option:%d", bn->o.options);
8513
          break;
8514
        case BAG_MEMORY:
8515
          fprintf(f, "memory:%d", bn->m.regnum);
8516
          if (ND_IS_CALLED(node)) {
8517
            fprintf(f, ", called");
8518
            if (ND_IS_RECURSION(node))
8519
              fprintf(f, ", recursion");
8520
          }
8521
          else if (ND_IS_REFERENCED(node))
8522
            fprintf(f, ", referenced");
8523
8524
          if (ND_IS_FIXED_ADDR(node))
8525
            fprintf(f, ", fixed-addr");
8526
          if ((bn->m.called_state & IN_PEEK) != 0)
8527
            fprintf(f, ", in-peek");
8528
          break;
8529
        case BAG_STOP_BACKTRACK:
8530
          fprintf(f, "stop-bt");
8531
          break;
8532
        default:
8533
          break;
8534
        }
8535
        fprintf(f, "\n");
8536
        print_indent_tree(f, ND_BODY(node), indent + add);
8537
      }
8538
    }
8539
    break;
8540
8541
  case ND_GIMMICK:
8542
    fprintf(f, "<gimmick:%p> ", node);
8543
    switch (GIMMICK_(node)->type) {
8544
    case GIMMICK_FAIL:
8545
      fprintf(f, "fail");
8546
      break;
8547
    case GIMMICK_SAVE:
8548
      fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
8549
      break;
8550
    case GIMMICK_UPDATE_VAR:
8551
      fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
8552
      break;
8553
#ifdef USE_CALLOUT
8554
    case GIMMICK_CALLOUT:
8555
      switch (GIMMICK_(node)->detail_type) {
8556
      case ONIG_CALLOUT_OF_CONTENTS:
8557
        fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
8558
        break;
8559
      case ONIG_CALLOUT_OF_NAME:
8560
        fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
8561
        break;
8562
      }
8563
#endif
8564
    }
8565
    break;
8566
8567
  default:
8568
    fprintf(f, "print_indent_tree: undefined node type %d\n", ND_TYPE(node));
8569
    break;
8570
  }
8571
8572
  if (type != ND_LIST && type != ND_ALT && type != ND_QUANT &&
8573
      type != ND_BAG)
8574
    fprintf(f, "\n");
8575
  fflush(f);
8576
}
8577
8578
static void
8579
print_tree(FILE* f, Node* node)
8580
{
8581
  print_indent_tree(f, node, 0);
8582
}
8583
#endif