Coverage Report

Created: 2023-03-26 07:01

/src/fluent-bit/lib/onigmo/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regparse.h"
32
33
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35
extern OnigCaseFoldType
36
onig_get_default_case_fold_flag(void)
37
0
{
38
0
  return OnigDefaultCaseFoldFlag;
39
0
}
40
41
extern int
42
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43
0
{
44
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
45
0
  return 0;
46
0
}
47
48
49
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51
#endif
52
53
#if 0
54
/*
 * Copy the bytes in [s, end) into a newly allocated buffer and append a
 * terminating 0 byte.  Returns NULL when the range is empty or inverted.
 * NOTE(review): CHECK_NULL_RETURN is defined elsewhere; presumably it
 * returns NULL from this function when the allocation failed -- confirm.
 */
static UChar*
str_dup(UChar* s, UChar* end)
{
  ptrdiff_t n = end - s;
  UChar* copy;

  if (n <= 0) return NULL;

  copy = (UChar* )xmalloc(n + 1);
  CHECK_NULL_RETURN(copy);
  xmemcpy(copy, s, n);
  copy[n] = (UChar )0;
  return copy;
}
68
#endif
69
70
static void
71
swap_node(Node* a, Node* b)
72
754k
{
73
754k
  Node c;
74
754k
  c = *a; *a = *b; *b = c;
75
76
754k
  if (NTYPE(a) == NT_STR) {
77
34.3k
    StrNode* sn = NSTR(a);
78
34.3k
    if (sn->capa == 0) {
79
33.6k
      size_t len = sn->end - sn->s;
80
33.6k
      sn->s   = sn->buf;
81
33.6k
      sn->end = sn->s + len;
82
33.6k
    }
83
34.3k
  }
84
85
754k
  if (NTYPE(b) == NT_STR) {
86
45.6k
    StrNode* sn = NSTR(b);
87
45.6k
    if (sn->capa == 0) {
88
42.6k
      size_t len = sn->end - sn->s;
89
42.6k
      sn->s   = sn->buf;
90
42.6k
      sn->end = sn->s + len;
91
42.6k
    }
92
45.6k
  }
93
754k
}
94
95
static OnigDistance
96
distance_add(OnigDistance d1, OnigDistance d2)
97
25.1M
{
98
25.1M
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99
9.25M
    return ONIG_INFINITE_DISTANCE;
100
15.9M
  else {
101
15.9M
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102
615
    else return ONIG_INFINITE_DISTANCE;
103
15.9M
  }
104
25.1M
}
105
106
static OnigDistance
107
distance_multiply(OnigDistance d, int m)
108
6.66M
{
109
6.66M
  if (m == 0) return 0;
110
111
5.22M
  if (d < ONIG_INFINITE_DISTANCE / m)
112
5.21M
    return d * m;
113
16.1k
  else
114
16.1k
    return ONIG_INFINITE_DISTANCE;
115
5.22M
}
116
117
static int
118
bitset_is_empty(BitSetRef bs)
119
15.1M
{
120
15.1M
  int i;
121
126M
  for (i = 0; i < BITSET_SIZE; i++) {
122
112M
    if (bs[i] != 0) return 0;
123
112M
  }
124
13.8M
  return 1;
125
15.1M
}
126
127
#ifdef ONIG_DEBUG
128
static int
129
bitset_on_num(BitSetRef bs)
130
{
131
  int i, n;
132
133
  n = 0;
134
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135
    if (BITSET_AT(bs, i)) n++;
136
  }
137
  return n;
138
}
139
#endif
140
141
extern int
142
onig_bbuf_init(BBuf* buf, OnigDistance size)
143
3.31M
{
144
3.31M
  if (size <= 0) {
145
0
    size   = 0;
146
0
    buf->p = NULL;
147
0
  }
148
3.31M
  else {
149
3.31M
    buf->p = (UChar* )xmalloc(size);
150
3.31M
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151
3.31M
  }
152
153
3.31M
  buf->alloc = (unsigned int )size;
154
3.31M
  buf->used  = 0;
155
3.31M
  return 0;
156
3.31M
}
157
158
159
#ifdef USE_SUBEXP_CALL
160
161
static int
162
unset_addr_list_init(UnsetAddrList* uslist, int size)
163
2.74k
{
164
2.74k
  UnsetAddr* p;
165
166
2.74k
  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167
2.74k
  CHECK_NULL_RETURN_MEMERR(p);
168
2.74k
  uslist->num   = 0;
169
2.74k
  uslist->alloc = size;
170
2.74k
  uslist->us    = p;
171
2.74k
  return 0;
172
2.74k
}
173
174
static void
175
unset_addr_list_end(UnsetAddrList* uslist)
176
2.74k
{
177
2.74k
  if (IS_NOT_NULL(uslist->us))
178
2.74k
    xfree(uslist->us);
179
2.74k
}
180
181
static int
182
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183
14.0k
{
184
14.0k
  UnsetAddr* p;
185
14.0k
  int size;
186
187
14.0k
  if (uslist->num >= uslist->alloc) {
188
2.34k
    size = uslist->alloc * 2;
189
2.34k
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190
2.34k
    CHECK_NULL_RETURN_MEMERR(p);
191
2.34k
    uslist->alloc = size;
192
2.34k
    uslist->us    = p;
193
2.34k
  }
194
195
14.0k
  uslist->us[uslist->num].offset = offset;
196
14.0k
  uslist->us[uslist->num].target = node;
197
14.0k
  uslist->num++;
198
14.0k
  return 0;
199
14.0k
}
200
#endif /* USE_SUBEXP_CALL */
201
202
203
static int
204
add_opcode(regex_t* reg, int opcode)
205
18.8M
{
206
18.8M
  BBUF_ADD1(reg, opcode);
207
18.8M
  return 0;
208
18.8M
}
209
210
#ifdef USE_COMBINATION_EXPLOSION_CHECK
211
static int
212
add_state_check_num(regex_t* reg, int num)
213
{
214
  StateCheckNumType n = (StateCheckNumType )num;
215
216
  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217
  return 0;
218
}
219
#endif
220
221
static int
222
add_rel_addr(regex_t* reg, int addr)
223
7.51M
{
224
7.51M
  RelAddrType ra = (RelAddrType )addr;
225
226
7.51M
  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227
7.51M
  return 0;
228
7.51M
}
229
230
static int
231
add_abs_addr(regex_t* reg, int addr)
232
29.6k
{
233
29.6k
  AbsAddrType ra = (AbsAddrType )addr;
234
235
29.6k
  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236
29.6k
  return 0;
237
29.6k
}
238
239
static int
240
add_length(regex_t* reg, OnigDistance len)
241
2.91M
{
242
2.91M
  LengthType l = (LengthType )len;
243
244
2.91M
  BBUF_ADD(reg, &l, SIZE_LENGTH);
245
2.91M
  return 0;
246
2.91M
}
247
248
static int
249
add_mem_num(regex_t* reg, int num)
250
878k
{
251
878k
  MemNumType n = (MemNumType )num;
252
253
878k
  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254
878k
  return 0;
255
878k
}
256
257
#if 0
258
static int
259
add_pointer(regex_t* reg, void* addr)
260
{
261
  PointerType ptr = (PointerType )addr;
262
263
  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264
  return 0;
265
}
266
#endif
267
268
static int
269
add_option(regex_t* reg, OnigOptionType option)
270
1.97k
{
271
1.97k
  BBUF_ADD(reg, &option, SIZE_OPTION);
272
1.97k
  return 0;
273
1.97k
}
274
275
static int
276
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277
7.48M
{
278
7.48M
  int r;
279
280
7.48M
  r = add_opcode(reg, opcode);
281
7.48M
  if (r) return r;
282
7.48M
  r = add_rel_addr(reg, addr);
283
7.48M
  return r;
284
7.48M
}
285
286
static int
287
add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288
4.61M
{
289
4.61M
  BBUF_ADD(reg, bytes, len);
290
4.61M
  return 0;
291
4.61M
}
292
293
static int
294
add_bitset(regex_t* reg, BitSetRef bs)
295
1.87M
{
296
1.87M
  BBUF_ADD(reg, bs, SIZE_BITSET);
297
1.87M
  return 0;
298
1.87M
}
299
300
static int
301
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302
0
{
303
0
  int r;
304
0
305
0
  r = add_opcode(reg, opcode);
306
0
  if (r) return r;
307
0
  r = add_option(reg, option);
308
0
  return r;
309
0
}
310
311
static int compile_length_tree(Node* node, regex_t* reg);
312
static int compile_tree(Node* node, regex_t* reg);
313
314
315
/*
 * True for the EXACT opcodes that are followed by a string-length operand
 * in the compiled program (see add_compile_string).  `op` is expanded
 * several times, so pass an expression without side effects.
 */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || \
    (op) == OP_EXACTMB2N || \
    (op) == OP_EXACTMB3N || \
    (op) == OP_EXACTMBN  || \
    (op) == OP_EXACTN_IC)
318
319
static int
320
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321
6.89M
{
322
6.89M
  int op;
323
6.89M
  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325
6.89M
  if (ignore_case) {
326
339k
    switch (str_len) {
327
126k
    case 1:  op = OP_EXACT1_IC; break;
328
212k
    default: op = OP_EXACTN_IC; break;
329
339k
    }
330
339k
  }
331
6.55M
  else {
332
6.55M
    switch (mb_len) {
333
6.32M
    case 1:
334
6.32M
      switch (str_len) {
335
2.12M
      case 1:  op = OP_EXACT1; break;
336
1.10M
      case 2:  op = OP_EXACT2; break;
337
1.11M
      case 3:  op = OP_EXACT3; break;
338
206k
      case 4:  op = OP_EXACT4; break;
339
252k
      case 5:  op = OP_EXACT5; break;
340
1.51M
      default: op = OP_EXACTN; break;
341
6.32M
      }
342
6.32M
      break;
343
344
6.32M
    case 2:
345
145k
      switch (str_len) {
346
90.2k
      case 1:  op = OP_EXACTMB2N1; break;
347
14.6k
      case 2:  op = OP_EXACTMB2N2; break;
348
38.0k
      case 3:  op = OP_EXACTMB2N3; break;
349
2.38k
      default: op = OP_EXACTMB2N;  break;
350
145k
      }
351
145k
      break;
352
353
145k
    case 3:
354
61.2k
      op = OP_EXACTMB3N;
355
61.2k
      break;
356
357
21.9k
    default:
358
21.9k
      op = OP_EXACTMBN;
359
21.9k
      break;
360
6.55M
    }
361
6.55M
  }
362
6.89M
  return op;
363
6.89M
}
364
365
static int
366
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367
1.54M
{
368
1.54M
  int r;
369
1.54M
  int saved_num_null_check = reg->num_null_check;
370
371
1.54M
  if (empty_info != 0) {
372
33.4k
    r = add_opcode(reg, OP_NULL_CHECK_START);
373
33.4k
    if (r) return r;
374
33.4k
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375
33.4k
    if (r) return r;
376
33.4k
    reg->num_null_check++;
377
33.4k
  }
378
379
1.54M
  r = compile_tree(node, reg);
380
1.54M
  if (r) return r;
381
382
1.54M
  if (empty_info != 0) {
383
33.4k
    if (empty_info == NQ_TARGET_IS_EMPTY)
384
29.8k
      r = add_opcode(reg, OP_NULL_CHECK_END);
385
3.63k
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386
1.93k
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387
1.69k
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388
1.69k
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390
33.4k
    if (r) return r;
391
33.4k
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392
33.4k
  }
393
1.54M
  return r;
394
1.54M
}
395
396
#ifdef USE_SUBEXP_CALL
397
static int
398
compile_call(CallNode* node, regex_t* reg)
399
14.0k
{
400
14.0k
  int r;
401
402
14.0k
  r = add_opcode(reg, OP_CALL);
403
14.0k
  if (r) return r;
404
14.0k
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405
14.0k
                          node->target);
406
14.0k
  if (r) return r;
407
14.0k
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408
14.0k
  return r;
409
14.0k
}
410
#endif
411
412
static int
413
compile_tree_n_times(Node* node, int n, regex_t* reg)
414
2.64M
{
415
2.64M
  int i, r;
416
417
4.43M
  for (i = 0; i < n; i++) {
418
1.79M
    r = compile_tree(node, reg);
419
1.79M
    if (r) return r;
420
1.79M
  }
421
2.64M
  return 0;
422
2.64M
}
423
424
static int
425
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426
                          regex_t* reg ARG_UNUSED, int ignore_case)
427
4.50M
{
428
4.50M
  int len;
429
4.50M
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431
4.50M
  len = SIZE_OPCODE;
432
433
4.50M
  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
434
4.50M
  if (IS_NEED_STR_LEN_OP_EXACT(op))
435
1.04M
    len += SIZE_LENGTH;
436
437
4.50M
  len += (int )byte_len;
438
4.50M
  return len;
439
4.50M
}
440
441
static int
442
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443
                   regex_t* reg, int ignore_case)
444
2.39M
{
445
2.39M
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446
2.39M
  add_opcode(reg, op);
447
448
2.39M
  if (op == OP_EXACTMBN)
449
7.23k
    add_length(reg, mb_len);
450
451
2.39M
  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452
772k
    if (op == OP_EXACTN_IC)
453
46.4k
      add_length(reg, byte_len);
454
725k
    else
455
725k
      add_length(reg, byte_len / mb_len);
456
772k
  }
457
458
2.39M
  add_bytes(reg, s, byte_len);
459
2.39M
  return 0;
460
2.39M
}
461
462
463
static int
464
compile_length_string_node(Node* node, regex_t* reg)
465
3.56M
{
466
3.56M
  int rlen, r, len, prev_len, blen, ambig;
467
3.56M
  OnigEncoding enc = reg->enc;
468
3.56M
  UChar *p, *prev;
469
3.56M
  StrNode* sn;
470
471
3.56M
  sn = NSTR(node);
472
3.56M
  if (sn->end <= sn->s)
473
375k
    return 0;
474
475
3.18M
  ambig = NSTRING_IS_AMBIG(node);
476
477
3.18M
  p = prev = sn->s;
478
3.18M
  prev_len = enclen(enc, p, sn->end);
479
3.18M
  p += prev_len;
480
3.18M
  blen = prev_len;
481
3.18M
  rlen = 0;
482
483
16.5M
  for (; p < sn->end; ) {
484
13.3M
    len = enclen(enc, p, sn->end);
485
13.3M
    if (len == prev_len || ambig) {
486
13.2M
      blen += len;
487
13.2M
    }
488
65.5k
    else {
489
65.5k
      r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490
65.5k
      rlen += r;
491
65.5k
      prev = p;
492
65.5k
      blen = len;
493
65.5k
      prev_len = len;
494
65.5k
    }
495
13.3M
    p += len;
496
13.3M
  }
497
3.18M
  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498
3.18M
  rlen += r;
499
3.18M
  return rlen;
500
3.56M
}
501
502
static int
503
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504
1.24M
{
505
1.24M
  if (sn->end <= sn->s)
506
0
    return 0;
507
508
1.24M
  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509
1.24M
}
510
511
static int
512
compile_string_node(Node* node, regex_t* reg)
513
2.20M
{
514
2.20M
  int r, len, prev_len, blen, ambig;
515
2.20M
  OnigEncoding enc = reg->enc;
516
2.20M
  UChar *p, *prev, *end;
517
2.20M
  StrNode* sn;
518
519
2.20M
  sn = NSTR(node);
520
2.20M
  if (sn->end <= sn->s)
521
55.9k
    return 0;
522
523
2.14M
  end = sn->end;
524
2.14M
  ambig = NSTRING_IS_AMBIG(node);
525
526
2.14M
  p = prev = sn->s;
527
2.14M
  prev_len = enclen(enc, p, end);
528
2.14M
  p += prev_len;
529
2.14M
  blen = prev_len;
530
531
15.7M
  for (; p < end; ) {
532
13.6M
    len = enclen(enc, p, end);
533
13.6M
    if (len == prev_len || ambig) {
534
13.5M
      blen += len;
535
13.5M
    }
536
27.4k
    else {
537
27.4k
      r = add_compile_string(prev, prev_len, blen, reg, ambig);
538
27.4k
      if (p + len > end) {
539
0
        return 0;
540
0
      }
541
27.4k
      if (r) return r;
542
543
27.4k
      prev  = p;
544
27.4k
      blen  = len;
545
27.4k
      prev_len = len;
546
27.4k
    }
547
548
13.6M
    p += len;
549
13.6M
  }
550
2.14M
  return add_compile_string(prev, prev_len, blen, reg, ambig);
551
2.14M
}
552
553
static int
554
compile_string_raw_node(StrNode* sn, regex_t* reg)
555
212k
{
556
212k
  if (sn->end <= sn->s)
557
0
    return 0;
558
559
212k
  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560
212k
}
561
562
static int
563
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
564
2.12M
{
565
2.12M
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566
2.12M
  add_length(reg, mbuf->used);
567
2.12M
  return add_bytes(reg, mbuf->p, mbuf->used);
568
#else
569
  int r, pad_size;
570
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
571
572
  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573
  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575
576
  r = add_bytes(reg, mbuf->p, mbuf->used);
577
578
  /* padding for return value from compile_length_cclass_node() to be fix. */
579
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581
  return r;
582
#endif
583
2.12M
}
584
585
static int
586
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
587
14.4M
{
588
14.4M
  int len;
589
590
14.4M
  if (IS_NULL(cc->mbuf)) {
591
1.36M
    len = SIZE_OPCODE + SIZE_BITSET;
592
1.36M
  }
593
13.0M
  else {
594
13.0M
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595
11.9M
      len = SIZE_OPCODE;
596
11.9M
    }
597
1.15M
    else {
598
1.15M
      len = SIZE_OPCODE + SIZE_BITSET;
599
1.15M
    }
600
13.0M
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601
13.0M
    len += SIZE_LENGTH + cc->mbuf->used;
602
#else
603
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604
#endif
605
13.0M
  }
606
607
14.4M
  return len;
608
14.4M
}
609
610
static int
611
compile_cclass_node(CClassNode* cc, regex_t* reg)
612
3.77M
{
613
3.77M
  int r;
614
615
3.77M
  if (IS_NULL(cc->mbuf)) {
616
1.65M
    if (IS_NCCLASS_NOT(cc))
617
468k
      add_opcode(reg, OP_CCLASS_NOT);
618
1.18M
    else
619
1.18M
      add_opcode(reg, OP_CCLASS);
620
621
1.65M
    r = add_bitset(reg, cc->bs);
622
1.65M
  }
623
2.12M
  else {
624
2.12M
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
625
1.90M
      if (IS_NCCLASS_NOT(cc))
626
574
  add_opcode(reg, OP_CCLASS_MB_NOT);
627
1.89M
      else
628
1.89M
  add_opcode(reg, OP_CCLASS_MB);
629
630
1.90M
      r = add_multi_byte_cclass(cc->mbuf, reg);
631
1.90M
    }
632
220k
    else {
633
220k
      if (IS_NCCLASS_NOT(cc))
634
3.85k
  add_opcode(reg, OP_CCLASS_MIX_NOT);
635
216k
      else
636
216k
  add_opcode(reg, OP_CCLASS_MIX);
637
638
220k
      r = add_bitset(reg, cc->bs);
639
220k
      if (r) return r;
640
220k
      r = add_multi_byte_cclass(cc->mbuf, reg);
641
220k
    }
642
2.12M
  }
643
644
3.77M
  return r;
645
3.77M
}
646
647
static int
648
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
649
25.1k
{
650
25.1k
#define REPEAT_RANGE_ALLOC  4
651
652
25.1k
  OnigRepeatRange* p;
653
654
25.1k
  if (reg->repeat_range_alloc == 0) {
655
2.09k
    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
656
2.09k
    CHECK_NULL_RETURN_MEMERR(p);
657
2.09k
    reg->repeat_range = p;
658
2.09k
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
659
2.09k
  }
660
23.0k
  else if (reg->repeat_range_alloc <= id) {
661
5.10k
    int n;
662
5.10k
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
663
5.10k
    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
664
5.10k
                                    sizeof(OnigRepeatRange) * n);
665
5.10k
    CHECK_NULL_RETURN_MEMERR(p);
666
5.10k
    reg->repeat_range = p;
667
5.10k
    reg->repeat_range_alloc = n;
668
5.10k
  }
669
17.9k
  else {
670
17.9k
    p = reg->repeat_range;
671
17.9k
  }
672
673
25.1k
  p[id].lower = lower;
674
25.1k
  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
675
25.1k
  return 0;
676
25.1k
}
677
678
static int
679
compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
680
                          regex_t* reg)
681
25.1k
{
682
25.1k
  int r;
683
25.1k
  int num_repeat = reg->num_repeat;
684
685
25.1k
  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
686
25.1k
  if (r) return r;
687
25.1k
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
688
25.1k
  reg->num_repeat++;
689
25.1k
  if (r) return r;
690
25.1k
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
691
25.1k
  if (r) return r;
692
693
25.1k
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
694
25.1k
  if (r) return r;
695
696
25.1k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
697
25.1k
  if (r) return r;
698
699
25.1k
  if (
700
25.1k
#ifdef USE_SUBEXP_CALL
701
25.1k
      reg->num_call > 0 ||
702
25.1k
#endif
703
25.1k
      IS_QUANTIFIER_IN_REPEAT(qn)) {
704
19.7k
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
705
19.7k
  }
706
5.44k
  else {
707
5.44k
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
708
5.44k
  }
709
25.1k
  if (r) return r;
710
25.1k
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
711
25.1k
  return r;
712
25.1k
}
713
714
static int
715
is_anychar_star_quantifier(QtfrNode* qn)
716
1.97M
{
717
1.97M
  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
718
1.97M
      NTYPE(qn->target) == NT_CANY)
719
255k
    return 1;
720
1.71M
  else
721
1.71M
    return 0;
722
1.97M
}
723
724
10.0M
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
725
#define CKN_ON   (ckn > 0)
726
727
#ifdef USE_COMBINATION_EXPLOSION_CHECK
728
729
static int
730
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
731
{
732
  int len, mod_tlen, cklen;
733
  int ckn;
734
  int infinite = IS_REPEAT_INFINITE(qn->upper);
735
  int empty_info = qn->target_empty_info;
736
  int tlen = compile_length_tree(qn->target, reg);
737
738
  if (tlen < 0) return tlen;
739
740
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
741
742
  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
743
744
  /* anychar repeat */
745
  if (NTYPE(qn->target) == NT_CANY) {
746
    if (qn->greedy && infinite) {
747
      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
748
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
749
      else
750
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
751
    }
752
  }
753
754
  if (empty_info != 0)
755
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
756
  else
757
    mod_tlen = tlen;
758
759
  if (infinite && qn->lower <= 1) {
760
    if (qn->greedy) {
761
      if (qn->lower == 1)
762
  len = SIZE_OP_JUMP;
763
      else
764
  len = 0;
765
766
      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
767
    }
768
    else {
769
      if (qn->lower == 0)
770
  len = SIZE_OP_JUMP;
771
      else
772
  len = 0;
773
774
      len += mod_tlen + SIZE_OP_PUSH + cklen;
775
    }
776
  }
777
  else if (qn->upper == 0) {
778
    if (qn->is_referred != 0) /* /(?<n>..){0}/ */
779
      len = SIZE_OP_JUMP + tlen;
780
    else
781
      len = 0;
782
  }
783
  else if (qn->upper == 1 && qn->greedy) {
784
    if (qn->lower == 0) {
785
      if (CKN_ON) {
786
  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
787
      }
788
      else {
789
  len = SIZE_OP_PUSH + tlen;
790
      }
791
    }
792
    else {
793
      len = tlen;
794
    }
795
  }
796
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
797
    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
798
  }
799
  else {
800
    len = SIZE_OP_REPEAT_INC
801
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
802
    if (CKN_ON)
803
      len += SIZE_OP_STATE_CHECK;
804
  }
805
806
  return len;
807
}
808
809
static int
810
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
811
{
812
  int r, mod_tlen;
813
  int ckn;
814
  int infinite = IS_REPEAT_INFINITE(qn->upper);
815
  int empty_info = qn->target_empty_info;
816
  int tlen = compile_length_tree(qn->target, reg);
817
818
  if (tlen < 0) return tlen;
819
820
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
821
822
  if (is_anychar_star_quantifier(qn)) {
823
    r = compile_tree_n_times(qn->target, qn->lower, reg);
824
    if (r) return r;
825
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
826
      if (IS_MULTILINE(reg->options))
827
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
828
      else
829
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
830
      if (r) return r;
831
      if (CKN_ON) {
832
  r = add_state_check_num(reg, ckn);
833
  if (r) return r;
834
      }
835
836
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
837
    }
838
    else {
839
      if (IS_MULTILINE(reg->options)) {
840
  r = add_opcode(reg, (CKN_ON ?
841
             OP_STATE_CHECK_ANYCHAR_ML_STAR
842
           : OP_ANYCHAR_ML_STAR));
843
      }
844
      else {
845
  r = add_opcode(reg, (CKN_ON ?
846
             OP_STATE_CHECK_ANYCHAR_STAR
847
           : OP_ANYCHAR_STAR));
848
      }
849
      if (r) return r;
850
      if (CKN_ON)
851
  r = add_state_check_num(reg, ckn);
852
853
      return r;
854
    }
855
  }
856
857
  if (empty_info != 0)
858
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
859
  else
860
    mod_tlen = tlen;
861
862
  if (infinite && qn->lower <= 1) {
863
    if (qn->greedy) {
864
      if (qn->lower == 1) {
865
  r = add_opcode_rel_addr(reg, OP_JUMP,
866
      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
867
  if (r) return r;
868
      }
869
870
      if (CKN_ON) {
871
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
872
  if (r) return r;
873
  r = add_state_check_num(reg, ckn);
874
  if (r) return r;
875
  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
876
      }
877
      else {
878
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
879
      }
880
      if (r) return r;
881
      r = compile_tree_empty_check(qn->target, reg, empty_info);
882
      if (r) return r;
883
      r = add_opcode_rel_addr(reg, OP_JUMP,
884
        -(mod_tlen + (int )SIZE_OP_JUMP
885
    + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
886
    }
887
    else {
888
      if (qn->lower == 0) {
889
  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
890
  if (r) return r;
891
      }
892
      r = compile_tree_empty_check(qn->target, reg, empty_info);
893
      if (r) return r;
894
      if (CKN_ON) {
895
  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
896
  if (r) return r;
897
  r = add_state_check_num(reg, ckn);
898
  if (r) return r;
899
  r = add_rel_addr(reg,
900
     -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
901
      }
902
      else
903
  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
904
    }
905
  }
906
  else if (qn->upper == 0) {
907
    if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
908
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
909
      if (r) return r;
910
      r = compile_tree(qn->target, reg);
911
    }
912
    else
913
      r = 0;
914
  }
915
  else if (qn->upper == 1 && qn->greedy) {
916
    if (qn->lower == 0) {
917
      if (CKN_ON) {
918
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
919
  if (r) return r;
920
  r = add_state_check_num(reg, ckn);
921
  if (r) return r;
922
  r = add_rel_addr(reg, tlen);
923
      }
924
      else {
925
  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
926
      }
927
      if (r) return r;
928
    }
929
930
    r = compile_tree(qn->target, reg);
931
  }
932
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
933
    if (CKN_ON) {
934
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
935
      if (r) return r;
936
      r = add_state_check_num(reg, ckn);
937
      if (r) return r;
938
      r = add_rel_addr(reg, SIZE_OP_JUMP);
939
    }
940
    else {
941
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
942
    }
943
944
    if (r) return r;
945
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
946
    if (r) return r;
947
    r = compile_tree(qn->target, reg);
948
  }
949
  else {
950
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
951
    if (CKN_ON) {
952
      if (r) return r;
953
      r = add_opcode(reg, OP_STATE_CHECK);
954
      if (r) return r;
955
      r = add_state_check_num(reg, ckn);
956
    }
957
  }
958
  return r;
959
}
960
961
#else /* USE_COMBINATION_EXPLOSION_CHECK */
962
963
static int
964
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
965
14.1M
{
966
14.1M
  int len, mod_tlen;
967
14.1M
  int infinite = IS_REPEAT_INFINITE(qn->upper);
968
14.1M
  int empty_info = qn->target_empty_info;
969
14.1M
  int tlen = compile_length_tree(qn->target, reg);
970
971
14.1M
  if (tlen < 0) return tlen;
972
973
  /* anychar repeat */
974
14.1M
  if (NTYPE(qn->target) == NT_CANY) {
975
19.9k
    if (qn->greedy && infinite) {
976
9.96k
      if (IS_NOT_NULL(qn->next_head_exact))
977
4.22k
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
978
5.74k
      else
979
5.74k
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
980
9.96k
    }
981
19.9k
  }
982
983
14.1M
  if (empty_info != 0)
984
3.91M
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
985
10.2M
  else
986
10.2M
    mod_tlen = tlen;
987
988
14.1M
  if (infinite &&
989
14.1M
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
990
13.3M
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
991
5.72M
      len = SIZE_OP_JUMP;
992
5.72M
    }
993
7.62M
    else {
994
7.62M
      len = tlen * qn->lower;
995
7.62M
    }
996
997
13.3M
    if (qn->greedy) {
998
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
999
      if (IS_NOT_NULL(qn->head_exact))
1000
  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1001
      else
1002
#endif
1003
13.3M
      if (IS_NOT_NULL(qn->next_head_exact))
1004
1.14M
  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1005
12.1M
      else
1006
12.1M
  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1007
13.3M
    }
1008
36.3k
    else
1009
36.3k
      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1010
13.3M
  }
1011
832k
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1012
2.37k
    len = SIZE_OP_JUMP + tlen;
1013
2.37k
  }
1014
830k
  else if (!infinite && qn->greedy &&
1015
830k
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1016
755k
                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1017
755k
    len = tlen * qn->lower;
1018
755k
    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1019
755k
  }
1020
75.3k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1021
4.90k
    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1022
4.90k
  }
1023
70.4k
  else {
1024
70.4k
    len = SIZE_OP_REPEAT_INC
1025
70.4k
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1026
70.4k
  }
1027
1028
14.1M
  return len;
1029
14.1M
}
1030
1031
static int
1032
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1033
1.97M
{
1034
1.97M
  int i, r, mod_tlen;
1035
1.97M
  int infinite = IS_REPEAT_INFINITE(qn->upper);
1036
1.97M
  int empty_info = qn->target_empty_info;
1037
1.97M
  int tlen = compile_length_tree(qn->target, reg);
1038
1039
1.97M
  if (tlen < 0) return tlen;
1040
1041
1.97M
  if (is_anychar_star_quantifier(qn)) {
1042
255k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1043
255k
    if (r) return r;
1044
255k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1045
74.4k
      if (IS_MULTILINE(reg->options))
1046
1.05k
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1047
73.4k
      else
1048
73.4k
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1049
74.4k
      if (r) return r;
1050
74.4k
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1051
74.4k
    }
1052
181k
    else {
1053
181k
      if (IS_MULTILINE(reg->options))
1054
327
  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1055
180k
      else
1056
180k
  return add_opcode(reg, OP_ANYCHAR_STAR);
1057
181k
    }
1058
255k
  }
1059
1060
1.71M
  if (empty_info != 0)
1061
36.3k
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1062
1.67M
  else
1063
1.67M
    mod_tlen = tlen;
1064
1065
1.71M
  if (infinite &&
1066
1.71M
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1067
1.52M
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1068
34.1k
      if (qn->greedy) {
1069
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1070
  if (IS_NOT_NULL(qn->head_exact))
1071
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1072
  else
1073
#endif
1074
33.2k
  if (IS_NOT_NULL(qn->next_head_exact))
1075
4.88k
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1076
28.3k
  else
1077
28.3k
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1078
33.2k
      }
1079
937
      else {
1080
937
  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1081
937
      }
1082
34.1k
      if (r) return r;
1083
34.1k
    }
1084
1.48M
    else {
1085
1.48M
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1086
1.48M
      if (r) return r;
1087
1.48M
    }
1088
1089
1.52M
    if (qn->greedy) {
1090
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1091
      if (IS_NOT_NULL(qn->head_exact)) {
1092
  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093
           mod_tlen + SIZE_OP_JUMP);
1094
  if (r) return r;
1095
  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097
  if (r) return r;
1098
  r = add_opcode_rel_addr(reg, OP_JUMP,
1099
  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100
      }
1101
      else
1102
#endif
1103
1.45M
      if (IS_NOT_NULL(qn->next_head_exact)) {
1104
29.5k
  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1105
29.5k
        mod_tlen + SIZE_OP_JUMP);
1106
29.5k
  if (r) return r;
1107
29.5k
  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1108
29.5k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1109
29.5k
  if (r) return r;
1110
29.5k
  r = add_opcode_rel_addr(reg, OP_JUMP,
1111
29.5k
          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1112
29.5k
      }
1113
1.42M
      else {
1114
1.42M
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1115
1.42M
  if (r) return r;
1116
1.42M
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1117
1.42M
  if (r) return r;
1118
1.42M
  r = add_opcode_rel_addr(reg, OP_JUMP,
1119
1.42M
         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1120
1.42M
      }
1121
1.45M
    }
1122
66.8k
    else {
1123
66.8k
      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1124
66.8k
      if (r) return r;
1125
66.8k
      r = compile_tree_empty_check(qn->target, reg, empty_info);
1126
66.8k
      if (r) return r;
1127
66.8k
      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1128
66.8k
    }
1129
1.52M
  }
1130
193k
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1131
109
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1132
109
    if (r) return r;
1133
109
    r = compile_tree(qn->target, reg);
1134
109
  }
1135
193k
  else if (!infinite && qn->greedy &&
1136
193k
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1137
166k
                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1138
166k
    int n = qn->upper - qn->lower;
1139
1140
166k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1141
166k
    if (r) return r;
1142
1143
225k
    for (i = 0; i < n; i++) {
1144
59.3k
      r = add_opcode_rel_addr(reg, OP_PUSH,
1145
59.3k
         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1146
59.3k
      if (r) return r;
1147
59.3k
      r = compile_tree(qn->target, reg);
1148
59.3k
      if (r) return r;
1149
59.3k
    }
1150
166k
  }
1151
27.2k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1152
2.10k
    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1153
2.10k
    if (r) return r;
1154
2.10k
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1155
2.10k
    if (r) return r;
1156
2.10k
    r = compile_tree(qn->target, reg);
1157
2.10k
  }
1158
25.1k
  else {
1159
25.1k
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1160
25.1k
  }
1161
1.71M
  return r;
1162
1.71M
}
1163
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164
1165
static int
1166
compile_length_option_node(EncloseNode* node, regex_t* reg)
1167
940k
{
1168
940k
  int tlen;
1169
940k
  OnigOptionType prev = reg->options;
1170
1171
940k
  reg->options = node->option;
1172
940k
  tlen = compile_length_tree(node->target, reg);
1173
940k
  reg->options = prev;
1174
1175
940k
  if (tlen < 0) return tlen;
1176
1177
939k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178
0
    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1179
0
           + tlen + SIZE_OP_SET_OPTION;
1180
0
  }
1181
939k
  else
1182
939k
    return tlen;
1183
939k
}
1184
1185
static int
1186
compile_option_node(EncloseNode* node, regex_t* reg)
1187
250k
{
1188
250k
  int r;
1189
250k
  OnigOptionType prev = reg->options;
1190
1191
250k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1192
0
    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1193
0
    if (r) return r;
1194
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1195
0
    if (r) return r;
1196
0
    r = add_opcode(reg, OP_FAIL);
1197
0
    if (r) return r;
1198
0
  }
1199
1200
250k
  reg->options = node->option;
1201
250k
  r = compile_tree(node->target, reg);
1202
250k
  reg->options = prev;
1203
1204
250k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1205
0
    if (r) return r;
1206
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1207
0
  }
1208
250k
  return r;
1209
250k
}
1210
1211
static int
1212
compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1213
8.53M
{
1214
8.53M
  int len;
1215
8.53M
  int tlen;
1216
1217
8.53M
  if (node->type == ENCLOSE_OPTION)
1218
940k
    return compile_length_option_node(node, reg);
1219
1220
7.59M
  if (node->target) {
1221
7.59M
    tlen = compile_length_tree(node->target, reg);
1222
7.59M
    if (tlen < 0) return tlen;
1223
7.59M
  }
1224
0
  else
1225
0
    tlen = 0;
1226
1227
7.58M
  switch (node->type) {
1228
256k
  case ENCLOSE_MEMORY:
1229
256k
#ifdef USE_SUBEXP_CALL
1230
256k
    if (IS_ENCLOSE_CALLED(node)) {
1231
25.8k
      len = SIZE_OP_MEMORY_START_PUSH + tlen
1232
25.8k
    + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1233
25.8k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1234
2.21k
  len += (IS_ENCLOSE_RECURSION(node)
1235
2.21k
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1236
23.6k
      else
1237
23.6k
  len += (IS_ENCLOSE_RECURSION(node)
1238
23.6k
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1239
25.8k
    }
1240
230k
    else if (IS_ENCLOSE_RECURSION(node)) {
1241
15.2k
      len = SIZE_OP_MEMORY_START_PUSH;
1242
15.2k
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1243
15.2k
         ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1244
15.2k
    }
1245
215k
    else
1246
215k
#endif
1247
215k
    {
1248
215k
      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1249
215k
  len = SIZE_OP_MEMORY_START_PUSH;
1250
27
      else
1251
27
  len = SIZE_OP_MEMORY_START;
1252
1253
215k
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1254
215k
         ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1255
215k
    }
1256
256k
    break;
1257
1258
7.26M
  case ENCLOSE_STOP_BACKTRACK:
1259
7.26M
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1260
1.15M
      QtfrNode* qn = NQTFR(node->target);
1261
1.15M
      tlen = compile_length_tree(qn->target, reg);
1262
1.15M
      if (tlen < 0) return tlen;
1263
1264
1.15M
      len = tlen * qn->lower
1265
1.15M
    + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1266
1.15M
    }
1267
6.10M
    else {
1268
6.10M
      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1269
6.10M
    }
1270
7.26M
    break;
1271
1272
7.26M
  case ENCLOSE_CONDITION:
1273
48.8k
    len = SIZE_OP_CONDITION;
1274
48.8k
    if (NTYPE(node->target) == NT_ALT) {
1275
48.8k
      Node* x = node->target;
1276
1277
48.8k
      tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1278
48.8k
      if (tlen < 0) return tlen;
1279
48.8k
      len += tlen + SIZE_OP_JUMP;
1280
48.8k
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1281
48.8k
      x = NCDR(x);
1282
48.8k
      tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1283
48.8k
      if (tlen < 0) return tlen;
1284
48.8k
      len += tlen;
1285
48.8k
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1286
48.8k
    }
1287
0
    else {
1288
0
      return ONIGERR_PARSER_BUG;
1289
0
    }
1290
48.5k
    break;
1291
1292
48.5k
  case ENCLOSE_ABSENT:
1293
22.8k
    len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1294
22.8k
    break;
1295
1296
0
  default:
1297
0
    return ONIGERR_TYPE_BUG;
1298
0
    break;
1299
7.58M
  }
1300
1301
7.58M
  return len;
1302
7.58M
}
1303
1304
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1305
1306
static int
1307
compile_enclose_node(EncloseNode* node, regex_t* reg)
1308
1.52M
{
1309
1.52M
  int r, len;
1310
1311
1.52M
  if (node->type == ENCLOSE_OPTION)
1312
250k
    return compile_option_node(node, reg);
1313
1314
1.27M
  switch (node->type) {
1315
369k
  case ENCLOSE_MEMORY:
1316
369k
#ifdef USE_SUBEXP_CALL
1317
369k
    if (IS_ENCLOSE_CALLED(node)) {
1318
15.6k
      r = add_opcode(reg, OP_CALL);
1319
15.6k
      if (r) return r;
1320
15.6k
      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1321
15.6k
      node->state |= NST_ADDR_FIXED;
1322
15.6k
      r = add_abs_addr(reg, (int )node->call_addr);
1323
15.6k
      if (r) return r;
1324
15.6k
      len = compile_length_tree(node->target, reg);
1325
15.6k
      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1326
15.6k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1327
1.41k
  len += (IS_ENCLOSE_RECURSION(node)
1328
1.41k
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1329
14.2k
      else
1330
14.2k
  len += (IS_ENCLOSE_RECURSION(node)
1331
14.2k
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1332
1333
15.6k
      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1334
15.6k
      if (r) return r;
1335
15.6k
    }
1336
369k
#endif
1337
369k
    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1338
41.8k
      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1339
327k
    else
1340
327k
      r = add_opcode(reg, OP_MEMORY_START);
1341
369k
    if (r) return r;
1342
369k
    r = add_mem_num(reg, node->regnum);
1343
369k
    if (r) return r;
1344
369k
    r = compile_tree(node->target, reg);
1345
369k
    if (r) return r;
1346
369k
#ifdef USE_SUBEXP_CALL
1347
369k
    if (IS_ENCLOSE_CALLED(node)) {
1348
15.6k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1349
1.41k
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350
1.41k
           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1351
14.2k
      else
1352
14.2k
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1353
14.2k
           ? OP_MEMORY_END_REC : OP_MEMORY_END));
1354
1355
15.6k
      if (r) return r;
1356
15.6k
      r = add_mem_num(reg, node->regnum);
1357
15.6k
      if (r) return r;
1358
15.6k
      r = add_opcode(reg, OP_RETURN);
1359
15.6k
    }
1360
353k
    else if (IS_ENCLOSE_RECURSION(node)) {
1361
3.50k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1362
1.50k
  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1363
2.00k
      else
1364
2.00k
  r = add_opcode(reg, OP_MEMORY_END_REC);
1365
3.50k
      if (r) return r;
1366
3.50k
      r = add_mem_num(reg, node->regnum);
1367
3.50k
    }
1368
350k
    else
1369
350k
#endif
1370
350k
    {
1371
350k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1372
4.68k
  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1373
345k
      else
1374
345k
  r = add_opcode(reg, OP_MEMORY_END);
1375
350k
      if (r) return r;
1376
350k
      r = add_mem_num(reg, node->regnum);
1377
350k
    }
1378
369k
    break;
1379
1380
903k
  case ENCLOSE_STOP_BACKTRACK:
1381
903k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1382
735k
      QtfrNode* qn = NQTFR(node->target);
1383
735k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1384
735k
      if (r) return r;
1385
1386
735k
      len = compile_length_tree(qn->target, reg);
1387
735k
      if (len < 0) return len;
1388
1389
735k
      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1390
735k
      if (r) return r;
1391
735k
      r = compile_tree(qn->target, reg);
1392
735k
      if (r) return r;
1393
735k
      r = add_opcode(reg, OP_POP);
1394
735k
      if (r) return r;
1395
735k
      r = add_opcode_rel_addr(reg, OP_JUMP,
1396
735k
   -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1397
735k
    }
1398
168k
    else {
1399
168k
      r = add_opcode(reg, OP_PUSH_STOP_BT);
1400
168k
      if (r) return r;
1401
168k
      r = compile_tree(node->target, reg);
1402
168k
      if (r) return r;
1403
168k
      r = add_opcode(reg, OP_POP_STOP_BT);
1404
168k
    }
1405
903k
    break;
1406
1407
903k
  case ENCLOSE_CONDITION:
1408
1.33k
    r = add_opcode(reg, OP_CONDITION);
1409
1.33k
    if (r) return r;
1410
1.33k
    r = add_mem_num(reg, node->regnum);
1411
1.33k
    if (r) return r;
1412
1413
1.33k
    if (NTYPE(node->target) == NT_ALT) {
1414
1.33k
      Node* x = node->target;
1415
1.33k
      int len2;
1416
1417
1.33k
      len = compile_length_tree(NCAR(x), reg);  /* yes-node */
1418
1.33k
      if (len < 0) return len;
1419
1.31k
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1420
1.31k
      x = NCDR(x);
1421
1.31k
      len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1422
1.31k
      if (len2 < 0) return len2;
1423
1.31k
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1424
1425
1.29k
      x = node->target;
1426
1.29k
      r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1427
1.29k
      if (r) return r;
1428
1.29k
      r = compile_tree(NCAR(x), reg);   /* yes-node */
1429
1.29k
      if (r) return r;
1430
1.29k
      r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1431
1.29k
      if (r) return r;
1432
1.29k
      x = NCDR(x);
1433
1.29k
      r = compile_tree(NCAR(x), reg);   /* no-node */
1434
1.29k
    }
1435
0
    else {
1436
0
      return ONIGERR_PARSER_BUG;
1437
0
    }
1438
1.29k
    break;
1439
1440
1.29k
  case ENCLOSE_ABSENT:
1441
1.04k
    len = compile_length_tree(node->target, reg);
1442
1.04k
    if (len < 0) return len;
1443
1444
1.03k
    r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1445
1.03k
    if (r) return r;
1446
1.03k
    r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1447
1.03k
    if (r) return r;
1448
1.03k
    r = compile_tree(node->target, reg);
1449
1.03k
    if (r) return r;
1450
1.03k
    r = add_opcode(reg, OP_ABSENT_END);
1451
1.03k
    break;
1452
1453
0
  default:
1454
0
    return ONIGERR_TYPE_BUG;
1455
0
    break;
1456
1.27M
  }
1457
1458
1.27M
  return r;
1459
1.27M
}
1460
1461
static int
1462
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1463
276k
{
1464
276k
  int len;
1465
276k
  int tlen = 0;
1466
1467
276k
  if (node->target) {
1468
48.6k
    tlen = compile_length_tree(node->target, reg);
1469
48.6k
    if (tlen < 0) return tlen;
1470
48.6k
  }
1471
1472
275k
  switch (node->type) {
1473
9.49k
  case ANCHOR_PREC_READ:
1474
9.49k
    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1475
9.49k
    break;
1476
22.5k
  case ANCHOR_PREC_READ_NOT:
1477
22.5k
    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1478
22.5k
    break;
1479
8.56k
  case ANCHOR_LOOK_BEHIND:
1480
8.56k
    len = SIZE_OP_LOOK_BEHIND + tlen;
1481
8.56k
    break;
1482
7.54k
  case ANCHOR_LOOK_BEHIND_NOT:
1483
7.54k
    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1484
7.54k
    break;
1485
1486
227k
  default:
1487
227k
    len = SIZE_OPCODE;
1488
227k
    break;
1489
275k
  }
1490
1491
275k
  return len;
1492
275k
}
1493
1494
static int
1495
compile_anchor_node(AnchorNode* node, regex_t* reg)
1496
1.21M
{
1497
1.21M
  int r, len;
1498
1499
1.21M
  switch (node->type) {
1500
770
  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1501
1.15k
  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1502
798k
  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1503
361k
  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1504
872
  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1505
532
  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1506
1507
37.1k
  case ANCHOR_WORD_BOUND:
1508
37.1k
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1509
36.8k
    else                      r = add_opcode(reg, OP_WORD_BOUND);
1510
37.1k
    break;
1511
507
  case ANCHOR_NOT_WORD_BOUND:
1512
507
    if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1513
195
    else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
1514
507
    break;
1515
0
#ifdef USE_WORD_BEGIN_END
1516
0
  case ANCHOR_WORD_BEGIN:
1517
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1518
0
    else                      r = add_opcode(reg, OP_WORD_BEGIN);
1519
0
    break;
1520
0
  case ANCHOR_WORD_END:
1521
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
1522
0
    else                      r = add_opcode(reg, OP_WORD_END);
1523
0
    break;
1524
0
#endif
1525
526
  case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
1526
1527
2.51k
  case ANCHOR_PREC_READ:
1528
2.51k
    r = add_opcode(reg, OP_PUSH_POS);
1529
2.51k
    if (r) return r;
1530
2.51k
    r = compile_tree(node->target, reg);
1531
2.51k
    if (r) return r;
1532
2.42k
    r = add_opcode(reg, OP_POP_POS);
1533
2.42k
    break;
1534
1535
684
  case ANCHOR_PREC_READ_NOT:
1536
684
    len = compile_length_tree(node->target, reg);
1537
684
    if (len < 0) return len;
1538
662
    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1539
662
    if (r) return r;
1540
662
    r = compile_tree(node->target, reg);
1541
662
    if (r) return r;
1542
662
    r = add_opcode(reg, OP_FAIL_POS);
1543
662
    break;
1544
1545
2.21k
  case ANCHOR_LOOK_BEHIND:
1546
2.21k
    {
1547
2.21k
      int n;
1548
2.21k
      r = add_opcode(reg, OP_LOOK_BEHIND);
1549
2.21k
      if (r) return r;
1550
2.21k
      if (node->char_len < 0) {
1551
1.81k
  r = get_char_length_tree(node->target, reg, &n);
1552
1.81k
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1553
1.81k
      }
1554
405
      else
1555
405
  n = node->char_len;
1556
2.21k
      r = add_length(reg, n);
1557
2.21k
      if (r) return r;
1558
2.21k
      r = compile_tree(node->target, reg);
1559
2.21k
    }
1560
0
    break;
1561
1562
4.25k
  case ANCHOR_LOOK_BEHIND_NOT:
1563
4.25k
    {
1564
4.25k
      int n;
1565
4.25k
      len = compile_length_tree(node->target, reg);
1566
4.25k
      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1567
4.25k
         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1568
4.25k
      if (r) return r;
1569
4.25k
      if (node->char_len < 0) {
1570
3.85k
  r = get_char_length_tree(node->target, reg, &n);
1571
3.85k
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1572
3.85k
      }
1573
399
      else
1574
399
  n = node->char_len;
1575
4.25k
      r = add_length(reg, n);
1576
4.25k
      if (r) return r;
1577
4.25k
      r = compile_tree(node->target, reg);
1578
4.25k
      if (r) return r;
1579
4.25k
      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1580
4.25k
    }
1581
0
    break;
1582
1583
0
  default:
1584
0
    return ONIGERR_TYPE_BUG;
1585
0
    break;
1586
1.21M
  }
1587
1588
1.21M
  return r;
1589
1.21M
}
1590
1591
static int
1592
compile_length_tree(Node* node, regex_t* reg)
1593
49.2M
{
1594
49.2M
  int len, type, r;
1595
1596
49.2M
  type = NTYPE(node);
1597
49.2M
  switch (type) {
1598
4.21M
  case NT_LIST:
1599
4.21M
    len = 0;
1600
11.6M
    do {
1601
11.6M
      r = compile_length_tree(NCAR(node), reg);
1602
11.6M
      if (r < 0) return r;
1603
11.6M
      len += r;
1604
11.6M
    } while (IS_NOT_NULL(node = NCDR(node)));
1605
4.21M
    r = len;
1606
4.21M
    break;
1607
1608
1.71M
  case NT_ALT:
1609
1.71M
    {
1610
1.71M
      int n = 0;
1611
1.71M
      len = 0;
1612
6.85M
      do {
1613
6.85M
  r = compile_length_tree(NCAR(node), reg);
1614
6.85M
  if (r < 0) return r;
1615
6.85M
  len += r;
1616
6.85M
  n++;
1617
6.85M
      } while (IS_NOT_NULL(node = NCDR(node)));
1618
1.71M
      r = len;
1619
1.71M
      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1620
1.71M
    }
1621
0
    break;
1622
1623
4.81M
  case NT_STR:
1624
4.81M
    if (NSTRING_IS_RAW(node))
1625
1.24M
      r = compile_length_string_raw_node(NSTR(node), reg);
1626
3.56M
    else
1627
3.56M
      r = compile_length_string_node(node, reg);
1628
4.81M
    break;
1629
1630
14.4M
  case NT_CCLASS:
1631
14.4M
    r = compile_length_cclass_node(NCCLASS(node), reg);
1632
14.4M
    break;
1633
1634
28.4k
  case NT_CTYPE:
1635
955k
  case NT_CANY:
1636
955k
    r = SIZE_OPCODE;
1637
955k
    break;
1638
1639
53.8k
  case NT_BREF:
1640
53.8k
    {
1641
53.8k
      BRefNode* br = NBREF(node);
1642
1643
53.8k
#ifdef USE_BACKREF_WITH_LEVEL
1644
53.8k
      if (IS_BACKREF_NEST_LEVEL(br)) {
1645
5.12k
  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1646
5.12k
            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1647
5.12k
      }
1648
48.7k
      else
1649
48.7k
#endif
1650
48.7k
      if (br->back_num == 1) {
1651
44.9k
  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1652
44.9k
       ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1653
44.9k
      }
1654
3.79k
      else {
1655
3.79k
  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1656
3.79k
      }
1657
53.8k
    }
1658
53.8k
    break;
1659
1660
0
#ifdef USE_SUBEXP_CALL
1661
39.3k
  case NT_CALL:
1662
39.3k
    r = SIZE_OP_CALL;
1663
39.3k
    break;
1664
0
#endif
1665
1666
14.1M
  case NT_QTFR:
1667
14.1M
    r = compile_length_quantifier_node(NQTFR(node), reg);
1668
14.1M
    break;
1669
1670
8.53M
  case NT_ENCLOSE:
1671
8.53M
    r = compile_length_enclose_node(NENCLOSE(node), reg);
1672
8.53M
    break;
1673
1674
276k
  case NT_ANCHOR:
1675
276k
    r = compile_length_anchor_node(NANCHOR(node), reg);
1676
276k
    break;
1677
1678
0
  default:
1679
0
    return ONIGERR_TYPE_BUG;
1680
0
    break;
1681
49.2M
  }
1682
1683
49.2M
  return r;
1684
49.2M
}
1685
1686
static int
1687
compile_tree(Node* node, regex_t* reg)
1688
13.8M
{
1689
13.8M
  int n, type, len, pos, r = 0;
1690
1691
13.8M
  type = NTYPE(node);
1692
13.8M
  switch (type) {
1693
1.54M
  case NT_LIST:
1694
5.98M
    do {
1695
5.98M
      r = compile_tree(NCAR(node), reg);
1696
5.98M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1697
1.54M
    break;
1698
1699
559k
  case NT_ALT:
1700
559k
    {
1701
559k
      Node* x = node;
1702
559k
      len = 0;
1703
1.98M
      do {
1704
1.98M
  len += compile_length_tree(NCAR(x), reg);
1705
1.98M
  if (NCDR(x) != NULL) {
1706
1.42M
    len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1707
1.42M
  }
1708
1.98M
      } while (IS_NOT_NULL(x = NCDR(x)));
1709
559k
      pos = reg->used + len;  /* goal position */
1710
1711
1.98M
      do {
1712
1.98M
  len = compile_length_tree(NCAR(node), reg);
1713
1.98M
  if (IS_NOT_NULL(NCDR(node))) {
1714
1.42M
    r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1715
1.42M
    if (r) break;
1716
1.42M
  }
1717
1.98M
  r = compile_tree(NCAR(node), reg);
1718
1.98M
  if (r) break;
1719
1.98M
  if (IS_NOT_NULL(NCDR(node))) {
1720
1.42M
    len = pos - (reg->used + SIZE_OP_JUMP);
1721
1.42M
    r = add_opcode_rel_addr(reg, OP_JUMP, len);
1722
1.42M
    if (r) break;
1723
1.42M
  }
1724
1.98M
      } while (IS_NOT_NULL(node = NCDR(node)));
1725
559k
    }
1726
0
    break;
1727
1728
2.41M
  case NT_STR:
1729
2.41M
    if (NSTRING_IS_RAW(node))
1730
212k
      r = compile_string_raw_node(NSTR(node), reg);
1731
2.20M
    else
1732
2.20M
      r = compile_string_node(node, reg);
1733
2.41M
    break;
1734
1735
3.77M
  case NT_CCLASS:
1736
3.77M
    r = compile_cclass_node(NCCLASS(node), reg);
1737
3.77M
    break;
1738
1739
9.73k
  case NT_CTYPE:
1740
9.73k
    {
1741
9.73k
      int op;
1742
1743
9.73k
      switch (NCTYPE(node)->ctype) {
1744
9.73k
      case ONIGENC_CTYPE_WORD:
1745
9.73k
  if (NCTYPE(node)->ascii_range != 0) {
1746
7.96k
    if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
1747
2.16k
    else                         op = OP_ASCII_WORD;
1748
7.96k
  }
1749
1.77k
  else {
1750
1.77k
    if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1751
1.10k
    else                         op = OP_WORD;
1752
1.77k
  }
1753
9.73k
  break;
1754
0
      default:
1755
0
  return ONIGERR_TYPE_BUG;
1756
0
  break;
1757
9.73k
      }
1758
9.73k
      r = add_opcode(reg, op);
1759
9.73k
    }
1760
0
    break;
1761
1762
826k
  case NT_CANY:
1763
826k
    if (IS_MULTILINE(reg->options))
1764
304k
      r = add_opcode(reg, OP_ANYCHAR_ML);
1765
522k
    else
1766
522k
      r = add_opcode(reg, OP_ANYCHAR);
1767
826k
    break;
1768
1769
9.95k
  case NT_BREF:
1770
9.95k
    {
1771
9.95k
      BRefNode* br = NBREF(node);
1772
1773
9.95k
#ifdef USE_BACKREF_WITH_LEVEL
1774
9.95k
      if (IS_BACKREF_NEST_LEVEL(br)) {
1775
1.97k
  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1776
1.97k
  if (r) return r;
1777
1.97k
  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1778
1.97k
  if (r) return r;
1779
1.97k
  r = add_length(reg, br->nest_level);
1780
1.97k
  if (r) return r;
1781
1782
1.97k
  goto add_bacref_mems;
1783
1.97k
      }
1784
7.98k
      else
1785
7.98k
#endif
1786
7.98k
      if (br->back_num == 1) {
1787
5.71k
  n = br->back_static[0];
1788
5.71k
  if (IS_IGNORECASE(reg->options)) {
1789
482
    r = add_opcode(reg, OP_BACKREFN_IC);
1790
482
    if (r) return r;
1791
482
    r = add_mem_num(reg, n);
1792
482
  }
1793
5.23k
  else {
1794
5.23k
    switch (n) {
1795
1.82k
    case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1796
2.26k
    case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1797
1.15k
    default:
1798
1.15k
      r = add_opcode(reg, OP_BACKREFN);
1799
1.15k
      if (r) return r;
1800
1.15k
      r = add_mem_num(reg, n);
1801
1.15k
      break;
1802
5.23k
    }
1803
5.23k
  }
1804
5.71k
      }
1805
2.26k
      else {
1806
2.26k
  int i;
1807
2.26k
  int* p;
1808
1809
2.26k
  if (IS_IGNORECASE(reg->options)) {
1810
88
    r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1811
88
  }
1812
2.18k
  else {
1813
2.18k
    r = add_opcode(reg, OP_BACKREF_MULTI);
1814
2.18k
  }
1815
2.26k
  if (r) return r;
1816
1817
2.26k
#ifdef USE_BACKREF_WITH_LEVEL
1818
4.23k
      add_bacref_mems:
1819
4.23k
#endif
1820
4.23k
  r = add_length(reg, br->back_num);
1821
4.23k
  if (r) return r;
1822
4.23k
  p = BACKREFS_P(br);
1823
23.8k
  for (i = br->back_num - 1; i >= 0; i--) {
1824
19.6k
    r = add_mem_num(reg, p[i]);
1825
19.6k
    if (r) return r;
1826
19.6k
  }
1827
4.23k
      }
1828
9.95k
    }
1829
9.95k
    break;
1830
1831
9.95k
#ifdef USE_SUBEXP_CALL
1832
14.0k
  case NT_CALL:
1833
14.0k
    r = compile_call(NCALL(node), reg);
1834
14.0k
    break;
1835
0
#endif
1836
1837
1.97M
  case NT_QTFR:
1838
1.97M
    r = compile_quantifier_node(NQTFR(node), reg);
1839
1.97M
    break;
1840
1841
1.52M
  case NT_ENCLOSE:
1842
1.52M
    r = compile_enclose_node(NENCLOSE(node), reg);
1843
1.52M
    break;
1844
1845
1.21M
  case NT_ANCHOR:
1846
1.21M
    r = compile_anchor_node(NANCHOR(node), reg);
1847
1.21M
    break;
1848
1849
0
  default:
1850
#ifdef ONIG_DEBUG
1851
    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1852
#endif
1853
0
    break;
1854
13.8M
  }
1855
1856
13.8M
  return r;
1857
13.8M
}
1858
1859
#ifdef USE_NAMED_GROUP
1860
1861
static int
1862
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1863
134k
{
1864
134k
  int r = 0;
1865
134k
  Node* node = *plink;
1866
1867
134k
  switch (NTYPE(node)) {
1868
12.0k
  case NT_LIST:
1869
17.8k
  case NT_ALT:
1870
81.8k
    do {
1871
81.8k
      r = noname_disable_map(&(NCAR(node)), map, counter);
1872
81.8k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873
17.8k
    break;
1874
1875
36.4k
  case NT_QTFR:
1876
36.4k
    {
1877
36.4k
      Node** ptarget = &(NQTFR(node)->target);
1878
36.4k
      Node*  old = *ptarget;
1879
36.4k
      r = noname_disable_map(ptarget, map, counter);
1880
36.4k
      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1881
1.61k
  onig_reduce_nested_quantifier(node, *ptarget);
1882
1.61k
      }
1883
36.4k
    }
1884
36.4k
    break;
1885
1886
15.5k
  case NT_ENCLOSE:
1887
15.5k
    {
1888
15.5k
      EncloseNode* en = NENCLOSE(node);
1889
15.5k
      if (en->type == ENCLOSE_MEMORY) {
1890
5.27k
  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1891
2.12k
    (*counter)++;
1892
2.12k
    map[en->regnum].new_val = *counter;
1893
2.12k
    en->regnum = *counter;
1894
2.12k
  }
1895
3.15k
  else if (en->regnum != 0) {
1896
3.12k
    *plink = en->target;
1897
3.12k
    en->target = NULL_NODE;
1898
3.12k
    onig_node_free(node);
1899
3.12k
    r = noname_disable_map(plink, map, counter);
1900
3.12k
    break;
1901
3.12k
  }
1902
5.27k
      }
1903
12.3k
      r = noname_disable_map(&(en->target), map, counter);
1904
12.3k
    }
1905
0
    break;
1906
1907
2.88k
  case NT_ANCHOR:
1908
2.88k
    if (NANCHOR(node)->target)
1909
802
      r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1910
2.88k
    break;
1911
1912
62.2k
  default:
1913
62.2k
    break;
1914
134k
  }
1915
1916
134k
  return r;
1917
134k
}
1918
1919
static int
1920
renumber_node_backref(Node* node, GroupNumRemap* map)
1921
1.05k
{
1922
1.05k
  int i, pos, n, old_num;
1923
1.05k
  int *backs;
1924
1.05k
  BRefNode* bn = NBREF(node);
1925
1926
1.05k
  if (! IS_BACKREF_NAME_REF(bn))
1927
31
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1928
1929
1.02k
  old_num = bn->back_num;
1930
1.02k
  if (IS_NULL(bn->back_dynamic))
1931
549
    backs = bn->back_static;
1932
472
  else
1933
472
    backs = bn->back_dynamic;
1934
1935
9.00k
  for (i = 0, pos = 0; i < old_num; i++) {
1936
7.98k
    n = map[backs[i]].new_val;
1937
7.98k
    if (n > 0) {
1938
7.98k
      backs[pos] = n;
1939
7.98k
      pos++;
1940
7.98k
    }
1941
7.98k
  }
1942
1943
1.02k
  bn->back_num = pos;
1944
1.02k
  return 0;
1945
1.05k
}
1946
1947
static int
1948
renumber_by_map(Node* node, GroupNumRemap* map)
1949
128k
{
1950
128k
  int r = 0;
1951
1952
128k
  switch (NTYPE(node)) {
1953
12.0k
  case NT_LIST:
1954
17.7k
  case NT_ALT:
1955
80.3k
    do {
1956
80.3k
      r = renumber_by_map(NCAR(node), map);
1957
80.3k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1958
17.7k
    break;
1959
35.2k
  case NT_QTFR:
1960
35.2k
    r = renumber_by_map(NQTFR(node)->target, map);
1961
35.2k
    break;
1962
12.1k
  case NT_ENCLOSE:
1963
12.1k
    {
1964
12.1k
      EncloseNode* en = NENCLOSE(node);
1965
12.1k
      if (en->type == ENCLOSE_CONDITION)
1966
263
  en->regnum = map[en->regnum].new_val;
1967
12.1k
      r = renumber_by_map(en->target, map);
1968
12.1k
    }
1969
12.1k
    break;
1970
1971
1.05k
  case NT_BREF:
1972
1.05k
    r = renumber_node_backref(node, map);
1973
1.05k
    break;
1974
1975
2.76k
  case NT_ANCHOR:
1976
2.76k
    if (NANCHOR(node)->target)
1977
802
      r = renumber_by_map(NANCHOR(node)->target, map);
1978
2.76k
    break;
1979
1980
59.8k
  default:
1981
59.8k
    break;
1982
128k
  }
1983
1984
128k
  return r;
1985
128k
}
1986
1987
static int
1988
numbered_ref_check(Node* node)
1989
1.54M
{
1990
1.54M
  int r = 0;
1991
1992
1.54M
  switch (NTYPE(node)) {
1993
110k
  case NT_LIST:
1994
200k
  case NT_ALT:
1995
920k
    do {
1996
920k
      r = numbered_ref_check(NCAR(node));
1997
920k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998
200k
    break;
1999
255k
  case NT_QTFR:
2000
255k
    r = numbered_ref_check(NQTFR(node)->target);
2001
255k
    break;
2002
282k
  case NT_ENCLOSE:
2003
282k
    r = numbered_ref_check(NENCLOSE(node)->target);
2004
282k
    break;
2005
2006
926
  case NT_BREF:
2007
926
    if (! IS_BACKREF_NAME_REF(NBREF(node)))
2008
5
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2009
921
    break;
2010
2011
166k
  case NT_ANCHOR:
2012
166k
    if (NANCHOR(node)->target)
2013
143
      r = numbered_ref_check(NANCHOR(node)->target);
2014
166k
    break;
2015
2016
635k
  default:
2017
635k
    break;
2018
1.54M
  }
2019
2020
1.54M
  return r;
2021
1.54M
}
2022
2023
static int
2024
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025
294
{
2026
294
  int r, i, pos, counter;
2027
294
  BitStatusType loc;
2028
294
  GroupNumRemap* map;
2029
2030
294
  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2031
294
  CHECK_NULL_RETURN_MEMERR(map);
2032
5.54k
  for (i = 1; i <= env->num_mem; i++) {
2033
5.24k
    map[i].new_val = 0;
2034
5.24k
  }
2035
294
  counter = 0;
2036
294
  r = noname_disable_map(root, map, &counter);
2037
294
  if (r != 0) return r;
2038
2039
294
  r = renumber_by_map(*root, map);
2040
294
  if (r != 0) return r;
2041
2042
4.45k
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043
4.19k
    if (map[i].new_val > 0) {
2044
2.08k
      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2045
2.08k
      pos++;
2046
2.08k
    }
2047
4.19k
  }
2048
2049
263
  loc = env->capture_history;
2050
263
  BIT_STATUS_CLEAR(env->capture_history);
2051
8.41k
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052
8.15k
    if (BIT_STATUS_AT(loc, i)) {
2053
0
      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054
0
    }
2055
8.15k
  }
2056
2057
263
  env->num_mem = env->num_named;
2058
263
  reg->num_mem = env->num_named;
2059
2060
263
  return onig_renumber_name_table(reg, map);
2061
294
}
2062
#endif /* USE_NAMED_GROUP */
2063
2064
#ifdef USE_SUBEXP_CALL
2065
static int
2066
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067
2.64k
{
2068
2.64k
  int i, offset;
2069
2.64k
  EncloseNode* en;
2070
2.64k
  AbsAddrType addr;
2071
2072
16.6k
  for (i = 0; i < uslist->num; i++) {
2073
14.0k
    en = NENCLOSE(uslist->us[i].target);
2074
14.0k
    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075
14.0k
    addr = en->call_addr;
2076
14.0k
    offset = uslist->us[i].offset;
2077
2078
14.0k
    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079
14.0k
  }
2080
2.64k
  return 0;
2081
2.64k
}
2082
#endif
2083
2084
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2085
static int
2086
quantifiers_memory_node_info(Node* node)
2087
8.82M
{
2088
8.82M
  int r = 0;
2089
2090
8.82M
  switch (NTYPE(node)) {
2091
12.4k
  case NT_LIST:
2092
20.2k
  case NT_ALT:
2093
20.2k
    {
2094
20.2k
      int v;
2095
72.6k
      do {
2096
72.6k
  v = quantifiers_memory_node_info(NCAR(node));
2097
72.6k
  if (v > r) r = v;
2098
72.6k
      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099
20.2k
    }
2100
20.2k
    break;
2101
2102
0
# ifdef USE_SUBEXP_CALL
2103
3.11k
  case NT_CALL:
2104
3.11k
    if (IS_CALL_RECURSION(NCALL(node))) {
2105
1.87k
      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106
1.87k
    }
2107
1.24k
    else
2108
1.24k
      r = quantifiers_memory_node_info(NCALL(node)->target);
2109
1.24k
    break;
2110
1.24k
# endif
2111
2112
4.37M
  case NT_QTFR:
2113
4.37M
    {
2114
4.37M
      QtfrNode* qn = NQTFR(node);
2115
4.37M
      if (qn->upper != 0) {
2116
4.37M
  r = quantifiers_memory_node_info(qn->target);
2117
4.37M
      }
2118
4.37M
    }
2119
4.37M
    break;
2120
2121
4.34M
  case NT_ENCLOSE:
2122
4.34M
    {
2123
4.34M
      EncloseNode* en = NENCLOSE(node);
2124
4.34M
      switch (en->type) {
2125
3.00k
      case ENCLOSE_MEMORY:
2126
3.00k
  return NQ_TARGET_IS_EMPTY_MEM;
2127
0
  break;
2128
2129
4.19k
      case ENCLOSE_OPTION:
2130
4.33M
      case ENCLOSE_STOP_BACKTRACK:
2131
4.33M
      case ENCLOSE_CONDITION:
2132
4.33M
      case ENCLOSE_ABSENT:
2133
4.33M
  r = quantifiers_memory_node_info(en->target);
2134
4.33M
  break;
2135
0
      default:
2136
0
  break;
2137
4.34M
      }
2138
4.34M
    }
2139
4.33M
    break;
2140
2141
4.33M
  case NT_BREF:
2142
34.5k
  case NT_STR:
2143
34.7k
  case NT_CTYPE:
2144
68.6k
  case NT_CCLASS:
2145
72.0k
  case NT_CANY:
2146
78.2k
  case NT_ANCHOR:
2147
78.2k
  default:
2148
78.2k
    break;
2149
8.82M
  }
2150
2151
8.81M
  return r;
2152
8.82M
}
2153
#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
2155
static int
2156
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157
11.0M
{
2158
11.0M
  OnigDistance tmin;
2159
11.0M
  int r = 0;
2160
2161
11.0M
  *min = 0;
2162
11.0M
  switch (NTYPE(node)) {
2163
6.55k
  case NT_BREF:
2164
6.55k
    {
2165
6.55k
      int i;
2166
6.55k
      int* backs;
2167
6.55k
      Node** nodes = SCANENV_MEM_NODES(env);
2168
6.55k
      BRefNode* br = NBREF(node);
2169
6.55k
      if (br->state & NST_RECURSION) break;
2170
2171
4.50k
      backs = BACKREFS_P(br);
2172
4.50k
      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2173
4.44k
      r = get_min_match_length(nodes[backs[0]], min, env);
2174
4.44k
      if (r != 0) break;
2175
13.1k
      for (i = 1; i < br->back_num; i++) {
2176
8.67k
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2177
8.67k
  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178
8.67k
  if (r != 0) break;
2179
8.67k
  if (*min > tmin) *min = tmin;
2180
8.67k
      }
2181
4.44k
    }
2182
4.44k
    break;
2183
2184
4.44k
#ifdef USE_SUBEXP_CALL
2185
10.9k
  case NT_CALL:
2186
10.9k
    if (IS_CALL_RECURSION(NCALL(node))) {
2187
4.99k
      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188
4.99k
      if (IS_ENCLOSE_MIN_FIXED(en))
2189
2.06k
  *min = en->min_len;
2190
4.99k
    }
2191
5.93k
    else
2192
5.93k
      r = get_min_match_length(NCALL(node)->target, min, env);
2193
10.9k
    break;
2194
0
#endif
2195
2196
294k
  case NT_LIST:
2197
839k
    do {
2198
839k
      r = get_min_match_length(NCAR(node), &tmin, env);
2199
839k
      if (r == 0) *min += tmin;
2200
839k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201
294k
    break;
2202
2203
78.0k
  case NT_ALT:
2204
78.0k
    {
2205
78.0k
      Node *x, *y;
2206
78.0k
      y = node;
2207
341k
      do {
2208
341k
  x = NCAR(y);
2209
341k
  r = get_min_match_length(x, &tmin, env);
2210
341k
  if (r != 0) break;
2211
341k
  if (y == node) *min = tmin;
2212
263k
  else if (*min > tmin) *min = tmin;
2213
341k
      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214
78.0k
    }
2215
0
    break;
2216
2217
475k
  case NT_STR:
2218
475k
    {
2219
475k
      StrNode* sn = NSTR(node);
2220
475k
      *min = sn->end - sn->s;
2221
475k
    }
2222
475k
    break;
2223
2224
5.54k
  case NT_CTYPE:
2225
5.54k
    *min = 1;
2226
5.54k
    break;
2227
2228
2.35M
  case NT_CCLASS:
2229
2.70M
  case NT_CANY:
2230
2.70M
    *min = 1;
2231
2.70M
    break;
2232
2233
3.89M
  case NT_QTFR:
2234
3.89M
    {
2235
3.89M
      QtfrNode* qn = NQTFR(node);
2236
2237
3.89M
      if (qn->lower > 0) {
2238
3.60M
  r = get_min_match_length(qn->target, min, env);
2239
3.60M
  if (r == 0)
2240
3.60M
    *min = distance_multiply(*min, qn->lower);
2241
3.60M
      }
2242
3.89M
    }
2243
3.89M
    break;
2244
2245
3.52M
  case NT_ENCLOSE:
2246
3.52M
    {
2247
3.52M
      EncloseNode* en = NENCLOSE(node);
2248
3.52M
      switch (en->type) {
2249
60.1k
      case ENCLOSE_MEMORY:
2250
60.1k
        if (IS_ENCLOSE_MIN_FIXED(en))
2251
42.3k
          *min = en->min_len;
2252
17.8k
        else {
2253
17.8k
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254
378
      *min = 0;  /* recursive */
2255
17.4k
    else {
2256
17.4k
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2257
17.4k
      r = get_min_match_length(en->target, min, env);
2258
17.4k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2259
17.4k
      if (r == 0) {
2260
17.3k
        en->min_len = *min;
2261
17.3k
        SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2262
17.3k
      }
2263
17.4k
    }
2264
17.8k
        }
2265
60.1k
        break;
2266
2267
48.8k
      case ENCLOSE_OPTION:
2268
3.46M
      case ENCLOSE_STOP_BACKTRACK:
2269
3.46M
      case ENCLOSE_CONDITION:
2270
3.46M
  r = get_min_match_length(en->target, min, env);
2271
3.46M
  break;
2272
2273
802
      case ENCLOSE_ABSENT:
2274
802
  break;
2275
3.52M
      }
2276
3.52M
    }
2277
3.52M
    break;
2278
2279
3.52M
  case NT_ANCHOR:
2280
13.7k
  default:
2281
13.7k
    break;
2282
11.0M
  }
2283
2284
11.0M
  return r;
2285
11.0M
}
2286
2287
static int
2288
get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289
403k
{
2290
403k
  OnigDistance tmax;
2291
403k
  int r = 0;
2292
2293
403k
  *max = 0;
2294
403k
  switch (NTYPE(node)) {
2295
43.1k
  case NT_LIST:
2296
147k
    do {
2297
147k
      r = get_max_match_length(NCAR(node), &tmax, env);
2298
147k
      if (r == 0)
2299
147k
  *max = distance_add(*max, tmax);
2300
147k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301
43.1k
    break;
2302
2303
22.9k
  case NT_ALT:
2304
101k
    do {
2305
101k
      r = get_max_match_length(NCAR(node), &tmax, env);
2306
101k
      if (r == 0 && *max < tmax) *max = tmax;
2307
101k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308
22.9k
    break;
2309
2310
52.6k
  case NT_STR:
2311
52.6k
    {
2312
52.6k
      StrNode* sn = NSTR(node);
2313
52.6k
      *max = sn->end - sn->s;
2314
52.6k
    }
2315
52.6k
    break;
2316
2317
531
  case NT_CTYPE:
2318
531
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319
531
    break;
2320
2321
114k
  case NT_CCLASS:
2322
123k
  case NT_CANY:
2323
123k
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324
123k
    break;
2325
2326
3.01k
  case NT_BREF:
2327
3.01k
    {
2328
3.01k
      int i;
2329
3.01k
      int* backs;
2330
3.01k
      Node** nodes = SCANENV_MEM_NODES(env);
2331
3.01k
      BRefNode* br = NBREF(node);
2332
3.01k
      if (br->state & NST_RECURSION) {
2333
1.45k
  *max = ONIG_INFINITE_DISTANCE;
2334
1.45k
  break;
2335
1.45k
      }
2336
1.56k
      backs = BACKREFS_P(br);
2337
3.64k
      for (i = 0; i < br->back_num; i++) {
2338
2.07k
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2339
2.07k
  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340
2.07k
  if (r != 0) break;
2341
2.07k
  if (*max < tmax) *max = tmax;
2342
2.07k
      }
2343
1.56k
    }
2344
1.56k
    break;
2345
2346
1.56k
#ifdef USE_SUBEXP_CALL
2347
1.56k
  case NT_CALL:
2348
1.42k
    if (! IS_CALL_RECURSION(NCALL(node)))
2349
607
      r = get_max_match_length(NCALL(node)->target, max, env);
2350
814
    else
2351
814
      *max = ONIG_INFINITE_DISTANCE;
2352
1.42k
    break;
2353
0
#endif
2354
2355
95.6k
  case NT_QTFR:
2356
95.6k
    {
2357
95.6k
      QtfrNode* qn = NQTFR(node);
2358
2359
95.6k
      if (qn->upper != 0) {
2360
95.3k
  r = get_max_match_length(qn->target, max, env);
2361
95.3k
  if (r == 0 && *max != 0) {
2362
94.6k
    if (! IS_REPEAT_INFINITE(qn->upper))
2363
14.6k
      *max = distance_multiply(*max, qn->upper);
2364
80.0k
    else
2365
80.0k
      *max = ONIG_INFINITE_DISTANCE;
2366
94.6k
  }
2367
95.3k
      }
2368
95.6k
    }
2369
95.6k
    break;
2370
2371
56.9k
  case NT_ENCLOSE:
2372
56.9k
    {
2373
56.9k
      EncloseNode* en = NENCLOSE(node);
2374
56.9k
      switch (en->type) {
2375
23.0k
      case ENCLOSE_MEMORY:
2376
23.0k
  if (IS_ENCLOSE_MAX_FIXED(en))
2377
16.2k
    *max = en->max_len;
2378
6.77k
  else {
2379
6.77k
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2380
415
      *max = ONIG_INFINITE_DISTANCE;
2381
6.36k
    else {
2382
6.36k
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2383
6.36k
      r = get_max_match_length(en->target, max, env);
2384
6.36k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2385
6.36k
      if (r == 0) {
2386
6.36k
        en->max_len = *max;
2387
6.36k
        SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2388
6.36k
      }
2389
6.36k
    }
2390
6.77k
  }
2391
23.0k
  break;
2392
2393
14.3k
      case ENCLOSE_OPTION:
2394
33.4k
      case ENCLOSE_STOP_BACKTRACK:
2395
33.5k
      case ENCLOSE_CONDITION:
2396
33.5k
  r = get_max_match_length(en->target, max, env);
2397
33.5k
  break;
2398
2399
295
      case ENCLOSE_ABSENT:
2400
295
  break;
2401
56.9k
      }
2402
56.9k
    }
2403
56.9k
    break;
2404
2405
56.9k
  case NT_ANCHOR:
2406
3.35k
  default:
2407
3.35k
    break;
2408
403k
  }
2409
2410
403k
  return r;
2411
403k
}
2412
2413
487
#define GET_CHAR_LEN_VARLEN           -1
2414
794
#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2415
2416
/* fixed size pattern node only */
2417
static int
2418
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419
31.3k
{
2420
31.3k
  int tlen;
2421
31.3k
  int r = 0;
2422
2423
31.3k
  level++;
2424
31.3k
  *len = 0;
2425
31.3k
  switch (NTYPE(node)) {
2426
3.14k
  case NT_LIST:
2427
9.79k
    do {
2428
9.79k
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429
9.79k
      if (r == 0)
2430
8.96k
  *len = (int )distance_add(*len, tlen);
2431
9.79k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432
3.14k
    break;
2433
2434
1.29k
  case NT_ALT:
2435
1.29k
    {
2436
1.29k
      int tlen2;
2437
1.29k
      int varlen = 0;
2438
2439
1.29k
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440
10.3k
      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441
9.03k
  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442
9.03k
  if (r == 0) {
2443
8.62k
    if (tlen != tlen2)
2444
4.71k
      varlen = 1;
2445
8.62k
  }
2446
9.03k
      }
2447
1.29k
      if (r == 0) {
2448
591
  if (varlen != 0) {
2449
410
    if (level == 1)
2450
397
      r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2451
13
    else
2452
13
      r = GET_CHAR_LEN_VARLEN;
2453
410
  }
2454
181
  else
2455
181
    *len = tlen;
2456
591
      }
2457
1.29k
    }
2458
1.29k
    break;
2459
2460
13.2k
  case NT_STR:
2461
13.2k
    {
2462
13.2k
      StrNode* sn = NSTR(node);
2463
13.2k
      UChar *s = sn->s;
2464
85.5k
      while (s < sn->end) {
2465
72.2k
  s += enclen(reg->enc, s, sn->end);
2466
72.2k
  (*len)++;
2467
72.2k
      }
2468
13.2k
    }
2469
13.2k
    break;
2470
2471
1.31k
  case NT_QTFR:
2472
1.31k
    {
2473
1.31k
      QtfrNode* qn = NQTFR(node);
2474
1.31k
      if (qn->lower == qn->upper) {
2475
1.28k
  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476
1.28k
  if (r == 0)
2477
1.22k
    *len = (int )distance_multiply(tlen, qn->lower);
2478
1.28k
      }
2479
30
      else
2480
30
  r = GET_CHAR_LEN_VARLEN;
2481
1.31k
    }
2482
1.31k
    break;
2483
2484
0
#ifdef USE_SUBEXP_CALL
2485
215
  case NT_CALL:
2486
215
    if (! IS_CALL_RECURSION(NCALL(node)))
2487
214
      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488
1
    else
2489
1
      r = GET_CHAR_LEN_VARLEN;
2490
215
    break;
2491
0
#endif
2492
2493
348
  case NT_CTYPE:
2494
348
    *len = 1;
2495
348
    break;
2496
2497
3.43k
  case NT_CCLASS:
2498
5.06k
  case NT_CANY:
2499
5.06k
    *len = 1;
2500
5.06k
    break;
2501
2502
3.23k
  case NT_ENCLOSE:
2503
3.23k
    {
2504
3.23k
      EncloseNode* en = NENCLOSE(node);
2505
3.23k
      switch (en->type) {
2506
2.72k
      case ENCLOSE_MEMORY:
2507
2.72k
#ifdef USE_SUBEXP_CALL
2508
2.72k
  if (IS_ENCLOSE_CLEN_FIXED(en))
2509
458
    *len = en->char_len;
2510
2.26k
  else {
2511
2.26k
    r = get_char_length_tree1(en->target, reg, len, level);
2512
2.26k
    if (r == 0) {
2513
593
      en->char_len = *len;
2514
593
      SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2515
593
    }
2516
2.26k
  }
2517
2.72k
  break;
2518
0
#endif
2519
264
      case ENCLOSE_OPTION:
2520
421
      case ENCLOSE_STOP_BACKTRACK:
2521
421
      case ENCLOSE_CONDITION:
2522
421
  r = get_char_length_tree1(en->target, reg, len, level);
2523
421
  break;
2524
92
      case ENCLOSE_ABSENT:
2525
92
      default:
2526
92
  break;
2527
3.23k
      }
2528
3.23k
    }
2529
3.23k
    break;
2530
2531
3.41k
  case NT_ANCHOR:
2532
3.41k
    break;
2533
2534
1
  default:
2535
1
    r = GET_CHAR_LEN_VARLEN;
2536
1
    break;
2537
31.3k
  }
2538
2539
31.3k
  return r;
2540
31.3k
}
2541
2542
static int
2543
get_char_length_tree(Node* node, regex_t* reg, int* len)
2544
6.99k
{
2545
6.99k
  return get_char_length_tree1(node, reg, len, 0);
2546
6.99k
}
2547
2548
/* x is not included y ==>  1 : 0 */
2549
static int
2550
is_not_included(Node* x, Node* y, regex_t* reg)
2551
727k
{
2552
727k
  int i;
2553
727k
  OnigDistance len;
2554
727k
  OnigCodePoint code;
2555
727k
  UChar *p;
2556
727k
  int ytype;
2557
2558
1.40M
 retry:
2559
1.40M
  ytype = NTYPE(y);
2560
1.40M
  switch (NTYPE(x)) {
2561
2.92k
  case NT_CTYPE:
2562
2.92k
    {
2563
2.92k
      switch (ytype) {
2564
281
      case NT_CTYPE:
2565
281
  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566
281
      NCTYPE(y)->not   != NCTYPE(x)->not &&
2567
281
      NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568
218
    return 1;
2569
63
  else
2570
63
    return 0;
2571
0
  break;
2572
2573
1.17k
      case NT_CCLASS:
2574
678k
      swap:
2575
678k
  {
2576
678k
    Node* tmp;
2577
678k
    tmp = x; x = y; y = tmp;
2578
678k
    goto retry;
2579
1.17k
  }
2580
0
  break;
2581
2582
1.47k
      case NT_STR:
2583
1.47k
  goto swap;
2584
0
  break;
2585
2586
0
      default:
2587
0
  break;
2588
2.92k
      }
2589
2.92k
    }
2590
0
    break;
2591
2592
685k
  case NT_CCLASS:
2593
685k
    {
2594
685k
      CClassNode* xc = NCCLASS(x);
2595
685k
      switch (ytype) {
2596
1.72k
      case NT_CTYPE:
2597
1.72k
  switch (NCTYPE(y)->ctype) {
2598
1.72k
  case ONIGENC_CTYPE_WORD:
2599
1.72k
    if (NCTYPE(y)->not == 0) {
2600
1.32k
      if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601
29.0k
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602
29.0k
    if (BITSET_AT(xc->bs, i)) {
2603
4.83k
      if (NCTYPE(y)->ascii_range) {
2604
1.85k
        if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605
1.85k
      }
2606
2.98k
      else {
2607
2.98k
        if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608
2.98k
      }
2609
4.83k
    }
2610
29.0k
        }
2611
59
        return 1;
2612
266
      }
2613
1.05k
      return 0;
2614
1.32k
    }
2615
402
    else {
2616
402
      if (IS_NOT_NULL(xc->mbuf)) return 0;
2617
22.2k
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618
22.1k
        int is_word;
2619
22.1k
        if (NCTYPE(y)->ascii_range)
2620
17.7k
    is_word = IS_CODE_SB_WORD(reg->enc, i);
2621
4.45k
        else
2622
4.45k
    is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623
22.1k
        if (! is_word) {
2624
15.5k
    if (!IS_NCCLASS_NOT(xc)) {
2625
15.3k
      if (BITSET_AT(xc->bs, i))
2626
67
        return 0;
2627
15.3k
    }
2628
191
    else {
2629
191
      if (! BITSET_AT(xc->bs, i))
2630
191
        return 0;
2631
191
    }
2632
15.5k
        }
2633
22.1k
      }
2634
85
      return 1;
2635
343
    }
2636
0
    break;
2637
2638
0
  default:
2639
0
    break;
2640
1.72k
  }
2641
0
  break;
2642
2643
7.96k
      case NT_CCLASS:
2644
7.96k
  {
2645
7.96k
    int v;
2646
7.96k
    CClassNode* yc = NCCLASS(y);
2647
2648
696k
    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649
695k
      v = BITSET_AT(xc->bs, i);
2650
695k
      if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651
695k
    (v == 0 && IS_NCCLASS_NOT(xc))) {
2652
70.2k
        v = BITSET_AT(yc->bs, i);
2653
70.2k
        if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654
70.2k
      (v == 0 && IS_NCCLASS_NOT(yc)))
2655
7.00k
    return 0;
2656
70.2k
      }
2657
695k
    }
2658
958
    if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659
958
        (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660
629
      return 1;
2661
329
    return 0;
2662
958
  }
2663
0
  break;
2664
2665
675k
      case NT_STR:
2666
675k
  goto swap;
2667
0
  break;
2668
2669
0
      default:
2670
0
  break;
2671
685k
      }
2672
685k
    }
2673
0
    break;
2674
2675
717k
  case NT_STR:
2676
717k
    {
2677
717k
      StrNode* xs = NSTR(x);
2678
717k
      if (NSTRING_LEN(x) == 0)
2679
0
  break;
2680
2681
717k
      switch (ytype) {
2682
2.65k
      case NT_CTYPE:
2683
2.65k
  switch (NCTYPE(y)->ctype) {
2684
2.65k
  case ONIGENC_CTYPE_WORD:
2685
2.65k
    if (NCTYPE(y)->ascii_range) {
2686
2.19k
      if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687
1.14k
        return NCTYPE(y)->not;
2688
1.05k
      else
2689
1.05k
        return !(NCTYPE(y)->not);
2690
2.19k
    }
2691
467
    else {
2692
467
      if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693
311
        return NCTYPE(y)->not;
2694
156
      else
2695
156
        return !(NCTYPE(y)->not);
2696
467
    }
2697
0
    break;
2698
0
  default:
2699
0
    break;
2700
2.65k
  }
2701
0
  break;
2702
2703
678k
      case NT_CCLASS:
2704
678k
  {
2705
678k
    CClassNode* cc = NCCLASS(y);
2706
2707
678k
    code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708
678k
             xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709
678k
    return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710
2.65k
  }
2711
0
  break;
2712
2713
36.4k
      case NT_STR:
2714
36.4k
  {
2715
36.4k
    UChar *q;
2716
36.4k
    StrNode* ys = NSTR(y);
2717
36.4k
    len = NSTRING_LEN(x);
2718
36.4k
    if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719
36.4k
    if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720
      /* tiny version */
2721
5.06k
      return 0;
2722
5.06k
    }
2723
31.4k
    else {
2724
37.8k
      for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725
31.8k
        if (*p != *q) return 1;
2726
31.8k
      }
2727
31.4k
    }
2728
36.4k
  }
2729
6.03k
  break;
2730
2731
6.03k
      default:
2732
0
  break;
2733
717k
      }
2734
717k
    }
2735
6.03k
    break;
2736
2737
6.03k
  default:
2738
0
    break;
2739
1.40M
  }
2740
2741
6.03k
  return 0;
2742
1.40M
}
2743
2744
static Node*
2745
get_head_value_node(Node* node, int exact, regex_t* reg)
2746
4.20M
{
2747
4.20M
  Node* n = NULL_NODE;
2748
2749
4.20M
  switch (NTYPE(node)) {
2750
892
  case NT_BREF:
2751
508k
  case NT_ALT:
2752
739k
  case NT_CANY:
2753
739k
#ifdef USE_SUBEXP_CALL
2754
739k
  case NT_CALL:
2755
739k
#endif
2756
739k
    break;
2757
2758
7.08k
  case NT_CTYPE:
2759
1.25M
  case NT_CCLASS:
2760
1.25M
    if (exact == 0) {
2761
1.17M
      n = node;
2762
1.17M
    }
2763
1.25M
    break;
2764
2765
5.35k
  case NT_LIST:
2766
5.35k
    n = get_head_value_node(NCAR(node), exact, reg);
2767
5.35k
    break;
2768
2769
1.57M
  case NT_STR:
2770
1.57M
    {
2771
1.57M
      StrNode* sn = NSTR(node);
2772
2773
1.57M
      if (sn->end <= sn->s)
2774
159
  break;
2775
2776
1.57M
      if (exact == 0 ||
2777
1.57M
    NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2778
1.56M
  n = node;
2779
1.56M
      }
2780
1.57M
    }
2781
0
    break;
2782
2783
448k
  case NT_QTFR:
2784
448k
    {
2785
448k
      QtfrNode* qn = NQTFR(node);
2786
448k
      if (qn->lower > 0) {
2787
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2788
  if (IS_NOT_NULL(qn->head_exact))
2789
    n = qn->head_exact;
2790
  else
2791
#endif
2792
143k
    n = get_head_value_node(qn->target, exact, reg);
2793
143k
      }
2794
448k
    }
2795
448k
    break;
2796
2797
20.8k
  case NT_ENCLOSE:
2798
20.8k
    {
2799
20.8k
      EncloseNode* en = NENCLOSE(node);
2800
20.8k
      switch (en->type) {
2801
2.10k
      case ENCLOSE_OPTION:
2802
2.10k
  {
2803
2.10k
    OnigOptionType options = reg->options;
2804
2805
2.10k
    reg->options = NENCLOSE(node)->option;
2806
2.10k
    n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2807
2.10k
    reg->options = options;
2808
2.10k
  }
2809
2.10k
  break;
2810
2811
4.31k
      case ENCLOSE_MEMORY:
2812
17.9k
      case ENCLOSE_STOP_BACKTRACK:
2813
17.9k
      case ENCLOSE_CONDITION:
2814
17.9k
  n = get_head_value_node(en->target, exact, reg);
2815
17.9k
  break;
2816
2817
785
      case ENCLOSE_ABSENT:
2818
785
  break;
2819
20.8k
      }
2820
20.8k
    }
2821
20.8k
    break;
2822
2823
159k
  case NT_ANCHOR:
2824
159k
    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2825
687
      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2826
159k
    break;
2827
2828
0
  default:
2829
0
    break;
2830
4.20M
  }
2831
2832
4.20M
  return n;
2833
4.20M
}
2834
2835
static int
2836
check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2837
45.7k
{
2838
45.7k
  int type, r = 0;
2839
2840
45.7k
  type = NTYPE(node);
2841
45.7k
  if ((NTYPE2BIT(type) & type_mask) == 0)
2842
2
    return 1;
2843
2844
45.7k
  switch (type) {
2845
4.66k
  case NT_LIST:
2846
7.63k
  case NT_ALT:
2847
35.1k
    do {
2848
35.1k
      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2849
35.1k
        anchor_mask);
2850
35.1k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2851
7.63k
    break;
2852
2853
3.20k
  case NT_QTFR:
2854
3.20k
    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2855
3.20k
      anchor_mask);
2856
3.20k
    break;
2857
2858
4.64k
  case NT_ENCLOSE:
2859
4.64k
    {
2860
4.64k
      EncloseNode* en = NENCLOSE(node);
2861
4.64k
      if ((en->type & enclose_mask) == 0)
2862
9
  return 1;
2863
2864
4.64k
      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2865
4.64k
    }
2866
0
    break;
2867
2868
3.82k
  case NT_ANCHOR:
2869
3.82k
    type = NANCHOR(node)->type;
2870
3.82k
    if ((type & anchor_mask) == 0)
2871
1
      return 1;
2872
2873
3.82k
    if (NANCHOR(node)->target)
2874
1.31k
      r = check_type_tree(NANCHOR(node)->target,
2875
1.31k
        type_mask, enclose_mask, anchor_mask);
2876
3.82k
    break;
2877
2878
26.4k
  default:
2879
26.4k
    break;
2880
45.7k
  }
2881
45.7k
  return r;
2882
45.7k
}
2883
2884
#ifdef USE_SUBEXP_CALL
2885
2886
383k
# define RECURSION_EXIST       1
2887
739k
# define RECURSION_INFINITE    2
2888
2889
static int
2890
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2891
1.35M
{
2892
1.35M
  int type;
2893
1.35M
  int r = 0;
2894
2895
1.35M
  type = NTYPE(node);
2896
1.35M
  switch (type) {
2897
126k
  case NT_LIST:
2898
126k
    {
2899
126k
      Node *x;
2900
126k
      OnigDistance min;
2901
126k
      int ret;
2902
2903
126k
      x = node;
2904
488k
      do {
2905
488k
  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2906
488k
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2907
488k
  r |= ret;
2908
488k
  if (head) {
2909
6.88k
    ret = get_min_match_length(NCAR(x), &min, env);
2910
6.88k
    if (ret != 0) return ret;
2911
6.85k
    if (min != 0) head = 0;
2912
6.85k
  }
2913
488k
      } while (IS_NOT_NULL(x = NCDR(x)));
2914
126k
    }
2915
125k
    break;
2916
2917
125k
  case NT_ALT:
2918
58.7k
    {
2919
58.7k
      int ret;
2920
58.7k
      r = RECURSION_EXIST;
2921
251k
      do {
2922
251k
  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2923
251k
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2924
250k
  r &= ret;
2925
250k
      } while (IS_NOT_NULL(node = NCDR(node)));
2926
58.7k
    }
2927
58.4k
    break;
2928
2929
305k
  case NT_QTFR:
2930
305k
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2931
305k
    if (r == RECURSION_EXIST) {
2932
19.2k
      if (NQTFR(node)->lower == 0) r = 0;
2933
19.2k
    }
2934
305k
    break;
2935
2936
13.1k
  case NT_ANCHOR:
2937
13.1k
    {
2938
13.1k
      AnchorNode* an = NANCHOR(node);
2939
13.1k
      switch (an->type) {
2940
3.23k
      case ANCHOR_PREC_READ:
2941
3.77k
      case ANCHOR_PREC_READ_NOT:
2942
4.08k
      case ANCHOR_LOOK_BEHIND:
2943
4.50k
      case ANCHOR_LOOK_BEHIND_NOT:
2944
4.50k
  r = subexp_inf_recursive_check(an->target, env, head);
2945
4.50k
  break;
2946
13.1k
      }
2947
13.1k
    }
2948
13.1k
    break;
2949
2950
118k
  case NT_CALL:
2951
118k
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2952
118k
    break;
2953
2954
218k
  case NT_ENCLOSE:
2955
218k
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2956
16.1k
      return 0;
2957
202k
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2958
19.3k
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2959
183k
    else {
2960
183k
      SET_ENCLOSE_STATUS(node, NST_MARK2);
2961
183k
      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962
183k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2963
183k
    }
2964
183k
    break;
2965
2966
511k
  default:
2967
511k
    break;
2968
1.35M
  }
2969
2970
1.31M
  return r;
2971
1.35M
}
2972
2973
static int
2974
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2975
1.29M
{
2976
1.29M
  int type;
2977
1.29M
  int r = 0;
2978
2979
1.29M
  type = NTYPE(node);
2980
1.29M
  switch (type) {
2981
145k
  case NT_LIST:
2982
226k
  case NT_ALT:
2983
832k
    do {
2984
832k
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2985
832k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2986
226k
    break;
2987
2988
339k
  case NT_QTFR:
2989
339k
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2990
339k
    break;
2991
2992
7.98k
  case NT_ANCHOR:
2993
7.98k
    {
2994
7.98k
      AnchorNode* an = NANCHOR(node);
2995
7.98k
      switch (an->type) {
2996
1.22k
      case ANCHOR_PREC_READ:
2997
1.81k
      case ANCHOR_PREC_READ_NOT:
2998
1.90k
      case ANCHOR_LOOK_BEHIND:
2999
2.19k
      case ANCHOR_LOOK_BEHIND_NOT:
3000
2.19k
  r = subexp_inf_recursive_check_trav(an->target, env);
3001
2.19k
  break;
3002
7.98k
      }
3003
7.98k
    }
3004
7.98k
    break;
3005
3006
117k
  case NT_ENCLOSE:
3007
117k
    {
3008
117k
      EncloseNode* en = NENCLOSE(node);
3009
3010
117k
      if (IS_ENCLOSE_RECURSION(en)) {
3011
663
  SET_ENCLOSE_STATUS(node, NST_MARK1);
3012
663
  r = subexp_inf_recursive_check(en->target, env, 1);
3013
663
  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014
648
  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3015
648
      }
3016
117k
      r = subexp_inf_recursive_check_trav(en->target, env);
3017
117k
    }
3018
3019
0
    break;
3020
3021
602k
  default:
3022
602k
    break;
3023
1.29M
  }
3024
3025
1.29M
  return r;
3026
1.29M
}
3027
3028
static int
3029
subexp_recursive_check(Node* node)
3030
2.04M
{
3031
2.04M
  int r = 0;
3032
3033
2.04M
  switch (NTYPE(node)) {
3034
185k
  case NT_LIST:
3035
261k
  case NT_ALT:
3036
1.19M
    do {
3037
1.19M
      r |= subexp_recursive_check(NCAR(node));
3038
1.19M
    } while (IS_NOT_NULL(node = NCDR(node)));
3039
261k
    break;
3040
3041
448k
  case NT_QTFR:
3042
448k
    r = subexp_recursive_check(NQTFR(node)->target);
3043
448k
    break;
3044
3045
27.5k
  case NT_ANCHOR:
3046
27.5k
    {
3047
27.5k
      AnchorNode* an = NANCHOR(node);
3048
27.5k
      switch (an->type) {
3049
4.99k
      case ANCHOR_PREC_READ:
3050
5.53k
      case ANCHOR_PREC_READ_NOT:
3051
5.87k
      case ANCHOR_LOOK_BEHIND:
3052
6.32k
      case ANCHOR_LOOK_BEHIND_NOT:
3053
6.32k
  r = subexp_recursive_check(an->target);
3054
6.32k
  break;
3055
27.5k
      }
3056
27.5k
    }
3057
27.5k
    break;
3058
3059
151k
  case NT_CALL:
3060
151k
    r = subexp_recursive_check(NCALL(node)->target);
3061
151k
    if (r != 0) SET_CALL_RECURSION(node);
3062
151k
    break;
3063
3064
351k
  case NT_ENCLOSE:
3065
351k
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3066
94.6k
      return 0;
3067
257k
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3068
20.6k
      return 1; /* recursion */
3069
236k
    else {
3070
236k
      SET_ENCLOSE_STATUS(node, NST_MARK2);
3071
236k
      r = subexp_recursive_check(NENCLOSE(node)->target);
3072
236k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3073
236k
    }
3074
236k
    break;
3075
3076
801k
  default:
3077
801k
    break;
3078
2.04M
  }
3079
3080
1.92M
  return r;
3081
2.04M
}
3082
3083
3084
static int
3085
subexp_recursive_check_trav(Node* node, ScanEnv* env)
3086
1.29M
{
3087
1.29M
# define FOUND_CALLED_NODE    1
3088
3089
1.29M
  int type;
3090
1.29M
  int r = 0;
3091
3092
1.29M
  type = NTYPE(node);
3093
1.29M
  switch (type) {
3094
145k
  case NT_LIST:
3095
227k
  case NT_ALT:
3096
227k
    {
3097
227k
      int ret;
3098
834k
      do {
3099
834k
  ret = subexp_recursive_check_trav(NCAR(node), env);
3100
834k
  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3101
823k
  else if (ret < 0) return ret;
3102
834k
      } while (IS_NOT_NULL(node = NCDR(node)));
3103
227k
    }
3104
227k
    break;
3105
3106
339k
  case NT_QTFR:
3107
339k
    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3108
339k
    if (NQTFR(node)->upper == 0) {
3109
697
      if (r == FOUND_CALLED_NODE)
3110
173
  NQTFR(node)->is_referred = 1;
3111
697
    }
3112
339k
    break;
3113
3114
8.13k
  case NT_ANCHOR:
3115
8.13k
    {
3116
8.13k
      AnchorNode* an = NANCHOR(node);
3117
8.13k
      switch (an->type) {
3118
1.22k
      case ANCHOR_PREC_READ:
3119
1.90k
      case ANCHOR_PREC_READ_NOT:
3120
1.99k
      case ANCHOR_LOOK_BEHIND:
3121
2.29k
      case ANCHOR_LOOK_BEHIND_NOT:
3122
2.29k
  r = subexp_recursive_check_trav(an->target, env);
3123
2.29k
  break;
3124
8.13k
      }
3125
8.13k
    }
3126
8.13k
    break;
3127
3128
117k
  case NT_ENCLOSE:
3129
117k
    {
3130
117k
      EncloseNode* en = NENCLOSE(node);
3131
3132
117k
      if (! IS_ENCLOSE_RECURSION(en)) {
3133
117k
  if (IS_ENCLOSE_CALLED(en)) {
3134
5.61k
    SET_ENCLOSE_STATUS(node, NST_MARK1);
3135
5.61k
    r = subexp_recursive_check(en->target);
3136
5.61k
    if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137
5.61k
    CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3138
5.61k
  }
3139
117k
      }
3140
117k
      r = subexp_recursive_check_trav(en->target, env);
3141
117k
      if (IS_ENCLOSE_CALLED(en))
3142
5.61k
  r |= FOUND_CALLED_NODE;
3143
117k
    }
3144
117k
    break;
3145
3146
603k
  default:
3147
603k
    break;
3148
1.29M
  }
3149
3150
1.29M
  return r;
3151
1.29M
}
3152
3153
static int
3154
setup_subexp_call(Node* node, ScanEnv* env)
3155
1.31M
{
3156
1.31M
  int type;
3157
1.31M
  int r = 0;
3158
3159
1.31M
  type = NTYPE(node);
3160
1.31M
  switch (type) {
3161
148k
  case NT_LIST:
3162
486k
    do {
3163
486k
      r = setup_subexp_call(NCAR(node), env);
3164
486k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3165
148k
    break;
3166
3167
82.7k
  case NT_ALT:
3168
359k
    do {
3169
359k
      r = setup_subexp_call(NCAR(node), env);
3170
359k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3171
82.7k
    break;
3172
3173
344k
  case NT_QTFR:
3174
344k
    r = setup_subexp_call(NQTFR(node)->target, env);
3175
344k
    break;
3176
119k
  case NT_ENCLOSE:
3177
119k
    r = setup_subexp_call(NENCLOSE(node)->target, env);
3178
119k
    break;
3179
3180
8.63k
  case NT_CALL:
3181
8.63k
    {
3182
8.63k
      CallNode* cn = NCALL(node);
3183
8.63k
      Node** nodes = SCANENV_MEM_NODES(env);
3184
3185
8.63k
      if (cn->group_num != 0) {
3186
8.00k
  int gnum = cn->group_num;
3187
3188
8.00k
# ifdef USE_NAMED_GROUP
3189
8.00k
  if (env->num_named > 0 &&
3190
8.00k
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3191
8.00k
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3192
3
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3193
3
  }
3194
8.00k
# endif
3195
8.00k
  if (gnum > env->num_mem) {
3196
10
    onig_scan_env_set_error_string(env,
3197
10
     ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3198
10
    return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3199
10
  }
3200
3201
7.99k
# ifdef USE_NAMED_GROUP
3202
8.62k
      set_call_attr:
3203
8.62k
# endif
3204
8.62k
  cn->target = nodes[cn->group_num];
3205
8.62k
  if (IS_NULL(cn->target)) {
3206
0
    onig_scan_env_set_error_string(env,
3207
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3208
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3209
0
  }
3210
8.62k
  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3211
8.62k
  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3212
8.62k
  cn->unset_addr_list = env->unset_addr_list;
3213
8.62k
      }
3214
634
# ifdef USE_NAMED_GROUP
3215
634
#  ifdef USE_PERL_SUBEXP_CALL
3216
634
      else if (cn->name == cn->name_end) {
3217
554
  goto set_call_attr;
3218
554
      }
3219
80
#  endif
3220
80
      else {
3221
80
  int *refs;
3222
3223
80
  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3224
80
             &refs);
3225
80
  if (n <= 0) {
3226
5
    onig_scan_env_set_error_string(env,
3227
5
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3228
5
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3229
5
  }
3230
75
  else if (n > 1 &&
3231
75
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3232
0
    onig_scan_env_set_error_string(env,
3233
0
      ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3234
0
    return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3235
0
  }
3236
75
  else {
3237
75
    cn->group_num = refs[0];
3238
75
    goto set_call_attr;
3239
75
  }
3240
80
      }
3241
8.63k
# endif
3242
8.63k
    }
3243
8.62k
    break;
3244
3245
8.62k
  case NT_ANCHOR:
3246
8.35k
    {
3247
8.35k
      AnchorNode* an = NANCHOR(node);
3248
3249
8.35k
      switch (an->type) {
3250
1.22k
      case ANCHOR_PREC_READ:
3251
2.04k
      case ANCHOR_PREC_READ_NOT:
3252
2.13k
      case ANCHOR_LOOK_BEHIND:
3253
2.42k
      case ANCHOR_LOOK_BEHIND_NOT:
3254
2.42k
  r = setup_subexp_call(an->target, env);
3255
2.42k
  break;
3256
8.35k
      }
3257
8.35k
    }
3258
8.35k
    break;
3259
3260
603k
  default:
3261
603k
    break;
3262
1.31M
  }
3263
3264
1.31M
  return r;
3265
1.31M
}
3266
#endif
3267
3268
2.33M
/* Context flags carried in the `state` argument while the tree is set up. */
#define IN_ALT          0x01  /* below an alternation branch */
#define IN_NOT          0x02  /* below a negative look-ahead */
#define IN_REPEAT       0x04  /* below a quantifier */
#define IN_VAR_REPEAT   0x08  /* below a quantifier with lower != upper */
#define IN_CALL         0x10  /* below a called group */
#define IN_RECCALL      0x20  /* below a recursively called group */
#define IN_LOOK_BEHIND  0x40  /* tested by expand_case_fold_string() */
3275
3276
/* divide different length alternatives in look-behind.
3277
  (?<=A|B) ==> (?<=A)|(?<=B)
3278
  (?<!A|B) ==> (?<!A)(?<!B)
3279
*/
3280
static int
3281
divide_look_behind_alternatives(Node* node)
3282
397
{
3283
397
  Node *head, *np, *insert_node;
3284
397
  AnchorNode* an = NANCHOR(node);
3285
397
  int anc_type = an->type;
3286
3287
397
  head = an->target;
3288
397
  np = NCAR(head);
3289
397
  swap_node(node, head);
3290
397
  NCAR(node) = head;
3291
397
  NANCHOR(head)->target = np;
3292
3293
397
  np = node;
3294
5.79k
  while ((np = NCDR(np)) != NULL_NODE) {
3295
5.39k
    insert_node = onig_node_new_anchor(anc_type);
3296
5.39k
    CHECK_NULL_RETURN_MEMERR(insert_node);
3297
5.39k
    NANCHOR(insert_node)->target = NCAR(np);
3298
5.39k
    NCAR(np) = insert_node;
3299
5.39k
  }
3300
3301
397
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3302
248
    np = node;
3303
3.92k
    do {
3304
3.92k
      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3305
3.92k
    } while ((np = NCDR(np)) != NULL_NODE);
3306
248
  }
3307
397
  return 0;
3308
397
}
3309
3310
static int
3311
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3312
1.32k
{
3313
1.32k
  int r, len;
3314
1.32k
  AnchorNode* an = NANCHOR(node);
3315
3316
1.32k
  r = get_char_length_tree(an->target, reg, &len);
3317
1.32k
  if (r == 0)
3318
885
    an->char_len = len;
3319
442
  else if (r == GET_CHAR_LEN_VARLEN)
3320
45
    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3321
397
  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3322
397
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3323
397
      r = divide_look_behind_alternatives(node);
3324
0
    else
3325
0
      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3326
397
  }
3327
3328
1.32k
  return r;
3329
1.32k
}
3330
3331
static int
3332
next_setup(Node* node, Node* next_node, regex_t* reg)
3333
4.44M
{
3334
4.44M
  int type;
3335
3336
4.76M
 retry:
3337
4.76M
  type = NTYPE(node);
3338
4.76M
  if (type == NT_QTFR) {
3339
1.58M
    QtfrNode* qn = NQTFR(node);
3340
1.58M
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3341
1.46M
#ifdef USE_QTFR_PEEK_NEXT
3342
1.46M
      Node* n = get_head_value_node(next_node, 1, reg);
3343
      /* '\0': for UTF-16BE etc... */
3344
1.46M
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3345
803k
  qn->next_head_exact = n;
3346
803k
      }
3347
1.46M
#endif
3348
      /* automatic possessification a*b ==> (?>a*)b */
3349
1.46M
      if (qn->lower <= 1) {
3350
1.46M
  int ttype = NTYPE(qn->target);
3351
1.46M
  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3352
1.36M
    Node *x, *y;
3353
1.36M
    x = get_head_value_node(qn->target, 0, reg);
3354
1.36M
    if (IS_NOT_NULL(x)) {
3355
1.20M
      y = get_head_value_node(next_node,  0, reg);
3356
1.20M
      if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3357
702k
        Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3358
702k
        CHECK_NULL_RETURN_MEMERR(en);
3359
702k
        SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3360
702k
        swap_node(node, en);
3361
702k
        NENCLOSE(node)->target = en;
3362
702k
      }
3363
1.20M
    }
3364
1.36M
  }
3365
1.46M
      }
3366
1.46M
    }
3367
1.58M
  }
3368
3.18M
  else if (type == NT_ENCLOSE) {
3369
484k
    EncloseNode* en = NENCLOSE(node);
3370
484k
    if (en->type == ENCLOSE_MEMORY) {
3371
324k
      node = en->target;
3372
324k
      goto retry;
3373
324k
    }
3374
484k
  }
3375
4.44M
  return 0;
3376
4.76M
}
3377
3378
3379
static int
3380
update_string_node_case_fold(regex_t* reg, Node *node)
3381
52.2k
{
3382
52.2k
  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3383
52.2k
  UChar *sbuf, *ebuf, *sp;
3384
52.2k
  int r, i, len;
3385
52.2k
  OnigDistance sbuf_size;
3386
52.2k
  StrNode* sn = NSTR(node);
3387
3388
52.2k
  end = sn->end;
3389
52.2k
  sbuf_size = (end - sn->s) * 2;
3390
52.2k
  sbuf = (UChar* )xmalloc(sbuf_size);
3391
52.2k
  CHECK_NULL_RETURN_MEMERR(sbuf);
3392
52.2k
  ebuf = sbuf + sbuf_size;
3393
3394
52.2k
  sp = sbuf;
3395
52.2k
  p = sn->s;
3396
352k
  while (p < end) {
3397
300k
    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3398
640k
    for (i = 0; i < len; i++) {
3399
340k
      if (sp >= ebuf) {
3400
231
  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3401
231
  if (IS_NULL(p)) {
3402
0
    xfree(sbuf);
3403
0
    return ONIGERR_MEMORY;
3404
0
  }
3405
231
  sbuf = p;
3406
231
  sp = sbuf + sbuf_size;
3407
231
  sbuf_size *= 2;
3408
231
  ebuf = sbuf + sbuf_size;
3409
231
      }
3410
3411
340k
      *sp++ = buf[i];
3412
340k
    }
3413
300k
  }
3414
3415
52.2k
  r = onig_node_str_set(node, sbuf, sp);
3416
3417
52.2k
  xfree(sbuf);
3418
52.2k
  return r;
3419
52.2k
}
3420
3421
static int
3422
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3423
         regex_t* reg)
3424
8.99k
{
3425
8.99k
  int r;
3426
8.99k
  Node *node;
3427
3428
8.99k
  node = onig_node_new_str(s, end);
3429
8.99k
  if (IS_NULL(node)) return ONIGERR_MEMORY;
3430
3431
8.99k
  r = update_string_node_case_fold(reg, node);
3432
8.99k
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return r;
3435
0
  }
3436
3437
8.99k
  NSTRING_SET_AMBIG(node);
3438
8.99k
  NSTRING_SET_DONT_GET_OPT_INFO(node);
3439
8.99k
  *rnode = node;
3440
8.99k
  return 0;
3441
8.99k
}
3442
3443
static int
3444
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3445
        int slen)
3446
210k
{
3447
210k
  int i;
3448
3449
318k
  for (i = 0; i < item_num; i++) {
3450
120k
    if (items[i].byte_len != slen) {
3451
7.14k
      return 1;
3452
7.14k
    }
3453
113k
    if (items[i].code_len != 1) {
3454
5.83k
      return 1;
3455
5.83k
    }
3456
113k
  }
3457
197k
  return 0;
3458
210k
}
3459
3460
static int
3461
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3462
          UChar *p, int slen, UChar *end,
3463
          regex_t* reg, Node **rnode)
3464
7.84k
{
3465
7.84k
  int r, i, j, len, varlen;
3466
7.84k
  Node *anode, *var_anode, *snode, *xnode, *an;
3467
7.84k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3468
3469
7.84k
  *rnode = var_anode = NULL_NODE;
3470
3471
7.84k
  varlen = 0;
3472
26.3k
  for (i = 0; i < item_num; i++) {
3473
23.9k
    if (items[i].byte_len != slen) {
3474
5.49k
      varlen = 1;
3475
5.49k
      break;
3476
5.49k
    }
3477
23.9k
  }
3478
3479
7.84k
  if (varlen != 0) {
3480
5.49k
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3481
5.49k
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3482
3483
5.49k
    xnode = onig_node_new_list(NULL, NULL);
3484
5.49k
    if (IS_NULL(xnode)) goto mem_err;
3485
5.49k
    NCAR(var_anode) = xnode;
3486
3487
5.49k
    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3488
5.49k
    if (IS_NULL(anode)) goto mem_err;
3489
5.49k
    NCAR(xnode) = anode;
3490
5.49k
  }
3491
2.35k
  else {
3492
2.35k
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3493
2.35k
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3494
2.35k
  }
3495
3496
7.84k
  snode = onig_node_new_str(p, p + slen);
3497
7.84k
  if (IS_NULL(snode)) goto mem_err;
3498
3499
7.84k
  NCAR(anode) = snode;
3500
3501
33.9k
  for (i = 0; i < item_num; i++) {
3502
26.1k
    snode = onig_node_new_str(NULL, NULL);
3503
26.1k
    if (IS_NULL(snode)) goto mem_err;
3504
3505
70.1k
    for (j = 0; j < items[i].code_len; j++) {
3506
43.9k
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3507
43.9k
      if (len < 0) {
3508
0
  r = len;
3509
0
  goto mem_err2;
3510
0
      }
3511
3512
43.9k
      r = onig_node_str_cat(snode, buf, buf + len);
3513
43.9k
      if (r != 0) goto mem_err2;
3514
43.9k
    }
3515
3516
26.1k
    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517
26.1k
    if (IS_NULL(an)) {
3518
0
      goto mem_err2;
3519
0
    }
3520
3521
26.1k
    if (items[i].byte_len != slen) {
3522
7.63k
      Node *rem;
3523
7.63k
      UChar *q = p + items[i].byte_len;
3524
3525
7.63k
      if (q < end) {
3526
5.34k
  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3527
5.34k
  if (r != 0) {
3528
0
    onig_node_free(an);
3529
0
    goto mem_err2;
3530
0
  }
3531
3532
5.34k
  xnode = onig_node_list_add(NULL_NODE, snode);
3533
5.34k
  if (IS_NULL(xnode)) {
3534
0
    onig_node_free(an);
3535
0
    onig_node_free(rem);
3536
0
    goto mem_err2;
3537
0
  }
3538
5.34k
  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3539
0
    onig_node_free(an);
3540
0
    onig_node_free(xnode);
3541
0
    onig_node_free(rem);
3542
0
    goto mem_err;
3543
0
  }
3544
3545
5.34k
  NCAR(an) = xnode;
3546
5.34k
      }
3547
2.29k
      else {
3548
2.29k
  NCAR(an) = snode;
3549
2.29k
      }
3550
3551
7.63k
      NCDR(var_anode) = an;
3552
7.63k
      var_anode = an;
3553
7.63k
    }
3554
18.4k
    else {
3555
18.4k
      NCAR(an)     = snode;
3556
18.4k
      NCDR(anode) = an;
3557
18.4k
      anode = an;
3558
18.4k
    }
3559
26.1k
  }
3560
3561
7.84k
  return varlen;
3562
3563
0
 mem_err2:
3564
0
  onig_node_free(snode);
3565
3566
0
 mem_err:
3567
0
  onig_node_free(*rnode);
3568
3569
0
  return ONIGERR_MEMORY;
3570
0
}
3571
3572
11.4k
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3573
3574
static int
3575
expand_case_fold_string(Node* node, regex_t* reg, int state)
3576
59.1k
{
3577
59.1k
  int r, n, len, alt_num;
3578
59.1k
  int varlen = 0;
3579
59.1k
  int is_in_look_behind;
3580
59.1k
  UChar *start, *end, *p;
3581
59.1k
  Node *top_root, *root, *snode, *prev_node;
3582
59.1k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3583
59.1k
  StrNode* sn;
3584
3585
59.1k
  if (NSTRING_IS_AMBIG(node)) return 0;
3586
3587
44.5k
  sn = NSTR(node);
3588
3589
44.5k
  start = sn->s;
3590
44.5k
  end   = sn->end;
3591
44.5k
  if (start >= end) return 0;
3592
3593
42.9k
  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3594
3595
42.9k
  r = 0;
3596
42.9k
  top_root = root = prev_node = snode = NULL_NODE;
3597
42.9k
  alt_num = 1;
3598
42.9k
  p = start;
3599
249k
  while (p < end) {
3600
210k
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3601
210k
             p, end, items);
3602
210k
    if (n < 0) {
3603
0
      r = n;
3604
0
      goto err;
3605
0
    }
3606
3607
210k
    len = enclen(reg->enc, p, end);
3608
3609
210k
    varlen = is_case_fold_variable_len(n, items, len);
3610
210k
    if (n == 0 || varlen == 0 || is_in_look_behind) {
3611
199k
      if (IS_NULL(snode)) {
3612
43.2k
  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3613
659
          onig_node_free(top_root);
3614
659
    top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3615
659
    if (IS_NULL(root)) {
3616
0
      onig_node_free(prev_node);
3617
0
      goto mem_err;
3618
0
    }
3619
659
  }
3620
3621
43.2k
  prev_node = snode = onig_node_new_str(NULL, NULL);
3622
43.2k
  if (IS_NULL(snode)) goto mem_err;
3623
43.2k
  if (IS_NOT_NULL(root)) {
3624
5.55k
    if (IS_NULL(onig_node_list_add(root, snode))) {
3625
0
      onig_node_free(snode);
3626
0
      goto mem_err;
3627
0
    }
3628
5.55k
  }
3629
43.2k
      }
3630
3631
199k
      r = onig_node_str_cat(snode, p, p + len);
3632
199k
      if (r != 0) goto err;
3633
199k
    }
3634
11.4k
    else {
3635
11.4k
      alt_num *= (n + 1);
3636
11.4k
      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3637
3638
7.84k
      if (IS_NOT_NULL(snode)) {
3639
3.23k
  r = update_string_node_case_fold(reg, snode);
3640
3.23k
  if (r == 0) {
3641
3.23k
    NSTRING_SET_AMBIG(snode);
3642
3.23k
  }
3643
3.23k
      }
3644
7.84k
      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3645
3.04k
        onig_node_free(top_root);
3646
3.04k
  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3647
3.04k
  if (IS_NULL(root)) {
3648
0
    onig_node_free(prev_node);
3649
0
    goto mem_err;
3650
0
  }
3651
3.04k
      }
3652
3653
7.84k
      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3654
7.84k
      if (r < 0) goto mem_err;
3655
7.84k
      if (r == 1) {
3656
5.49k
  if (IS_NULL(root)) {
3657
2.98k
    top_root = prev_node;
3658
2.98k
  }
3659
2.51k
  else {
3660
2.51k
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3661
0
      onig_node_free(prev_node);
3662
0
      goto mem_err;
3663
0
    }
3664
2.51k
  }
3665
3666
5.49k
  root = NCAR(prev_node);
3667
5.49k
      }
3668
2.35k
      else { /* r == 0 */
3669
2.35k
  if (IS_NOT_NULL(root)) {
3670
724
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671
0
      onig_node_free(prev_node);
3672
0
      goto mem_err;
3673
0
    }
3674
724
  }
3675
2.35k
      }
3676
3677
7.84k
      snode = NULL_NODE;
3678
7.84k
    }
3679
3680
206k
    p += len;
3681
206k
  }
3682
42.9k
  if (IS_NOT_NULL(snode)) {
3683
40.0k
    r = update_string_node_case_fold(reg, snode);
3684
40.0k
    if (r == 0) {
3685
40.0k
      NSTRING_SET_AMBIG(snode);
3686
40.0k
    }
3687
40.0k
  }
3688
3689
42.9k
  if (p < end) {
3690
3.64k
    Node *srem;
3691
3692
3.64k
    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3693
3.64k
    if (r != 0) goto mem_err;
3694
3695
3.64k
    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3696
968
      onig_node_free(top_root);
3697
968
      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3698
968
      if (IS_NULL(root)) {
3699
0
  onig_node_free(srem);
3700
0
  onig_node_free(prev_node);
3701
0
  goto mem_err;
3702
0
      }
3703
968
    }
3704
3705
3.64k
    if (IS_NULL(root)) {
3706
627
      prev_node = srem;
3707
627
    }
3708
3.02k
    else {
3709
3.02k
      if (IS_NULL(onig_node_list_add(root, srem))) {
3710
0
  onig_node_free(srem);
3711
0
  goto mem_err;
3712
0
      }
3713
3.02k
    }
3714
3.64k
  }
3715
3716
  /* ending */
3717
42.9k
  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3718
42.9k
  swap_node(node, top_root);
3719
42.9k
  onig_node_free(top_root);
3720
42.9k
  return 0;
3721
3722
0
 mem_err:
3723
0
  r = ONIGERR_MEMORY;
3724
3725
0
 err:
3726
0
  onig_node_free(top_root);
3727
0
  return r;
3728
0
}
3729
3730
3731
#ifdef USE_COMBINATION_EXPLOSION_CHECK

# define CEC_THRES_NUM_BIG_REPEAT         512
# define CEC_INFINITE_NUM          0x7fffffff

# define CEC_IN_INFINITE_REPEAT    (1<<0)
# define CEC_IN_FINITE_REPEAT      (1<<1)
# define CEC_CONT_BIG_REPEAT       (1<<2)

/*
 * Walk the tree and assign a combination-explosion check number to each
 * quantifier that needs one.
 *
 * `state` is a set of CEC_* flags describing the enclosing context; the
 * return value is the state to carry to the following sibling in a list
 * (for an alternation, the OR of all branch results).  The list and
 * alternation loops stop as soon as a result is negative.
 *
 * For a quantifier:
 *  - inside a finite repeat, comb_exp_check_num is set to -1;
 *  - otherwise, when it sits inside an infinite repeat and is itself
 *    variable, or follows a big repeat and is itself big
 *    (>= CEC_THRES_NUM_BIG_REPEAT), it receives the next check number
 *    unless it already has one.
 * A finite repeat with upper > 1 around a capture group that directly
 * contains an infinite quantifier of the same greediness has its upper
 * bound reduced (only when no group is back-referenced):
 *   (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
 */
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
  int r = state;

  switch (NTYPE(node)) {
  case NT_LIST:
    /* each element receives the state produced by its predecessor */
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      r = setup_comb_exp_check(NCAR(node), r, env);
      if (r < 0) break;
    }
    break;

  case NT_ALT:
    /* every branch starts from the incoming state */
    for (; IS_NOT_NULL(node); node = NCDR(node)) {
      int branch = setup_comb_exp_check(NCAR(node), state, env);

      r |= branch;
      if (branch < 0) break;
    }
    break;

  case NT_QTFR:
    {
      QtfrNode* qn = NQTFR(node);
      Node* target = qn->target;
      int child_state = state;
      int add_state = 0;

      if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1) {
	/* {0,1}, {1,1} are allowed */
	child_state |= CEC_IN_FINITE_REPEAT;

	/* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
	if (env->backrefed_mem == 0 && NTYPE(qn->target) == NT_ENCLOSE) {
	  EncloseNode* en = NENCLOSE(qn->target);

	  if (en->type == ENCLOSE_MEMORY && NTYPE(en->target) == NT_QTFR) {
	    QtfrNode* inner = NQTFR(en->target);

	    if (IS_REPEAT_INFINITE(inner->upper)
		&& inner->greedy == qn->greedy) {
	      qn->upper = (qn->lower == 0 ? 1 : qn->lower);
	      if (qn->upper == 1)
		child_state = state;
	    }
	  }
	}
      }

      if ((state & CEC_IN_FINITE_REPEAT) != 0) {
	qn->comb_exp_check_num = -1;
      }
      else {
	int var_num;

	if (IS_REPEAT_INFINITE(qn->upper)) {
	  var_num = CEC_INFINITE_NUM;
	  child_state |= CEC_IN_INFINITE_REPEAT;
	}
	else {
	  var_num = qn->upper - qn->lower;
	}

	if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
	  add_state |= CEC_CONT_BIG_REPEAT;

	if ((((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
	     ((state & CEC_CONT_BIG_REPEAT) != 0 &&
	      var_num >= CEC_THRES_NUM_BIG_REPEAT)) &&
	    qn->comb_exp_check_num == 0) {
	  env->num_comb_exp_check++;
	  qn->comb_exp_check_num = env->num_comb_exp_check;
	  if (env->curr_max_regnum > env->comb_exp_max_regnum)
	    env->comb_exp_max_regnum = env->curr_max_regnum;
	}
      }

      r = setup_comb_exp_check(target, child_state, env);
      r |= add_state;
    }
    break;

  case NT_ENCLOSE:
    {
      EncloseNode* en = NENCLOSE(node);

      /* track the highest capture-group number seen so far */
      if (en->type == ENCLOSE_MEMORY && env->curr_max_regnum < en->regnum)
	env->curr_max_regnum = en->regnum;

      r = setup_comb_exp_check(en->target, state, env);
    }
    break;

# ifdef USE_SUBEXP_CALL
  case NT_CALL:
    if (IS_CALL_RECURSION(NCALL(node)))
      env->has_recursion = 1;
    else
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
    break;
# endif

  default:
    break;
  }

  return r;
}
#endif
3868
3869
/* setup_tree does the following work.
3870
 1. check empty loop. (set qn->target_empty_info)
3871
 2. expand ignore-case in char class.
3872
 3. set memory status bit flags. (reg->mem_stats)
3873
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3874
 5. find invalid patterns in look-behind.
3875
 6. expand repeated string.
3876
 */
3877
static int
3878
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3879
12.4M
{
3880
12.4M
  int type;
3881
12.4M
  int r = 0;
3882
3883
12.4M
restart:
3884
12.4M
  type = NTYPE(node);
3885
12.4M
  switch (type) {
3886
1.53M
  case NT_LIST:
3887
1.53M
    {
3888
1.53M
      Node* prev = NULL_NODE;
3889
5.98M
      do {
3890
5.98M
  r = setup_tree(NCAR(node), reg, state, env);
3891
5.98M
  if (IS_NOT_NULL(prev) && r == 0) {
3892
4.44M
    r = next_setup(prev, NCAR(node), reg);
3893
4.44M
  }
3894
5.98M
  prev = NCAR(node);
3895
5.98M
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3896
1.53M
    }
3897
1.53M
    break;
3898
3899
554k
  case NT_ALT:
3900
1.98M
    do {
3901
1.98M
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3902
1.98M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3903
554k
    break;
3904
3905
2.72M
  case NT_CCLASS:
3906
2.72M
    break;
3907
3908
2.28M
  case NT_STR:
3909
2.28M
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3910
59.1k
      r = expand_case_fold_string(node, reg, state);
3911
59.1k
    }
3912
2.28M
    break;
3913
3914
6.79k
  case NT_CTYPE:
3915
601k
  case NT_CANY:
3916
601k
    break;
3917
3918
0
#ifdef USE_SUBEXP_CALL
3919
7.98k
  case NT_CALL:
3920
7.98k
    break;
3921
0
#endif
3922
3923
8.27k
  case NT_BREF:
3924
8.27k
    {
3925
8.27k
      int i;
3926
8.27k
      int* p;
3927
8.27k
      Node** nodes = SCANENV_MEM_NODES(env);
3928
8.27k
      BRefNode* br = NBREF(node);
3929
8.27k
      p = BACKREFS_P(br);
3930
26.4k
      for (i = 0; i < br->back_num; i++) {
3931
18.2k
  if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3932
18.2k
  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3933
18.2k
  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3934
18.2k
#ifdef USE_BACKREF_WITH_LEVEL
3935
18.2k
  if (IS_BACKREF_NEST_LEVEL(br)) {
3936
1.85k
    BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3937
1.85k
  }
3938
18.2k
#endif
3939
18.2k
  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3940
18.2k
      }
3941
8.27k
    }
3942
8.23k
    break;
3943
3944
2.70M
  case NT_QTFR:
3945
2.70M
    {
3946
2.70M
      OnigDistance d;
3947
2.70M
      QtfrNode* qn = NQTFR(node);
3948
2.70M
      Node* target = qn->target;
3949
3950
2.70M
      if ((state & IN_REPEAT) != 0) {
3951
487k
  qn->state |= NST_IN_REPEAT;
3952
487k
      }
3953
3954
2.70M
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3955
2.69M
  r = get_min_match_length(target, &d, env);
3956
2.69M
  if (r) break;
3957
2.69M
  if (d == 0) {
3958
34.4k
    qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3959
34.4k
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3960
34.4k
    r = quantifiers_memory_node_info(target);
3961
34.4k
    if (r < 0) break;
3962
34.4k
    if (r > 0) {
3963
3.48k
      qn->target_empty_info = r;
3964
3.48k
    }
3965
34.4k
#endif
3966
#if 0
3967
    r = get_max_match_length(target, &d, env);
3968
    if (r == 0 && d == 0) {
3969
      /*  ()* ==> ()?, ()+ ==> ()  */
3970
      qn->upper = 1;
3971
      if (qn->lower > 1) qn->lower = 1;
3972
      if (NTYPE(target) == NT_STR) {
3973
        qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3974
      }
3975
    }
3976
#endif
3977
34.4k
  }
3978
2.69M
      }
3979
3980
2.70M
      state |= IN_REPEAT;
3981
2.70M
      if (qn->lower != qn->upper)
3982
2.56M
  state |= IN_VAR_REPEAT;
3983
2.70M
      r = setup_tree(target, reg, state, env);
3984
2.70M
      if (r) break;
3985
3986
      /* expand string */
3987
2.70M
#define EXPAND_STRING_MAX_LENGTH  100
3988
2.70M
      if (NTYPE(target) == NT_STR) {
3989
144k
  if (qn->lower > 1) {
3990
8.38k
    int i, n = qn->lower;
3991
8.38k
    OnigDistance len = NSTRING_LEN(target);
3992
8.38k
    StrNode* sn = NSTR(target);
3993
8.38k
    Node* np;
3994
3995
8.38k
    np = onig_node_new_str(sn->s, sn->end);
3996
8.38k
    if (IS_NULL(np)) return ONIGERR_MEMORY;
3997
8.38k
    NSTR(np)->flag = sn->flag;
3998
3999
464k
    for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4000
455k
      r = onig_node_str_cat(np, sn->s, sn->end);
4001
455k
      if (r) {
4002
0
        onig_node_free(np);
4003
0
        return r;
4004
0
      }
4005
455k
    }
4006
8.38k
    if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4007
5.73k
      Node *np1, *np2;
4008
4009
5.73k
      qn->lower -= i;
4010
5.73k
      if (! IS_REPEAT_INFINITE(qn->upper))
4011
5.53k
        qn->upper -= i;
4012
4013
5.73k
      np1 = onig_node_new_list(np, NULL);
4014
5.73k
      if (IS_NULL(np1)) {
4015
0
        onig_node_free(np);
4016
0
        return ONIGERR_MEMORY;
4017
0
      }
4018
5.73k
      swap_node(np1, node);
4019
5.73k
      np2 = onig_node_list_add(node, np1);
4020
5.73k
      if (IS_NULL(np2)) {
4021
0
        onig_node_free(np1);
4022
0
        return ONIGERR_MEMORY;
4023
0
      }
4024
5.73k
    }
4025
2.64k
    else {
4026
2.64k
      swap_node(np, node);
4027
2.64k
      onig_node_free(np);
4028
2.64k
    }
4029
8.38k
    break; /* break case NT_QTFR: */
4030
8.38k
  }
4031
144k
      }
4032
4033
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4034
      if (qn->greedy && (qn->target_empty_info != 0)) {
4035
  if (NTYPE(target) == NT_QTFR) {
4036
    QtfrNode* tqn = NQTFR(target);
4037
    if (IS_NOT_NULL(tqn->head_exact)) {
4038
      qn->head_exact  = tqn->head_exact;
4039
      tqn->head_exact = NULL;
4040
    }
4041
  }
4042
  else {
4043
    qn->head_exact = get_head_value_node(qn->target, 1, reg);
4044
  }
4045
      }
4046
#endif
4047
2.70M
    }
4048
2.69M
    break;
4049
4050
2.69M
  case NT_ENCLOSE:
4051
798k
    {
4052
798k
      EncloseNode* en = NENCLOSE(node);
4053
4054
798k
      switch (en->type) {
4055
254k
      case ENCLOSE_OPTION:
4056
254k
  {
4057
254k
    OnigOptionType options = reg->options;
4058
254k
    reg->options = NENCLOSE(node)->option;
4059
254k
    r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4060
254k
    reg->options = options;
4061
254k
  }
4062
254k
  break;
4063
4064
355k
      case ENCLOSE_MEMORY:
4065
355k
  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4066
25.7k
    BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4067
    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4068
25.7k
  }
4069
355k
  if (IS_ENCLOSE_CALLED(en))
4070
5.35k
    state |= IN_CALL;
4071
355k
  if (IS_ENCLOSE_RECURSION(en))
4072
611
    state |= IN_RECCALL;
4073
355k
  else if ((state & IN_RECCALL) != 0)
4074
2.93k
    SET_CALL_RECURSION(node);
4075
355k
  r = setup_tree(en->target, reg, state, env);
4076
355k
  break;
4077
4078
185k
      case ENCLOSE_STOP_BACKTRACK:
4079
185k
  {
4080
185k
    Node* target = en->target;
4081
185k
    r = setup_tree(target, reg, state, env);
4082
185k
    if (NTYPE(target) == NT_QTFR) {
4083
76.1k
      QtfrNode* tqn = NQTFR(target);
4084
76.1k
      if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4085
76.1k
    tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4086
72.8k
        int qtype = NTYPE(tqn->target);
4087
72.8k
        if (IS_NODE_TYPE_SIMPLE(qtype))
4088
23.0k
    SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4089
72.8k
      }
4090
76.1k
    }
4091
185k
  }
4092
185k
  break;
4093
4094
1.03k
      case ENCLOSE_CONDITION:
4095
1.03k
#ifdef USE_NAMED_GROUP
4096
1.03k
  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4097
1.03k
      env->num_named > 0 &&
4098
1.03k
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4099
1.03k
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4100
2
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4101
2
  }
4102
1.03k
#endif
4103
1.03k
  if (NENCLOSE(node)->regnum > env->num_mem)
4104
5
    return ONIGERR_INVALID_BACKREF;
4105
1.02k
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4106
1.02k
  break;
4107
4108
1.00k
      case ENCLOSE_ABSENT:
4109
1.00k
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4110
1.00k
  break;
4111
798k
      }
4112
798k
    }
4113
798k
    break;
4114
4115
1.20M
  case NT_ANCHOR:
4116
1.20M
    {
4117
1.20M
      AnchorNode* an = NANCHOR(node);
4118
4119
1.20M
      switch (an->type) {
4120
2.29k
      case ANCHOR_PREC_READ:
4121
2.29k
  r = setup_tree(an->target, reg, state, env);
4122
2.29k
  break;
4123
640
      case ANCHOR_PREC_READ_NOT:
4124
640
  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4125
640
  break;
4126
4127
/* allowed node types in look-behind */
4128
0
#define ALLOWED_TYPE_IN_LB  \
4129
1.45k
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4130
1.45k
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4131
4132
652
#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4133
804
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
4134
4135
652
#define ALLOWED_ANCHOR_IN_LB \
4136
652
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4137
652
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4138
652
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4139
652
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4140
804
#define ALLOWED_ANCHOR_IN_LB_NOT \
4141
804
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4142
804
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4143
804
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4144
804
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4145
4146
652
      case ANCHOR_LOOK_BEHIND:
4147
652
  {
4148
652
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4149
652
            ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4150
652
    if (r < 0) return r;
4151
652
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4152
650
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4153
650
    r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4154
650
    if (r != 0) return r;
4155
599
    r = setup_look_behind(node, reg, env);
4156
599
  }
4157
0
  break;
4158
4159
804
      case ANCHOR_LOOK_BEHIND_NOT:
4160
804
  {
4161
804
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4162
804
          ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4163
804
    if (r < 0) return r;
4164
804
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4165
794
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4166
794
    r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4167
794
       env);
4168
794
    if (r != 0) return r;
4169
728
    r = setup_look_behind(node, reg, env);
4170
728
  }
4171
0
  break;
4172
1.20M
      }
4173
1.20M
    }
4174
1.20M
    break;
4175
4176
1.20M
  default:
4177
0
    break;
4178
12.4M
  }
4179
4180
12.4M
  return r;
4181
12.4M
}
4182
4183
/* set skip map for Sunday's quick search */
4184
static int
4185
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4186
      UChar skip[], int ignore_case)
4187
319k
{
4188
319k
  OnigDistance i, len;
4189
319k
  int clen, flen, n, j, k;
4190
319k
  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4191
319k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4192
319k
  OnigEncoding enc = reg->enc;
4193
4194
319k
  len = end - s;
4195
319k
  if (len >= ONIG_CHAR_TABLE_SIZE) {
4196
    /* This should not happen. */
4197
0
    return ONIGERR_TYPE_BUG;
4198
0
  }
4199
4200
319k
  if (ignore_case) {
4201
2.61k
    for (i = 0; i < len; i += clen) {
4202
2.37k
      p = s + i;
4203
2.37k
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4204
2.37k
    p, end, items);
4205
2.37k
      clen = enclen(enc, p, end);
4206
2.37k
      if (p + clen > end)
4207
0
  clen = (int )(end - p);
4208
4209
3.65k
      for (j = 0; j < n; j++) {
4210
1.44k
  if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4211
    /* Different length isn't supported. Stop optimization at here. */
4212
108
    end = p;
4213
108
    goto endcheck;
4214
108
  }
4215
1.33k
  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4216
1.33k
  if (flen != clen) {
4217
    /* Different length isn't supported. Stop optimization at here. */
4218
59
    end = p;
4219
59
    goto endcheck;
4220
59
  }
4221
1.33k
      }
4222
2.37k
    }
4223
411
endcheck:
4224
411
    len = end - s;
4225
411
  }
4226
4227
82.0M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4228
81.7M
    skip[i] = (UChar )(len + 1);
4229
319k
  n = 0;
4230
4.70M
  for (i = 0; i < len; i += clen) {
4231
4.38M
    p = s + i;
4232
4.38M
    if (ignore_case)
4233
2.20k
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4234
4.38M
               p, end, items);
4235
4.38M
    clen = enclen(enc, p, end);
4236
4.38M
    if (p + clen > end)
4237
0
      clen = (int )(end - p);
4238
4239
8.77M
    for (j = 0; j < clen; j++) {
4240
4.38M
      skip[s[i + j]] = (UChar )(len - i - j);
4241
4.39M
      for (k = 0; k < n; k++) {
4242
1.37k
  ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4243
1.37k
  skip[buf[j]] = (UChar )(len - i - j);
4244
1.37k
      }
4245
4.38M
    }
4246
4.38M
  }
4247
4248
319k
  return (int )len;
4249
319k
}
4250
4251
/* Pair of byte lengths: a lower and an upper bound.  distance_value()
 * treats max == ONIG_INFINITE_DISTANCE as a special case. */
typedef struct {
4252
  OnigDistance min;  /* min byte length */
4253
  OnigDistance max;  /* max byte length */
4254
} MinMaxLen;
4255
4256
/* Context handed down through optimize_node_left(). */
typedef struct {
4257
  MinMaxLen        mmd;             /* copied into each node's info by set_bound_node_opt_info() */
4258
  OnigEncoding     enc;             /* encoding passed to the enclen()/ONIGENC_* helpers */
4259
  OnigOptionType   options;         /* saved and restored around NT_CALL targets */
4260
  OnigCaseFoldType case_fold_flag;  /* forwarded to add_char_amb_opt_map_info() */
4261
  ScanEnv*         scan_env;        /* used for back-reference length queries */
4262
} OptEnv;
4263
4264
/* Two bit sets of ANCHOR_* flags; is_left_anchor() decides which set a
 * given flag is stored in. */
typedef struct {
4265
  int left_anchor;   /* flags for which is_left_anchor() returns 1 */
4266
  int right_anchor;  /* flags for which is_left_anchor() returns 0 */
4267
} OptAncInfo;
4268
4269
/* Candidate exact (literal) byte string collected for a node. */
typedef struct {
4270
  MinMaxLen  mmd; /* info position */
4271
  OptAncInfo anc;
4272
4273
  int   reach_end;    /* cleared whenever the stored bytes are cut short or something may follow */
4274
  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
4275
  int   len;          /* number of bytes used in s[] */
4276
  UChar s[OPT_EXACT_MAXLEN];  /* the bytes; only the first len are meaningful */
4277
} OptExactInfo;
4278
4279
/* Byte map candidate for a node.  The field order matters: the static
 * initializer in clear_opt_map_info() is positional. */
typedef struct {
4280
  MinMaxLen mmd; /* info position */
4281
  OptAncInfo anc;
4282
4283
  int   value;      /* weighted value: sum of map_position_value() over the set bytes */
4284
  UChar map[ONIG_CHAR_TABLE_SIZE];  /* map[c] is 1 for each registered byte c, else 0 */
4285
} OptMapInfo;
4286
4287
/* Everything optimize_node_left() collects for one node. */
typedef struct {
4288
  MinMaxLen    len;    /* min/max byte length of the node */
4289
4290
  OptAncInfo   anc;
4291
  OptExactInfo exb;    /* boundary */
4292
  OptExactInfo exm;    /* middle */
4293
  OptExactInfo expr;   /* prec read (?=...) */
4294
4295
  OptMapInfo   map;   /* boundary */
4296
} NodeOptInfo;
4297
4298
4299
static int
4300
map_position_value(OnigEncoding enc, int i)
4301
7.16M
{
4302
7.16M
  static const short int ByteValTable[] = {
4303
7.16M
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4304
7.16M
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4305
7.16M
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4306
7.16M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4307
7.16M
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4308
7.16M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4309
7.16M
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4310
7.16M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4311
7.16M
  };
4312
4313
7.16M
  if (i < numberof(ByteValTable)) {
4314
6.76M
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4315
0
      return 20;
4316
6.76M
    else
4317
6.76M
      return (int )ByteValTable[i];
4318
6.76M
  }
4319
394k
  else
4320
394k
    return 4;   /* Take it easy. */
4321
7.16M
}
4322
4323
static int
4324
distance_value(MinMaxLen* mm)
4325
5.34M
{
4326
  /* 1000 / (min-max-dist + 1) */
4327
5.34M
  static const short int dist_vals[] = {
4328
5.34M
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4329
5.34M
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4330
5.34M
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4331
5.34M
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4332
5.34M
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4333
5.34M
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4334
5.34M
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4335
5.34M
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4336
5.34M
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4337
5.34M
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4338
5.34M
  };
4339
4340
5.34M
  OnigDistance d;
4341
4342
5.34M
  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4343
4344
2.03M
  d = mm->max - mm->min;
4345
2.03M
  if (d < numberof(dist_vals))
4346
    /* return dist_vals[d] * 16 / (mm->min + 12); */
4347
2.02M
    return (int )dist_vals[d];
4348
12.5k
  else
4349
12.5k
    return 1;
4350
2.03M
}
4351
4352
static int
4353
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4354
2.67M
{
4355
2.67M
  if (v2 <= 0) return -1;
4356
2.67M
  if (v1 <= 0) return  1;
4357
4358
2.67M
  v1 *= distance_value(d1);
4359
2.67M
  v2 *= distance_value(d2);
4360
4361
2.67M
  if (v2 > v1) return  1;
4362
2.49M
  if (v2 < v1) return -1;
4363
4364
1.19M
  if (d2->min < d1->min) return  1;
4365
1.16M
  if (d2->min > d1->min) return -1;
4366
240k
  return 0;
4367
1.16M
}
4368
4369
static int
4370
is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4371
276k
{
4372
276k
  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4373
276k
}
4374
4375
4376
static void
4377
set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4378
8.79M
{
4379
8.79M
  mml->min = min;
4380
8.79M
  mml->max = max;
4381
8.79M
}
4382
4383
static void
4384
clear_mml(MinMaxLen* mml)
4385
60.7M
{
4386
60.7M
  mml->min = mml->max = 0;
4387
60.7M
}
4388
4389
static void
4390
copy_mml(MinMaxLen* to, MinMaxLen* from)
4391
41.4M
{
4392
41.4M
  to->min = from->min;
4393
41.4M
  to->max = from->max;
4394
41.4M
}
4395
4396
static void
4397
add_mml(MinMaxLen* to, MinMaxLen* from)
4398
12.5M
{
4399
12.5M
  to->min = distance_add(to->min, from->min);
4400
12.5M
  to->max = distance_add(to->max, from->max);
4401
12.5M
}
4402
4403
/* Disabled code (compiled out by #if 0): would add the single length
 * `len` to both bounds of *to via distance_add(). */
#if 0
4404
static void
4405
add_len_mml(MinMaxLen* to, OnigDistance len)
4406
{
4407
  to->min = distance_add(to->min, len);
4408
  to->max = distance_add(to->max, len);
4409
}
4410
#endif
4411
4412
static void
4413
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4414
1.92M
{
4415
1.92M
  if (to->min > from->min) to->min = from->min;
4416
1.92M
  if (to->max < from->max) to->max = from->max;
4417
1.92M
}
4418
4419
/* Copy the whole OptEnv *from into *to (plain struct assignment). */
static void
4420
copy_opt_env(OptEnv* to, OptEnv* from)
4421
1.62M
{
4422
1.62M
  *to = *from;
4423
1.62M
}
4424
4425
static void
4426
clear_opt_anc_info(OptAncInfo* anc)
4427
75.7M
{
4428
75.7M
  anc->left_anchor  = 0;
4429
75.7M
  anc->right_anchor = 0;
4430
75.7M
}
4431
4432
/* Copy both anchor sets of *from into *to (plain struct assignment). */
static void
4433
copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4434
15.9M
{
4435
15.9M
  *to = *from;
4436
15.9M
}
4437
4438
static void
4439
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4440
        OnigDistance left_len, OnigDistance right_len)
4441
15.9M
{
4442
15.9M
  clear_opt_anc_info(to);
4443
4444
15.9M
  to->left_anchor = left->left_anchor;
4445
15.9M
  if (left_len == 0) {
4446
2.73M
    to->left_anchor |= right->left_anchor;
4447
2.73M
  }
4448
4449
15.9M
  to->right_anchor = right->right_anchor;
4450
15.9M
  if (right_len == 0) {
4451
1.24M
    to->right_anchor |= left->right_anchor;
4452
1.24M
  }
4453
14.7M
  else {
4454
14.7M
    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4455
14.7M
  }
4456
15.9M
}
4457
4458
static int
4459
is_left_anchor(int anc)
4460
1.16M
{
4461
1.16M
  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4462
1.16M
      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4463
1.16M
      anc == ANCHOR_PREC_READ_NOT)
4464
365k
    return 0;
4465
4466
802k
  return 1;
4467
1.16M
}
4468
4469
static int
4470
is_set_opt_anc_info(OptAncInfo* to, int anc)
4471
363k
{
4472
363k
  if ((to->left_anchor & anc) != 0) return 1;
4473
4474
362k
  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4475
363k
}
4476
4477
static void
4478
add_opt_anc_info(OptAncInfo* to, int anc)
4479
1.16M
{
4480
1.16M
  if (is_left_anchor(anc))
4481
802k
    to->left_anchor |= anc;
4482
365k
  else
4483
365k
    to->right_anchor |= anc;
4484
1.16M
}
4485
4486
static void
4487
remove_opt_anc_info(OptAncInfo* to, int anc)
4488
77
{
4489
77
  if (is_left_anchor(anc))
4490
77
    to->left_anchor &= ~anc;
4491
0
  else
4492
0
    to->right_anchor &= ~anc;
4493
77
}
4494
4495
static void
4496
alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4497
2.15M
{
4498
2.15M
  to->left_anchor  &= add->left_anchor;
4499
2.15M
  to->right_anchor &= add->right_anchor;
4500
2.15M
}
4501
4502
static int
4503
is_full_opt_exact_info(OptExactInfo* ex)
4504
9.39M
{
4505
9.39M
  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4506
9.39M
}
4507
4508
static void
4509
clear_opt_exact_info(OptExactInfo* ex)
4510
45.9M
{
4511
45.9M
  clear_mml(&ex->mmd);
4512
45.9M
  clear_opt_anc_info(&ex->anc);
4513
45.9M
  ex->reach_end   = 0;
4514
45.9M
  ex->ignore_case = -1;   /* unset */
4515
45.9M
  ex->len         = 0;
4516
45.9M
  ex->s[0]        = '\0';
4517
45.9M
}
4518
4519
/* Copy the whole OptExactInfo *from into *to (plain struct assignment,
 * byte buffer included). */
static void
4520
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4521
1.60M
{
4522
1.60M
  *to = *from;
4523
1.60M
}
4524
4525
static void
4526
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4527
9.44M
{
4528
9.44M
  int i, j, len;
4529
9.44M
  UChar *p, *end;
4530
9.44M
  OptAncInfo tanc;
4531
4532
9.44M
  if (to->ignore_case < 0)
4533
0
    to->ignore_case = add->ignore_case;
4534
9.44M
  else if (to->ignore_case != add->ignore_case)
4535
1.28k
    return ;  /* avoid */
4536
4537
9.44M
  p = add->s;
4538
9.44M
  end = p + add->len;
4539
9.71M
  for (i = to->len; p < end; ) {
4540
9.55M
    len = enclen(enc, p, end);
4541
9.55M
    if (i + len > OPT_EXACT_MAXLEN) break;
4542
545k
    for (j = 0; j < len && p < end; j++)
4543
280k
      to->s[i++] = *p++;
4544
264k
  }
4545
4546
9.44M
  to->len = i;
4547
9.44M
  to->reach_end = (p == end ? add->reach_end : 0);
4548
4549
9.44M
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4550
9.44M
  if (! to->reach_end) tanc.right_anchor = 0;
4551
9.44M
  copy_opt_anc_info(&to->anc, &tanc);
4552
9.44M
}
4553
4554
static void
4555
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4556
        int raw ARG_UNUSED, OnigEncoding enc)
4557
2.41M
{
4558
2.41M
  int i, j, len;
4559
2.41M
  UChar *p;
4560
4561
15.8M
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4562
13.4M
    len = enclen(enc, p, end);
4563
13.4M
    if (i + len > OPT_EXACT_MAXLEN) break;
4564
27.3M
    for (j = 0; j < len && p < end; j++)
4565
13.8M
      to->s[i++] = *p++;
4566
13.4M
  }
4567
4568
2.41M
  to->len = i;
4569
2.41M
}
4570
4571
static void
4572
alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4573
4.73M
{
4574
4.73M
  int i, j, len;
4575
4576
4.73M
  if (add->len == 0 || to->len == 0) {
4577
4.46M
    clear_opt_exact_info(to);
4578
4.46M
    return ;
4579
4.46M
  }
4580
4581
276k
  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4582
40.1k
    clear_opt_exact_info(to);
4583
40.1k
    return ;
4584
40.1k
  }
4585
4586
387k
  for (i = 0; i < to->len && i < add->len; ) {
4587
386k
    if (to->s[i] != add->s[i]) break;
4588
155k
    len = enclen(env->enc, to->s + i, to->s + to->len);
4589
4590
155k
    for (j = 1; j < len; j++) {
4591
4.15k
      if (to->s[i+j] != add->s[i+j]) break;
4592
4.15k
    }
4593
155k
    if (j < len) break;
4594
151k
    i += len;
4595
151k
  }
4596
4597
236k
  if (! add->reach_end || i < add->len || i < to->len) {
4598
235k
    to->reach_end = 0;
4599
235k
  }
4600
236k
  to->len = i;
4601
236k
  if (to->ignore_case < 0)
4602
0
    to->ignore_case = add->ignore_case;
4603
236k
  else if (add->ignore_case >= 0)
4604
236k
    to->ignore_case |= add->ignore_case;
4605
4606
236k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4607
236k
  if (! to->reach_end) to->anc.right_anchor = 0;
4608
236k
}
4609
4610
static void
4611
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4612
13.2M
{
4613
13.2M
  int v1, v2;
4614
4615
13.2M
  v1 = now->len;
4616
13.2M
  v2 = alt->len;
4617
4618
13.2M
  if (v2 == 0) {
4619
10.9M
    return ;
4620
10.9M
  }
4621
2.25M
  else if (v1 == 0) {
4622
1.59M
    copy_opt_exact_info(now, alt);
4623
1.59M
    return ;
4624
1.59M
  }
4625
655k
  else if (v1 <= 2 && v2 <= 2) {
4626
    /* ByteValTable[x] is big value --> low price */
4627
118k
    v2 = map_position_value(enc, now->s[0]);
4628
118k
    v1 = map_position_value(enc, alt->s[0]);
4629
4630
118k
    if (now->len > 1) v1 += 5;
4631
118k
    if (alt->len > 1) v2 += 5;
4632
118k
  }
4633
4634
655k
  if (now->ignore_case <= 0) v1 *= 2;
4635
655k
  if (alt->ignore_case <= 0) v2 *= 2;
4636
4637
655k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4638
4.65k
    copy_opt_exact_info(now, alt);
4639
655k
}
4640
4641
static void
4642
clear_opt_map_info(OptMapInfo* map)
4643
13.9M
{
4644
13.9M
  static const OptMapInfo clean_info = {
4645
13.9M
    {0, 0}, {0, 0}, 0,
4646
13.9M
    {
4647
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4648
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4649
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4650
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4651
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4652
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4653
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4654
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4655
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4656
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4657
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4658
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4659
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4660
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4661
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4662
13.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4663
13.9M
    }
4664
13.9M
  };
4665
4666
13.9M
  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4667
13.9M
}
4668
4669
/* Copy the whole OptMapInfo *from into *to (plain struct assignment,
 * byte map included). */
static void
4670
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4671
1.05M
{
4672
1.05M
  *to = *from;
4673
1.05M
}
4674
4675
static void
4676
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4677
6.26M
{
4678
6.26M
  if (map->map[c] == 0) {
4679
6.24M
    map->map[c] = 1;
4680
6.24M
    map->value += map_position_value(enc, c);
4681
6.24M
  }
4682
6.26M
}
4683
4684
static int
4685
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4686
                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4687
76.2k
{
4688
76.2k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4689
76.2k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4690
76.2k
  int i, n;
4691
4692
76.2k
  add_char_opt_map_info(map, p[0], enc);
4693
4694
76.2k
  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4695
76.2k
  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4696
76.2k
  if (n < 0) return n;
4697
4698
121k
  for (i = 0; i < n; i++) {
4699
45.2k
    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4700
45.2k
    add_char_opt_map_info(map, buf[0], enc);
4701
45.2k
  }
4702
4703
76.2k
  return 0;
4704
76.2k
}
4705
4706
static void
4707
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4708
6.25M
{
4709
6.25M
  const int z = 1<<15; /* 32768: something big value */
4710
4711
6.25M
  int v1, v2;
4712
4713
6.25M
  if (alt->value == 0) return ;
4714
2.37M
  if (now->value == 0) {
4715
1.04M
    copy_opt_map_info(now, alt);
4716
1.04M
    return ;
4717
1.04M
  }
4718
4719
1.33M
  v1 = z / now->value;
4720
1.33M
  v2 = z / alt->value;
4721
1.33M
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4722
8.49k
    copy_opt_map_info(now, alt);
4723
1.33M
}
4724
4725
static int
4726
comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4727
685k
{
4728
1.37M
#define COMP_EM_BASE  20
4729
685k
  int ve, vm;
4730
4731
685k
  if (m->value <= 0) return -1;
4732
4733
685k
  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4734
685k
  vm = COMP_EM_BASE * 5 * 2 / m->value;
4735
685k
  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4736
685k
}
4737
4738
static void
4739
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4740
1.57M
{
4741
1.57M
  int i, val;
4742
4743
  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4744
1.57M
  if (to->value == 0) return ;
4745
467k
  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4746
124k
    clear_opt_map_info(to);
4747
124k
    return ;
4748
124k
  }
4749
4750
343k
  alt_merge_mml(&to->mmd, &add->mmd);
4751
4752
343k
  val = 0;
4753
88.2M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4754
87.8M
    if (add->map[i])
4755
344k
      to->map[i] = 1;
4756
4757
87.8M
    if (to->map[i])
4758
677k
      val += map_position_value(enc, i);
4759
87.8M
  }
4760
343k
  to->value = val;
4761
4762
343k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4763
343k
}
4764
4765
static void
4766
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4767
13.8M
{
4768
13.8M
  copy_mml(&(opt->exb.mmd),  mmd);
4769
13.8M
  copy_mml(&(opt->expr.mmd), mmd);
4770
13.8M
  copy_mml(&(opt->map.mmd),  mmd);
4771
13.8M
}
4772
4773
static void
4774
clear_node_opt_info(NodeOptInfo* opt)
4775
13.8M
{
4776
13.8M
  clear_mml(&opt->len);
4777
13.8M
  clear_opt_anc_info(&opt->anc);
4778
13.8M
  clear_opt_exact_info(&opt->exb);
4779
13.8M
  clear_opt_exact_info(&opt->exm);
4780
13.8M
  clear_opt_exact_info(&opt->expr);
4781
13.8M
  clear_opt_map_info(&opt->map);
4782
13.8M
}
4783
4784
/* Copy the whole NodeOptInfo *from into *to (plain struct assignment). */
static void
4785
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4786
2.01M
{
4787
2.01M
  *to = *from;
4788
2.01M
}
4789
4790
static void
4791
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4792
6.25M
{
4793
6.25M
  int exb_reach, exm_reach;
4794
6.25M
  OptAncInfo tanc;
4795
4796
6.25M
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4797
6.25M
  copy_opt_anc_info(&to->anc, &tanc);
4798
4799
6.25M
  if (add->exb.len > 0 && to->len.max == 0) {
4800
285k
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4801
285k
      to->len.max, add->len.max);
4802
285k
    copy_opt_anc_info(&add->exb.anc, &tanc);
4803
285k
  }
4804
4805
6.25M
  if (add->map.value > 0 && to->len.max == 0) {
4806
510k
    if (add->map.mmd.max == 0)
4807
455k
      add->map.anc.left_anchor |= to->anc.left_anchor;
4808
510k
  }
4809
4810
6.25M
  exb_reach = to->exb.reach_end;
4811
6.25M
  exm_reach = to->exm.reach_end;
4812
4813
6.25M
  if (add->len.max != 0)
4814
5.01M
    to->exb.reach_end = to->exm.reach_end = 0;
4815
4816
6.25M
  if (add->exb.len > 0) {
4817
1.62M
    if (exb_reach) {
4818
0
      concat_opt_exact_info(&to->exb, &add->exb, enc);
4819
0
      clear_opt_exact_info(&add->exb);
4820
0
    }
4821
1.62M
    else if (exm_reach) {
4822
53.3k
      concat_opt_exact_info(&to->exm, &add->exb, enc);
4823
53.3k
      clear_opt_exact_info(&add->exb);
4824
53.3k
    }
4825
1.62M
  }
4826
6.25M
  select_opt_exact_info(enc, &to->exm, &add->exb);
4827
6.25M
  select_opt_exact_info(enc, &to->exm, &add->exm);
4828
4829
6.25M
  if (to->expr.len > 0) {
4830
11.8k
    if (add->len.max > 0) {
4831
11.2k
      if (to->expr.len > (int )add->len.max)
4832
2.73k
  to->expr.len = (int )add->len.max;
4833
4834
11.2k
      if (to->expr.mmd.max == 0)
4835
389
  select_opt_exact_info(enc, &to->exb, &to->expr);
4836
10.8k
      else
4837
10.8k
  select_opt_exact_info(enc, &to->exm, &to->expr);
4838
11.2k
    }
4839
11.8k
  }
4840
6.24M
  else if (add->expr.len > 0) {
4841
2.80k
    copy_opt_exact_info(&to->expr, &add->expr);
4842
2.80k
  }
4843
4844
6.25M
  select_opt_map_info(&to->map, &add->map);
4845
4846
6.25M
  add_mml(&to->len, &add->len);
4847
6.25M
}
4848
4849
static void
4850
alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4851
1.57M
{
4852
1.57M
  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4853
1.57M
  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4854
1.57M
  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4855
1.57M
  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4856
1.57M
  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4857
4858
1.57M
  alt_merge_mml(&to->len, &add->len);
4859
1.57M
}
4860
4861
4862
365k
/* NOTE(review): presumably an upper limit for a per-node reference
 * counter consulted during optimization; the use site is not in this
 * part of the file -- confirm there. */
#define MAX_NODE_OPT_INFO_REF_COUNT    5
4863
4864
static int
4865
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4866
13.8M
{
4867
13.8M
  int type;
4868
13.8M
  int r = 0;
4869
4870
13.8M
  clear_node_opt_info(opt);
4871
13.8M
  set_bound_node_opt_info(opt, &env->mmd);
4872
4873
13.8M
  type = NTYPE(node);
4874
13.8M
  switch (type) {
4875
1.62M
  case NT_LIST:
4876
1.62M
    {
4877
1.62M
      OptEnv nenv;
4878
1.62M
      NodeOptInfo nopt;
4879
1.62M
      Node* nd = node;
4880
4881
1.62M
      copy_opt_env(&nenv, env);
4882
6.25M
      do {
4883
6.25M
  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4884
6.25M
  if (r == 0) {
4885
6.25M
    add_mml(&nenv.mmd, &nopt.len);
4886
6.25M
    concat_left_node_opt_info(env->enc, opt, &nopt);
4887
6.25M
  }
4888
6.25M
      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4889
1.62M
    }
4890
1.62M
    break;
4891
4892
605k
  case NT_ALT:
4893
605k
    {
4894
605k
      NodeOptInfo nopt;
4895
605k
      Node* nd = node;
4896
4897
2.18M
      do {
4898
2.18M
  r = optimize_node_left(NCAR(nd), &nopt, env);
4899
2.18M
  if (r == 0) {
4900
2.18M
    if (nd == node) copy_node_opt_info(opt, &nopt);
4901
1.57M
    else            alt_merge_node_opt_info(opt, &nopt, env);
4902
2.18M
  }
4903
2.18M
      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4904
605k
    }
4905
605k
    break;
4906
4907
2.42M
  case NT_STR:
4908
2.42M
    {
4909
2.42M
      StrNode* sn = NSTR(node);
4910
2.42M
      OnigDistance slen = sn->end - sn->s;
4911
2.42M
      int is_raw = NSTRING_IS_RAW(node);
4912
4913
2.42M
      if (! NSTRING_IS_AMBIG(node)) {
4914
2.34M
  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4915
2.34M
          is_raw, env->enc);
4916
2.34M
  opt->exb.ignore_case = 0;
4917
2.34M
  if (slen > 0) {
4918
2.28M
    add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4919
2.28M
  }
4920
2.34M
  set_mml(&opt->len, slen, slen);
4921
2.34M
      }
4922
88.5k
      else {
4923
88.5k
  OnigDistance max;
4924
4925
88.5k
  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4926
12.3k
    int n = onigenc_strlen(env->enc, sn->s, sn->end);
4927
12.3k
    max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4928
12.3k
  }
4929
76.2k
  else {
4930
76.2k
    concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4931
76.2k
            is_raw, env->enc);
4932
76.2k
    opt->exb.ignore_case = 1;
4933
4934
76.2k
    if (slen > 0) {
4935
76.2k
      r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4936
76.2k
            env->enc, env->case_fold_flag);
4937
76.2k
      if (r != 0) break;
4938
76.2k
    }
4939
4940
76.2k
    max = slen;
4941
76.2k
  }
4942
4943
88.5k
  set_mml(&opt->len, slen, max);
4944
88.5k
      }
4945
4946
2.42M
      if ((OnigDistance )opt->exb.len == slen)
4947
2.25M
  opt->exb.reach_end = 1;
4948
2.42M
    }
4949
0
    break;
4950
4951
2.89M
  case NT_CCLASS:
4952
2.89M
    {
4953
2.89M
      int i, z;
4954
2.89M
      CClassNode* cc = NCCLASS(node);
4955
4956
      /* no need to check ignore case. (set in setup_tree()) */
4957
4958
2.89M
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4959
2.17M
  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4960
2.17M
  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4961
4962
2.17M
  set_mml(&opt->len, min, max);
4963
2.17M
      }
4964
724k
      else {
4965
186M
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4966
185M
    z = BITSET_AT(cc->bs, i);
4967
185M
    if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4968
3.86M
      add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4969
3.86M
    }
4970
185M
  }
4971
724k
  set_mml(&opt->len, 1, 1);
4972
724k
      }
4973
2.89M
    }
4974
2.89M
    break;
4975
4976
6.70k
  case NT_CTYPE:
4977
6.70k
    {
4978
6.70k
      int i, min, max;
4979
6.70k
      int maxcode;
4980
4981
6.70k
      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4982
4983
6.70k
      if (max == 1) {
4984
0
  min = 1;
4985
4986
0
  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4987
0
  switch (NCTYPE(node)->ctype) {
4988
0
  case ONIGENC_CTYPE_WORD:
4989
0
    if (NCTYPE(node)->not != 0) {
4990
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4991
0
        if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
4992
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4993
0
        }
4994
0
      }
4995
0
    }
4996
0
    else {
4997
0
      for (i = 0; i < maxcode; i++) {
4998
0
        if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4999
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5000
0
        }
5001
0
      }
5002
0
    }
5003
0
    break;
5004
0
  }
5005
0
      }
5006
6.70k
      else {
5007
6.70k
  min = ONIGENC_MBC_MINLEN(env->enc);
5008
6.70k
      }
5009
6.70k
      set_mml(&opt->len, min, max);
5010
6.70k
    }
5011
0
    break;
5012
5013
609k
  case NT_CANY:
5014
609k
    {
5015
609k
      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5016
609k
      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5017
609k
      set_mml(&opt->len, min, max);
5018
609k
    }
5019
609k
    break;
5020
5021
1.21M
  case NT_ANCHOR:
5022
1.21M
    switch (NANCHOR(node)->type) {
5023
796
    case ANCHOR_BEGIN_BUF:
5024
1.36k
    case ANCHOR_BEGIN_POSITION:
5025
800k
    case ANCHOR_BEGIN_LINE:
5026
801k
    case ANCHOR_END_BUF:
5027
802k
    case ANCHOR_SEMI_END_BUF:
5028
1.16M
    case ANCHOR_END_LINE:
5029
1.16M
    case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5030
1.16M
    case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5031
1.16M
      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5032
1.16M
      break;
5033
5034
4.49k
    case ANCHOR_PREC_READ:
5035
4.49k
      {
5036
4.49k
  NodeOptInfo nopt;
5037
5038
4.49k
  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5039
4.49k
  if (r == 0) {
5040
4.49k
    if (nopt.exb.len > 0)
5041
1.91k
      copy_opt_exact_info(&opt->expr, &nopt.exb);
5042
2.58k
    else if (nopt.exm.len > 0)
5043
912
      copy_opt_exact_info(&opt->expr, &nopt.exm);
5044
5045
4.49k
    opt->expr.reach_end = 0;
5046
5047
4.49k
    if (nopt.map.value > 0)
5048
3.68k
      copy_opt_map_info(&opt->map, &nopt.map);
5049
4.49k
  }
5050
4.49k
      }
5051
4.49k
      break;
5052
5053
4.16k
    case ANCHOR_LOOK_BEHIND_NOT:
5054
4.16k
      break;
5055
1.21M
    }
5056
1.21M
    break;
5057
5058
1.21M
  case NT_BREF:
5059
10.1k
    {
5060
10.1k
      int i;
5061
10.1k
      int* backs;
5062
10.1k
      OnigDistance min, max, tmin, tmax;
5063
10.1k
      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5064
10.1k
      BRefNode* br = NBREF(node);
5065
5066
10.1k
      if (br->state & NST_RECURSION) {
5067
2.65k
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5068
2.65k
  break;
5069
2.65k
      }
5070
7.45k
      backs = BACKREFS_P(br);
5071
7.45k
      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5072
7.45k
      if (r != 0) break;
5073
7.45k
      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5074
7.45k
      if (r != 0) break;
5075
16.6k
      for (i = 1; i < br->back_num; i++) {
5076
9.21k
  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5077
9.21k
  if (r != 0) break;
5078
9.21k
  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5079
9.21k
  if (r != 0) break;
5080
9.21k
  if (min > tmin) min = tmin;
5081
9.21k
  if (max < tmax) max = tmax;
5082
9.21k
      }
5083
7.45k
      if (r == 0) set_mml(&opt->len, min, max);
5084
7.45k
    }
5085
0
    break;
5086
5087
0
#ifdef USE_SUBEXP_CALL
5088
10.6k
  case NT_CALL:
5089
10.6k
    if (IS_CALL_RECURSION(NCALL(node)))
5090
2.06k
      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5091
8.58k
    else {
5092
8.58k
      OnigOptionType save = env->options;
5093
8.58k
      env->options = NENCLOSE(NCALL(node)->target)->option;
5094
8.58k
      r = optimize_node_left(NCALL(node)->target, opt, env);
5095
8.58k
      env->options = save;
5096
8.58k
    }
5097
10.6k
    break;
5098
0
#endif
5099
5100
2.83M
  case NT_QTFR:
5101
2.83M
    {
5102
2.83M
      int i;
5103
2.83M
      OnigDistance min, max;
5104
2.83M
      NodeOptInfo nopt;
5105
2.83M
      QtfrNode* qn = NQTFR(node);
5106
5107
2.83M
      r = optimize_node_left(qn->target, &nopt, env);
5108
2.83M
      if (r) break;
5109
5110
2.83M
      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5111
1.36M
  if (env->mmd.max == 0 &&
5112
1.36M
      NTYPE(qn->target) == NT_CANY && qn->greedy) {
5113
1.65k
    if (IS_MULTILINE(env->options))
5114
      /* implicit anchor: /.*a/ ==> /\A.*a/ */
5115
697
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5116
955
    else
5117
955
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5118
1.65k
  }
5119
1.36M
      }
5120
1.47M
      else {
5121
1.47M
  if (qn->lower > 0) {
5122
1.40M
    copy_node_opt_info(opt, &nopt);
5123
1.40M
    if (nopt.exb.len > 0) {
5124
115k
      if (nopt.exb.reach_end) {
5125
9.47M
        for (i = 2; i <= qn->lower &&
5126
9.47M
        ! is_full_opt_exact_info(&opt->exb); i++) {
5127
9.39M
    concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5128
9.39M
        }
5129
84.4k
        if (i < qn->lower) {
5130
5.52k
    opt->exb.reach_end = 0;
5131
5.52k
        }
5132
84.4k
      }
5133
115k
    }
5134
5135
1.40M
    if (qn->lower != qn->upper) {
5136
1.26M
      opt->exb.reach_end = 0;
5137
1.26M
      opt->exm.reach_end = 0;
5138
1.26M
    }
5139
1.40M
    if (qn->lower > 1)
5140
140k
      opt->exm.reach_end = 0;
5141
1.40M
  }
5142
1.47M
      }
5143
5144
2.83M
      min = distance_multiply(nopt.len.min, qn->lower);
5145
2.83M
      if (IS_REPEAT_INFINITE(qn->upper))
5146
2.63M
  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5147
203k
      else
5148
203k
  max = distance_multiply(nopt.len.max, qn->upper);
5149
5150
2.83M
      set_mml(&opt->len, min, max);
5151
2.83M
    }
5152
0
    break;
5153
5154
1.56M
  case NT_ENCLOSE:
5155
1.56M
    {
5156
1.56M
      EncloseNode* en = NENCLOSE(node);
5157
5158
1.56M
      switch (en->type) {
5159
275k
      case ENCLOSE_OPTION:
5160
275k
  {
5161
275k
    OnigOptionType save = env->options;
5162
5163
275k
    env->options = en->option;
5164
275k
    r = optimize_node_left(en->target, opt, env);
5165
275k
    env->options = save;
5166
275k
  }
5167
275k
  break;
5168
5169
365k
      case ENCLOSE_MEMORY:
5170
365k
#ifdef USE_SUBEXP_CALL
5171
365k
  en->opt_count++;
5172
365k
  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5173
2.01k
    OnigDistance min, max;
5174
5175
2.01k
    min = 0;
5176
2.01k
    max = ONIG_INFINITE_DISTANCE;
5177
2.01k
    if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5178
2.01k
    if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5179
2.01k
    set_mml(&opt->len, min, max);
5180
2.01k
  }
5181
363k
  else
5182
363k
#endif
5183
363k
  {
5184
363k
    r = optimize_node_left(en->target, opt, env);
5185
5186
363k
    if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5187
290
      if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5188
77
        remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5189
290
    }
5190
363k
  }
5191
365k
  break;
5192
5193
918k
      case ENCLOSE_STOP_BACKTRACK:
5194
919k
      case ENCLOSE_CONDITION:
5195
919k
  r = optimize_node_left(en->target, opt, env);
5196
919k
  break;
5197
5198
692
      case ENCLOSE_ABSENT:
5199
692
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5200
692
  break;
5201
1.56M
      }
5202
1.56M
    }
5203
1.56M
    break;
5204
5205
1.56M
  default:
5206
#ifdef ONIG_DEBUG
5207
    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5208
      NTYPE(node));
5209
#endif
5210
0
    r = ONIGERR_TYPE_BUG;
5211
0
    break;
5212
13.8M
  }
5213
5214
13.8M
  return r;
5215
13.8M
}
5216
5217
static int
5218
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5219
487k
{
5220
487k
  int allow_reverse;
5221
5222
487k
  if (e->len == 0) return 0;
5223
5224
487k
  reg->exact = (UChar* )xmalloc(e->len);
5225
487k
  CHECK_NULL_RETURN_MEMERR(reg->exact);
5226
487k
  xmemcpy(reg->exact, e->s, e->len);
5227
487k
  reg->exact_end = reg->exact + e->len;
5228
5229
487k
  allow_reverse =
5230
487k
  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5231
5232
487k
  if (e->ignore_case > 0) {
5233
423
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5234
411
      e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5235
411
          reg->map, 1);
5236
411
      reg->exact_end = reg->exact + e->len;
5237
411
      if (e->len >= 3) {
5238
240
  reg->optimize = (allow_reverse != 0
5239
240
       ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5240
240
      }
5241
171
      else if (e->len > 0) {
5242
146
  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5243
146
      }
5244
25
      else
5245
25
  return 0;
5246
411
    }
5247
12
    else {
5248
12
      reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5249
12
    }
5250
423
  }
5251
486k
  else {
5252
486k
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5253
318k
      set_bm_skip(reg->exact, reg->exact_end, reg,
5254
318k
      reg->map, 0);
5255
318k
      reg->optimize = (allow_reverse != 0
5256
318k
         ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5257
318k
    }
5258
167k
    else {
5259
167k
      reg->optimize = ONIG_OPTIMIZE_EXACT;
5260
167k
    }
5261
486k
  }
5262
5263
487k
  reg->dmin = e->mmd.min;
5264
487k
  reg->dmax = e->mmd.max;
5265
5266
487k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5267
487k
    reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5268
487k
  }
5269
5270
487k
  return 0;
5271
487k
}
5272
5273
static void
5274
set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5275
310k
{
5276
310k
  int i;
5277
5278
79.7M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5279
79.4M
    reg->map[i] = m->map[i];
5280
5281
310k
  reg->optimize   = ONIG_OPTIMIZE_MAP;
5282
310k
  reg->dmin       = m->mmd.min;
5283
310k
  reg->dmax       = m->mmd.max;
5284
5285
310k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5286
310k
    reg->threshold_len = (int )(reg->dmin + 1);
5287
310k
  }
5288
310k
}
5289
5290
static void
5291
set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5292
797k
{
5293
797k
  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
5294
797k
  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5295
797k
}
5296
5297
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5298
static void print_optimize_info(FILE* f, regex_t* reg);
5299
#endif
5300
5301
static int
5302
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5303
954k
{
5304
5305
954k
  int r;
5306
954k
  NodeOptInfo opt;
5307
954k
  OptEnv env;
5308
5309
954k
  env.enc            = reg->enc;
5310
954k
  env.options        = reg->options;
5311
954k
  env.case_fold_flag = reg->case_fold_flag;
5312
954k
  env.scan_env   = scan_env;
5313
954k
  clear_mml(&env.mmd);
5314
5315
954k
  r = optimize_node_left(node, &opt, &env);
5316
954k
  if (r) return r;
5317
5318
954k
  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5319
954k
        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5320
954k
        ANCHOR_LOOK_BEHIND);
5321
5322
954k
  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5323
9
    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5324
5325
954k
  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5326
954k
  ANCHOR_PREC_READ_NOT);
5327
5328
954k
  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5329
26
    reg->anchor_dmin = opt.len.min;
5330
26
    reg->anchor_dmax = opt.len.max;
5331
26
  }
5332
5333
954k
  if (opt.exb.len > 0 || opt.exm.len > 0) {
5334
685k
    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5335
685k
    if (opt.map.value > 0 &&
5336
685k
  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5337
198k
      goto set_map;
5338
198k
    }
5339
487k
    else {
5340
487k
      r = set_optimize_exact_info(reg, &opt.exb);
5341
487k
      set_sub_anchor(reg, &opt.exb.anc);
5342
487k
    }
5343
685k
  }
5344
268k
  else if (opt.map.value > 0) {
5345
310k
  set_map:
5346
310k
    set_optimize_map_info(reg, &opt.map);
5347
310k
    set_sub_anchor(reg, &opt.map.anc);
5348
310k
  }
5349
157k
  else {
5350
157k
    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5351
157k
    if (opt.len.max == 0)
5352
82.9k
      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5353
157k
  }
5354
5355
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5356
  print_optimize_info(stderr, reg);
5357
#endif
5358
954k
  return r;
5359
954k
}
5360
5361
static void
5362
clear_optimize_info(regex_t* reg)
5363
954k
{
5364
954k
  reg->optimize      = ONIG_OPTIMIZE_NONE;
5365
954k
  reg->anchor        = 0;
5366
954k
  reg->anchor_dmin   = 0;
5367
954k
  reg->anchor_dmax   = 0;
5368
954k
  reg->sub_anchor    = 0;
5369
954k
  reg->exact_end     = (UChar* )NULL;
5370
954k
  reg->threshold_len = 0;
5371
954k
  if (IS_NOT_NULL(reg->exact)) {
5372
0
    xfree(reg->exact);
5373
0
    reg->exact = (UChar* )NULL;
5374
0
  }
5375
954k
}
5376
5377
#ifdef ONIG_DEBUG
5378
5379
/*
 * Debug helper: write the pattern [s, end) to `fp` as
 * "PATTERN: /.../ (encoding-name)".
 * For encodings whose minimum character length exceeds one byte, each
 * character is decoded and code points >= 0x80 are printed in hex.
 */
static void print_enc_string(FILE* fp, OnigEncoding enc,
                             const UChar *s, const UChar *end)
{
  const UChar *cur;

  fprintf(fp, "\nPATTERN: /");

  if (ONIGENC_MBC_MINLEN(enc) <= 1) {
    /* Byte-oriented encoding: dump the bytes as they are. */
    for (cur = s; cur < end; cur++)
      fputc((int )*cur, fp);
  }
  else {
    for (cur = s; cur < end; cur += enclen(enc, cur, end)) {
      OnigCodePoint cp = ONIGENC_MBC_TO_CODE(enc, cur, end);

      if (cp < 0x80)
        fputc((int )cp, fp);
      else
        fprintf(fp, " 0x%04x ", (int )cp);
    }
  }

  fprintf(fp, "/ (%s)\n", enc->name);
}
5410
#endif  /* ONIG_DEBUG */
5411
5412
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413
static void
5414
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5415
{
5416
  if (a == ONIG_INFINITE_DISTANCE)
5417
    fputs("inf", f);
5418
  else
5419
    fprintf(f, "(%"PRIuPTR")", a);
5420
5421
  fputs("-", f);
5422
5423
  if (b == ONIG_INFINITE_DISTANCE)
5424
    fputs("inf", f);
5425
  else
5426
    fprintf(f, "(%"PRIuPTR")", b);
5427
}
5428
5429
static void
5430
print_anchor(FILE* f, int anchor)
5431
{
5432
  int q = 0;
5433
5434
  fprintf(f, "[");
5435
5436
  if (anchor & ANCHOR_BEGIN_BUF) {
5437
    fprintf(f, "begin-buf");
5438
    q = 1;
5439
  }
5440
  if (anchor & ANCHOR_BEGIN_LINE) {
5441
    if (q) fprintf(f, ", ");
5442
    q = 1;
5443
    fprintf(f, "begin-line");
5444
  }
5445
  if (anchor & ANCHOR_BEGIN_POSITION) {
5446
    if (q) fprintf(f, ", ");
5447
    q = 1;
5448
    fprintf(f, "begin-pos");
5449
  }
5450
  if (anchor & ANCHOR_END_BUF) {
5451
    if (q) fprintf(f, ", ");
5452
    q = 1;
5453
    fprintf(f, "end-buf");
5454
  }
5455
  if (anchor & ANCHOR_SEMI_END_BUF) {
5456
    if (q) fprintf(f, ", ");
5457
    q = 1;
5458
    fprintf(f, "semi-end-buf");
5459
  }
5460
  if (anchor & ANCHOR_END_LINE) {
5461
    if (q) fprintf(f, ", ");
5462
    q = 1;
5463
    fprintf(f, "end-line");
5464
  }
5465
  if (anchor & ANCHOR_ANYCHAR_STAR) {
5466
    if (q) fprintf(f, ", ");
5467
    q = 1;
5468
    fprintf(f, "anychar-star");
5469
  }
5470
  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5471
    if (q) fprintf(f, ", ");
5472
    fprintf(f, "anychar-star-ml");
5473
  }
5474
5475
  fprintf(f, "]");
5476
}
5477
5478
static void
5479
print_optimize_info(FILE* f, regex_t* reg)
5480
{
5481
  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5482
                              "EXACT_IC", "MAP",
5483
                              "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5484
5485
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5486
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5487
  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5488
    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5489
  fprintf(f, "\n");
5490
5491
  if (reg->optimize) {
5492
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5493
    fprintf(f, "\n");
5494
  }
5495
  fprintf(f, "\n");
5496
5497
  if (reg->exact) {
5498
    UChar *p;
5499
    fprintf(f, "exact: [");
5500
    for (p = reg->exact; p < reg->exact_end; p++) {
5501
      fputc(*p, f);
5502
    }
5503
    fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5504
  }
5505
  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5506
    int c, i, n = 0;
5507
5508
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5509
      if (reg->map[i]) n++;
5510
5511
    fprintf(f, "map: n=%d\n", n);
5512
    if (n > 0) {
5513
      c = 0;
5514
      fputc('[', f);
5515
      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5516
  if (reg->map[i] != 0) {
5517
    if (c > 0)  fputs(", ", f);
5518
    c++;
5519
    if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5520
        ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5521
      fputc(i, f);
5522
    else
5523
      fprintf(f, "%d", i);
5524
  }
5525
      }
5526
      fprintf(f, "]\n");
5527
    }
5528
  }
5529
}
5530
#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5531
5532
5533
extern void
5534
onig_free_body(regex_t* reg)
5535
956k
{
5536
956k
  if (IS_NOT_NULL(reg)) {
5537
956k
    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5538
956k
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5539
956k
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5540
956k
    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5541
5542
956k
#ifdef USE_NAMED_GROUP
5543
956k
    onig_names_free(reg);
5544
956k
#endif
5545
956k
  }
5546
956k
}
5547
5548
extern void
5549
onig_free(regex_t* reg)
5550
956k
{
5551
956k
  if (IS_NOT_NULL(reg)) {
5552
956k
    onig_free_body(reg);
5553
956k
    xfree(reg);
5554
956k
  }
5555
956k
}
5556
5557
#ifdef RUBY
5558
size_t
5559
onig_memsize(const regex_t *reg)
5560
{
5561
    size_t size = sizeof(regex_t);
5562
    if (IS_NULL(reg)) return 0;
5563
    if (IS_NOT_NULL(reg->p))                size += reg->alloc;
5564
    if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
5565
    if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5566
    if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
5567
5568
    return size;
5569
}
5570
5571
size_t
5572
onig_region_memsize(const OnigRegion *regs)
5573
{
5574
    size_t size = sizeof(*regs);
5575
    if (IS_NULL(regs)) return 0;
5576
    size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5577
    return size;
5578
}
5579
#endif
5580
5581
/*
 * Move the contents of `from` into `to`: release what `to` currently owns,
 * copy the whole regex_t structure over it, then free the `from` object
 * itself (its buffers are now referenced by `to`).
 */
#define REGEX_TRANSFER(to,from) do {\
  onig_free_body((to));\
  xmemcpy((to), (from), sizeof(regex_t));\
  xfree((from));\
} while (0)
5586
5587
#if 0
5588
extern void
5589
onig_transfer(regex_t* to, regex_t* from)
5590
{
5591
  REGEX_TRANSFER(to, from);
5592
}
5593
#endif
5594
5595
#ifdef ONIG_DEBUG_COMPILE
5596
static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5597
#endif
5598
#ifdef ONIG_DEBUG_PARSE_TREE
5599
static void print_tree(FILE* f, Node* node);
5600
#endif
5601
5602
#ifdef RUBY
5603
extern int
5604
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5605
       OnigErrorInfo* einfo)
5606
{
5607
  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5608
}
5609
#endif
5610
5611
#ifdef RUBY
5612
extern int
5613
onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5614
        OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5615
#else
5616
extern int
5617
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5618
       OnigErrorInfo* einfo)
5619
#endif
5620
956k
{
5621
956k
#define COMPILE_INIT_SIZE  20
5622
5623
956k
  int r;
5624
956k
  OnigDistance init_size;
5625
956k
  Node*  root;
5626
956k
  ScanEnv  scan_env = {0};
5627
956k
#ifdef USE_SUBEXP_CALL
5628
956k
  UnsetAddrList  uslist;
5629
956k
#endif
5630
5631
956k
  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5632
5633
#ifdef RUBY
5634
  scan_env.sourcefile = sourcefile;
5635
  scan_env.sourceline = sourceline;
5636
#endif
5637
5638
#ifdef ONIG_DEBUG
5639
  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5640
#endif
5641
5642
956k
  if (reg->alloc == 0) {
5643
956k
    init_size = (pattern_end - pattern) * 2;
5644
956k
    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5645
956k
    r = BBUF_INIT(reg, init_size);
5646
956k
    if (r != 0) goto end;
5647
956k
  }
5648
0
  else
5649
0
    reg->used = 0;
5650
5651
956k
  reg->num_mem            = 0;
5652
956k
  reg->num_repeat         = 0;
5653
956k
  reg->num_null_check     = 0;
5654
956k
  reg->repeat_range_alloc = 0;
5655
956k
  reg->repeat_range       = (OnigRepeatRange* )NULL;
5656
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5657
  reg->num_comb_exp_check = 0;
5658
#endif
5659
5660
956k
  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5661
956k
  if (r != 0) goto err;
5662
5663
#ifdef ONIG_DEBUG_PARSE_TREE
5664
# if 0
5665
  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5666
  print_tree(stderr, root);
5667
# endif
5668
#endif
5669
5670
954k
#ifdef USE_NAMED_GROUP
5671
  /* mixed use named group and no-named group */
5672
954k
  if (scan_env.num_named > 0 &&
5673
954k
      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5674
954k
      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5675
82.5k
    if (scan_env.num_named != scan_env.num_mem)
5676
294
      r = disable_noname_group_capture(&root, reg, &scan_env);
5677
82.2k
    else
5678
82.2k
      r = numbered_ref_check(root);
5679
5680
82.5k
    if (r != 0) goto err;
5681
82.5k
  }
5682
954k
#endif
5683
5684
954k
#ifdef USE_SUBEXP_CALL
5685
954k
  if (scan_env.num_call > 0) {
5686
2.74k
    r = unset_addr_list_init(&uslist, scan_env.num_call);
5687
2.74k
    if (r != 0) goto err;
5688
2.74k
    scan_env.unset_addr_list = &uslist;
5689
2.74k
    r = setup_subexp_call(root, &scan_env);
5690
2.74k
    if (r != 0) goto err_unset;
5691
2.72k
    r = subexp_recursive_check_trav(root, &scan_env);
5692
2.72k
    if (r  < 0) goto err_unset;
5693
2.72k
    r = subexp_inf_recursive_check_trav(root, &scan_env);
5694
2.72k
    if (r != 0) goto err_unset;
5695
5696
2.70k
    reg->num_call = scan_env.num_call;
5697
2.70k
  }
5698
952k
  else
5699
952k
    reg->num_call = 0;
5700
954k
#endif
5701
5702
954k
  r = setup_tree(root, reg, 0, &scan_env);
5703
954k
  if (r != 0) goto err_unset;
5704
5705
#ifdef ONIG_DEBUG_PARSE_TREE
5706
  print_tree(stderr, root);
5707
#endif
5708
5709
954k
  reg->capture_history  = scan_env.capture_history;
5710
954k
  reg->bt_mem_start     = scan_env.bt_mem_start;
5711
954k
  reg->bt_mem_start    |= reg->capture_history;
5712
954k
  if (IS_FIND_CONDITION(reg->options))
5713
0
    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5714
954k
  else {
5715
954k
    reg->bt_mem_end  = scan_env.bt_mem_end;
5716
954k
    reg->bt_mem_end |= reg->capture_history;
5717
954k
  }
5718
5719
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5720
  if (scan_env.backrefed_mem == 0
5721
# ifdef USE_SUBEXP_CALL
5722
      || scan_env.num_call == 0
5723
# endif
5724
      ) {
5725
    setup_comb_exp_check(root, 0, &scan_env);
5726
# ifdef USE_SUBEXP_CALL
5727
    if (scan_env.has_recursion != 0) {
5728
      scan_env.num_comb_exp_check = 0;
5729
    }
5730
    else
5731
# endif
5732
    if (scan_env.comb_exp_max_regnum > 0) {
5733
      int i;
5734
      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5735
  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5736
    scan_env.num_comb_exp_check = 0;
5737
    break;
5738
  }
5739
      }
5740
    }
5741
  }
5742
5743
  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5744
#endif
5745
5746
954k
  clear_optimize_info(reg);
5747
954k
#ifndef ONIG_DONT_OPTIMIZE
5748
954k
  r = set_optimize_info_from_tree(root, reg, &scan_env);
5749
954k
  if (r != 0) goto err_unset;
5750
954k
#endif
5751
5752
954k
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5753
489
    xfree(scan_env.mem_nodes_dynamic);
5754
489
    scan_env.mem_nodes_dynamic = (Node** )NULL;
5755
489
  }
5756
5757
954k
  r = compile_tree(root, reg);
5758
954k
  if (r == 0) {
5759
954k
    r = add_opcode(reg, OP_END);
5760
954k
#ifdef USE_SUBEXP_CALL
5761
954k
    if (scan_env.num_call > 0) {
5762
2.64k
      r = unset_addr_list_fix(&uslist, reg);
5763
2.64k
      unset_addr_list_end(&uslist);
5764
2.64k
      if (r) goto err;
5765
2.64k
    }
5766
954k
#endif
5767
5768
954k
    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5769
2.88k
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5770
951k
    else {
5771
951k
      if (reg->bt_mem_start != 0)
5772
2.69k
  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5773
948k
      else
5774
948k
  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5775
951k
    }
5776
954k
  }
5777
141
#ifdef USE_SUBEXP_CALL
5778
141
  else if (scan_env.num_call > 0) {
5779
27
    unset_addr_list_end(&uslist);
5780
27
  }
5781
954k
#endif
5782
954k
  onig_node_free(root);
5783
5784
#ifdef ONIG_DEBUG_COMPILE
5785
# ifdef USE_NAMED_GROUP
5786
  onig_print_names(stderr, reg);
5787
# endif
5788
  print_compiled_byte_code_list(stderr, reg);
5789
#endif
5790
5791
954k
 end:
5792
954k
  return r;
5793
5794
168
 err_unset:
5795
168
#ifdef USE_SUBEXP_CALL
5796
168
  if (scan_env.num_call > 0) {
5797
68
    unset_addr_list_end(&uslist);
5798
68
  }
5799
168
#endif
5800
1.35k
 err:
5801
1.35k
  if (IS_NOT_NULL(scan_env.error)) {
5802
124
    if (IS_NOT_NULL(einfo)) {
5803
124
      einfo->enc     = scan_env.enc;
5804
124
      einfo->par     = scan_env.error;
5805
124
      einfo->par_end = scan_env.error_end;
5806
124
    }
5807
124
  }
5808
5809
1.35k
  onig_node_free(root);
5810
1.35k
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5811
351
      xfree(scan_env.mem_nodes_dynamic);
5812
1.35k
  return r;
5813
168
}
5814
5815
static int onig_inited = 0;
5816
5817
extern int
5818
onig_reg_init(regex_t* reg, OnigOptionType option,
5819
        OnigCaseFoldType case_fold_flag,
5820
        OnigEncoding enc, const OnigSyntaxType* syntax)
5821
956k
{
5822
956k
  if (! onig_inited)
5823
14
    onig_init();
5824
5825
956k
  if (IS_NULL(reg))
5826
0
    return ONIGERR_INVALID_ARGUMENT;
5827
5828
956k
  (reg)->exact            = (UChar* )NULL;
5829
956k
  (reg)->chain            = (regex_t* )NULL;
5830
956k
  (reg)->p                = (UChar* )NULL;
5831
956k
  (reg)->name_table       = (void* )NULL;
5832
956k
  (reg)->repeat_range     = (OnigRepeatRange* )NULL;
5833
5834
956k
  if (ONIGENC_IS_UNDEF(enc))
5835
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5836
5837
956k
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5838
956k
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5839
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5840
0
  }
5841
5842
956k
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5843
0
    option |= syntax->options;
5844
0
    option &= ~ONIG_OPTION_SINGLELINE;
5845
0
  }
5846
956k
  else
5847
956k
    option |= syntax->options;
5848
5849
956k
  (reg)->enc              = enc;
5850
956k
  (reg)->options          = option;
5851
956k
  (reg)->syntax           = syntax;
5852
956k
  (reg)->optimize         = 0;
5853
5854
956k
  (reg)->alloc            = 0;
5855
956k
  (reg)->used             = 0;
5856
5857
956k
  (reg)->case_fold_flag   = case_fold_flag;
5858
956k
  return 0;
5859
956k
}
5860
5861
extern int
5862
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5863
          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5864
          const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5865
0
{
5866
0
  int r;
5867
5868
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5869
0
  if (r) return r;
5870
5871
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
5872
0
  return r;
5873
0
}
5874
5875
extern int
5876
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5877
    OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5878
    OnigErrorInfo* einfo)
5879
956k
{
5880
956k
  int r;
5881
5882
956k
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5883
956k
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5884
5885
956k
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5886
956k
  if (r) goto err;
5887
5888
956k
  r = onig_compile(*reg, pattern, pattern_end, einfo);
5889
956k
  if (r) {
5890
1.49k
  err:
5891
1.49k
    onig_free(*reg);
5892
1.49k
    *reg = NULL;
5893
1.49k
  }
5894
956k
  return r;
5895
956k
}
5896
5897
extern int
5898
onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5899
0
{
5900
0
  return onig_init();
5901
0
}
5902
5903
extern int
5904
onig_init(void)
5905
38.0k
{
5906
38.0k
  if (onig_inited != 0)
5907
35.7k
    return 0;
5908
5909
2.26k
  onig_inited = 1;
5910
5911
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5912
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
5913
#endif
5914
5915
2.26k
  onigenc_init();
5916
  /* onigenc_set_default_caseconv_table((UChar* )0); */
5917
5918
#ifdef ONIG_DEBUG_STATISTICS
5919
  onig_statistics_init();
5920
#endif
5921
5922
2.26k
  return 0;
5923
38.0k
}
5924
5925
5926
static OnigEndCallListItemType* EndCallTop;
5927
5928
extern void onig_add_end_call(void (*func)(void))
5929
0
{
5930
0
  OnigEndCallListItemType* item;
5931
5932
0
  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5933
0
  if (item == 0) return ;
5934
5935
0
  item->next = EndCallTop;
5936
0
  item->func = func;
5937
5938
0
  EndCallTop = item;
5939
0
}
5940
5941
static void
5942
exec_end_call_list(void)
5943
2.25k
{
5944
2.25k
  OnigEndCallListItemType* prev;
5945
2.25k
  void (*func)(void);
5946
5947
2.25k
  while (EndCallTop != 0) {
5948
0
    func = EndCallTop->func;
5949
0
    (*func)();
5950
5951
0
    prev = EndCallTop;
5952
0
    EndCallTop = EndCallTop->next;
5953
0
    xfree(prev);
5954
0
  }
5955
2.25k
}
5956
5957
extern int
5958
onig_end(void)
5959
2.25k
{
5960
2.25k
  exec_end_call_list();
5961
5962
#ifdef ONIG_DEBUG_STATISTICS
5963
  onig_print_statistics(stderr);
5964
#endif
5965
5966
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5967
  _CrtDumpMemoryLeaks();
5968
#endif
5969
5970
2.25k
  onig_inited = 0;
5971
5972
2.25k
  return 0;
5973
2.25k
}
5974
5975
extern int
5976
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5977
9.37M
{
5978
9.37M
  OnigCodePoint n, *data;
5979
9.37M
  OnigCodePoint low, high, x;
5980
5981
9.37M
  GET_CODE_POINT(n, p);
5982
9.37M
  data = (OnigCodePoint* )p;
5983
9.37M
  data++;
5984
5985
43.8M
  for (low = 0, high = n; low < high; ) {
5986
34.4M
    x = (low + high) >> 1;
5987
34.4M
    if (code > data[x * 2 + 1])
5988
17.4M
      low = x + 1;
5989
16.9M
    else
5990
16.9M
      high = x;
5991
34.4M
  }
5992
5993
9.37M
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5994
9.37M
}
5995
5996
extern int
5997
onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5998
34.3M
{
5999
34.3M
  int found;
6000
6001
34.3M
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6002
33.1M
    if (IS_NULL(cc->mbuf)) {
6003
23.7M
      found = 0;
6004
23.7M
    }
6005
9.37M
    else {
6006
9.37M
      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6007
9.37M
    }
6008
33.1M
  }
6009
1.21M
  else {
6010
1.21M
    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6011
1.21M
  }
6012
6013
34.3M
  if (IS_NCCLASS_NOT(cc))
6014
5.67M
    return !found;
6015
28.7M
  else
6016
28.7M
    return found;
6017
34.3M
}
6018
6019
extern int
6020
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6021
34.3M
{
6022
34.3M
  int len;
6023
6024
34.3M
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6025
0
    len = 2;
6026
0
  }
6027
34.3M
  else {
6028
34.3M
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6029
34.3M
  }
6030
34.3M
  return onig_is_code_in_cc_len(len, code, cc);
6031
34.3M
}
6032
6033
6034
#ifdef ONIG_DEBUG
6035
6036
/*
 * Operand ("argument") kinds recorded in the arg_type field of each
 * OnigOpInfo entry.  ARG_SPECIAL marks opcodes whose operands are decoded
 * by opcode-specific code in onig_print_compiled_byte_code(); the other
 * kinds are decoded generically there.
 * The negative value is parenthesized so the macro expands safely inside
 * any expression.
 */
# define ARG_SPECIAL     (-1)
# define ARG_NON          0
# define ARG_RELADDR      1
# define ARG_ABSADDR      2
# define ARG_LENGTH       3
# define ARG_MEMNUM       4
# define ARG_OPTION       5
# define ARG_STATE_CHECK  6
6045
6046
OnigOpInfoType OnigOpInfo[] = {
6047
  { OP_FINISH,            "finish",          ARG_NON },
6048
  { OP_END,               "end",             ARG_NON },
6049
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
6050
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
6051
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
6052
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
6053
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
6054
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
6055
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
6056
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
6057
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
6058
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
6059
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
6060
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
6061
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
6062
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
6063
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
6064
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
6065
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
6066
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
6067
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
6068
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
6069
  { OP_ANYCHAR,           "anychar",         ARG_NON },
6070
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
6071
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
6072
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
6073
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6074
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6075
  { OP_WORD,                "word",            ARG_NON },
6076
  { OP_NOT_WORD,            "not-word",        ARG_NON },
6077
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
6078
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
6079
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
6080
  { OP_WORD_END,            "word-end",        ARG_NON },
6081
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
6082
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
6083
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
6084
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6085
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
6086
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
6087
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
6088
  { OP_END_BUF,             "end-buf",         ARG_NON },
6089
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
6090
  { OP_END_LINE,            "end-line",        ARG_NON },
6091
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
6092
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
6093
  { OP_BACKREF1,            "backref1",             ARG_NON },
6094
  { OP_BACKREF2,            "backref2",             ARG_NON },
6095
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
6096
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
6097
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
6098
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
6099
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
6100
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
6101
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
6102
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
6103
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
6104
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
6105
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
6106
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
6107
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
6108
  { OP_KEEP,                "keep",                 ARG_NON },
6109
  { OP_FAIL,                "fail",                 ARG_NON },
6110
  { OP_JUMP,                "jump",                 ARG_RELADDR },
6111
  { OP_PUSH,                "push",                 ARG_RELADDR },
6112
  { OP_POP,                 "pop",                  ARG_NON },
6113
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
6114
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
6115
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
6116
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
6117
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
6118
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
6119
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
6120
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
6121
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
6122
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
6123
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
6124
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
6125
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
6126
  { OP_POP_POS,              "pop-pos",              ARG_NON },
6127
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
6128
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
6129
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
6130
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
6131
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
6132
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6133
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6134
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
6135
  { OP_ABSENT,               "absent",               ARG_RELADDR },
6136
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
6137
  { OP_CALL,                 "call",                 ARG_ABSADDR },
6138
  { OP_RETURN,               "return",               ARG_NON },
6139
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
6140
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
6141
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6142
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
6143
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
6144
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6145
    "state-check-anychar-ml*", ARG_STATE_CHECK },
6146
  { -1, "", ARG_NON }
6147
};
6148
6149
static const char*
6150
op2name(int opcode)
6151
{
6152
  int i;
6153
6154
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6155
    if (opcode == OnigOpInfo[i].opcode)
6156
      return OnigOpInfo[i].name;
6157
  }
6158
  return "";
6159
}
6160
6161
static int
6162
op2arg_type(int opcode)
6163
{
6164
  int i;
6165
6166
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6167
    if (opcode == OnigOpInfo[i].opcode)
6168
      return OnigOpInfo[i].arg_type;
6169
  }
6170
  return ARG_SPECIAL;
6171
}
6172
6173
# ifdef ONIG_DEBUG_PARSE_TREE
6174
/*
 * Write `indent` space characters to `f`.
 * Nothing is written when `indent` is zero or negative.
 */
static void
Indent(FILE* f, int indent)
{
  while (indent-- > 0) {
    putc(' ', f);
  }
}
6180
# endif /* ONIG_DEBUG_PARSE_TREE */
6181
6182
static void
6183
p_string(FILE* f, ptrdiff_t len, UChar* s)
6184
{
6185
  fputs(":", f);
6186
  while (len-- > 0) { fputc(*s++, f); }
6187
}
6188
6189
static void
6190
p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6191
{
6192
  int x = len * mb_len;
6193
6194
  fprintf(f, ":%d:", len);
6195
  while (x-- > 0) { fputc(*s++, f); }
6196
}
6197
6198
extern void
6199
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6200
                              OnigEncoding enc)
6201
{
6202
  int i, n, arg_type;
6203
  RelAddrType addr;
6204
  LengthType len;
6205
  MemNumType mem;
6206
  StateCheckNumType scn;
6207
  OnigCodePoint code;
6208
  UChar *q;
6209
6210
  fprintf(f, "[%s", op2name(*bp));
6211
  arg_type = op2arg_type(*bp);
6212
  if (arg_type != ARG_SPECIAL) {
6213
    bp++;
6214
    switch (arg_type) {
6215
    case ARG_NON:
6216
      break;
6217
    case ARG_RELADDR:
6218
      GET_RELADDR_INC(addr, bp);
6219
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6220
      break;
6221
    case ARG_ABSADDR:
6222
      GET_ABSADDR_INC(addr, bp);
6223
      fprintf(f, ":(%d)", addr);
6224
      break;
6225
    case ARG_LENGTH:
6226
      GET_LENGTH_INC(len, bp);
6227
      fprintf(f, ":%d", len);
6228
      break;
6229
    case ARG_MEMNUM:
6230
      mem = *((MemNumType* )bp);
6231
      bp += SIZE_MEMNUM;
6232
      fprintf(f, ":%d", mem);
6233
      break;
6234
    case ARG_OPTION:
6235
      {
6236
  OnigOptionType option = *((OnigOptionType* )bp);
6237
  bp += SIZE_OPTION;
6238
  fprintf(f, ":%d", option);
6239
      }
6240
      break;
6241
6242
    case ARG_STATE_CHECK:
6243
      scn = *((StateCheckNumType* )bp);
6244
      bp += SIZE_STATE_CHECK_NUM;
6245
      fprintf(f, ":%d", scn);
6246
      break;
6247
    }
6248
  }
6249
  else {
6250
    switch (*bp++) {
6251
    case OP_EXACT1:
6252
    case OP_ANYCHAR_STAR_PEEK_NEXT:
6253
    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6254
      p_string(f, 1, bp++); break;
6255
    case OP_EXACT2:
6256
      p_string(f, 2, bp); bp += 2; break;
6257
    case OP_EXACT3:
6258
      p_string(f, 3, bp); bp += 3; break;
6259
    case OP_EXACT4:
6260
      p_string(f, 4, bp); bp += 4; break;
6261
    case OP_EXACT5:
6262
      p_string(f, 5, bp); bp += 5; break;
6263
    case OP_EXACTN:
6264
      GET_LENGTH_INC(len, bp);
6265
      p_len_string(f, len, 1, bp);
6266
      bp += len;
6267
      break;
6268
6269
    case OP_EXACTMB2N1:
6270
      p_string(f, 2, bp); bp += 2; break;
6271
    case OP_EXACTMB2N2:
6272
      p_string(f, 4, bp); bp += 4; break;
6273
    case OP_EXACTMB2N3:
6274
      p_string(f, 6, bp); bp += 6; break;
6275
    case OP_EXACTMB2N:
6276
      GET_LENGTH_INC(len, bp);
6277
      p_len_string(f, len, 2, bp);
6278
      bp += len * 2;
6279
      break;
6280
    case OP_EXACTMB3N:
6281
      GET_LENGTH_INC(len, bp);
6282
      p_len_string(f, len, 3, bp);
6283
      bp += len * 3;
6284
      break;
6285
    case OP_EXACTMBN:
6286
      {
6287
  int mb_len;
6288
6289
  GET_LENGTH_INC(mb_len, bp);
6290
  GET_LENGTH_INC(len, bp);
6291
  fprintf(f, ":%d:%d:", mb_len, len);
6292
  n = len * mb_len;
6293
  while (n-- > 0) { fputc(*bp++, f); }
6294
      }
6295
      break;
6296
6297
    case OP_EXACT1_IC:
6298
      len = enclen(enc, bp, bpend);
6299
      p_string(f, len, bp);
6300
      bp += len;
6301
      break;
6302
    case OP_EXACTN_IC:
6303
      GET_LENGTH_INC(len, bp);
6304
      p_len_string(f, len, 1, bp);
6305
      bp += len;
6306
      break;
6307
6308
    case OP_CCLASS:
6309
      n = bitset_on_num((BitSetRef )bp);
6310
      bp += SIZE_BITSET;
6311
      fprintf(f, ":%d", n);
6312
      break;
6313
6314
    case OP_CCLASS_NOT:
6315
      n = bitset_on_num((BitSetRef )bp);
6316
      bp += SIZE_BITSET;
6317
      fprintf(f, ":%d", n);
6318
      break;
6319
6320
    case OP_CCLASS_MB:
6321
    case OP_CCLASS_MB_NOT:
6322
      GET_LENGTH_INC(len, bp);
6323
      q = bp;
6324
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6325
      ALIGNMENT_RIGHT(q);
6326
# endif
6327
      GET_CODE_POINT(code, q);
6328
      bp += len;
6329
      fprintf(f, ":%d:%d", (int )code, len);
6330
      break;
6331
6332
    case OP_CCLASS_MIX:
6333
    case OP_CCLASS_MIX_NOT:
6334
      n = bitset_on_num((BitSetRef )bp);
6335
      bp += SIZE_BITSET;
6336
      GET_LENGTH_INC(len, bp);
6337
      q = bp;
6338
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6339
      ALIGNMENT_RIGHT(q);
6340
# endif
6341
      GET_CODE_POINT(code, q);
6342
      bp += len;
6343
      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6344
      break;
6345
6346
    case OP_BACKREFN_IC:
6347
      mem = *((MemNumType* )bp);
6348
      bp += SIZE_MEMNUM;
6349
      fprintf(f, ":%d", mem);
6350
      break;
6351
6352
    case OP_BACKREF_MULTI_IC:
6353
    case OP_BACKREF_MULTI:
6354
      fputs(" ", f);
6355
      GET_LENGTH_INC(len, bp);
6356
      for (i = 0; i < len; i++) {
6357
  GET_MEMNUM_INC(mem, bp);
6358
  if (i > 0) fputs(", ", f);
6359
  fprintf(f, "%d", mem);
6360
      }
6361
      break;
6362
6363
    case OP_BACKREF_WITH_LEVEL:
6364
      {
6365
  OnigOptionType option;
6366
  LengthType level;
6367
6368
  GET_OPTION_INC(option, bp);
6369
  fprintf(f, ":%d", option);
6370
  GET_LENGTH_INC(level, bp);
6371
  fprintf(f, ":%d", level);
6372
6373
  fputs(" ", f);
6374
  GET_LENGTH_INC(len, bp);
6375
  for (i = 0; i < len; i++) {
6376
    GET_MEMNUM_INC(mem, bp);
6377
    if (i > 0) fputs(", ", f);
6378
    fprintf(f, "%d", mem);
6379
  }
6380
      }
6381
      break;
6382
6383
    case OP_REPEAT:
6384
    case OP_REPEAT_NG:
6385
      {
6386
  mem = *((MemNumType* )bp);
6387
  bp += SIZE_MEMNUM;
6388
  addr = *((RelAddrType* )bp);
6389
  bp += SIZE_RELADDR;
6390
  fprintf(f, ":%d:%d", mem, addr);
6391
      }
6392
      break;
6393
6394
    case OP_PUSH_OR_JUMP_EXACT1:
6395
    case OP_PUSH_IF_PEEK_NEXT:
6396
      addr = *((RelAddrType* )bp);
6397
      bp += SIZE_RELADDR;
6398
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6399
      p_string(f, 1, bp);
6400
      bp += 1;
6401
      break;
6402
6403
    case OP_LOOK_BEHIND:
6404
      GET_LENGTH_INC(len, bp);
6405
      fprintf(f, ":%d", len);
6406
      break;
6407
6408
    case OP_PUSH_LOOK_BEHIND_NOT:
6409
      GET_RELADDR_INC(addr, bp);
6410
      GET_LENGTH_INC(len, bp);
6411
      fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6412
      break;
6413
6414
    case OP_STATE_CHECK_PUSH:
6415
    case OP_STATE_CHECK_PUSH_OR_JUMP:
6416
      scn = *((StateCheckNumType* )bp);
6417
      bp += SIZE_STATE_CHECK_NUM;
6418
      addr = *((RelAddrType* )bp);
6419
      bp += SIZE_RELADDR;
6420
      fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6421
      break;
6422
6423
    case OP_CONDITION:
6424
      GET_MEMNUM_INC(mem, bp);
6425
      GET_RELADDR_INC(addr, bp);
6426
      fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6427
      break;
6428
6429
    default:
6430
      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6431
        bp[-1]);
6432
    }
6433
  }
6434
  fputs("]", f);
6435
  if (nextp) *nextp = bp;
6436
}
6437
6438
# ifdef ONIG_DEBUG_COMPILE
6439
static void
6440
print_compiled_byte_code_list(FILE* f, regex_t* reg)
6441
{
6442
  int ncode;
6443
  UChar* bp = reg->p;
6444
  UChar* end = reg->p + reg->used;
6445
6446
  fprintf(f, "code length: %d", reg->used);
6447
6448
  ncode = -1;
6449
  while (bp < end) {
6450
    ncode++;
6451
    if (ncode % 5 == 0)
6452
      fprintf(f, "\n%ld:", bp - reg->p);
6453
    else
6454
      fprintf(f, " %ld:", bp - reg->p);
6455
    onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6456
  }
6457
6458
  fprintf(f, "\n");
6459
}
6460
# endif /* ONIG_DEBUG_COMPILE */
6461
6462
# ifdef ONIG_DEBUG_PARSE_TREE
6463
static void
6464
print_indent_tree(FILE* f, Node* node, int indent)
6465
{
6466
  int i, type, container_p = 0;
6467
  int add = 3;
6468
  UChar* p;
6469
6470
  Indent(f, indent);
6471
  if (IS_NULL(node)) {
6472
    fprintf(f, "ERROR: null node!!!\n");
6473
    exit (0);
6474
  }
6475
6476
  type = NTYPE(node);
6477
  switch (type) {
6478
  case NT_LIST:
6479
  case NT_ALT:
6480
    if (NTYPE(node) == NT_LIST)
6481
      fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6482
    else
6483
      fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6484
6485
    print_indent_tree(f, NCAR(node), indent + add);
6486
    while (IS_NOT_NULL(node = NCDR(node))) {
6487
      if (NTYPE(node) != type) {
6488
  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6489
  exit(0);
6490
      }
6491
      print_indent_tree(f, NCAR(node), indent + add);
6492
    }
6493
    break;
6494
6495
  case NT_STR:
6496
    fprintf(f, "<string%s:%"PRIxPTR">",
6497
      (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6498
    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6499
      if (*p >= 0x20 && *p < 0x7f)
6500
  fputc(*p, f);
6501
      else {
6502
  fprintf(f, " 0x%02x", *p);
6503
      }
6504
    }
6505
    break;
6506
6507
  case NT_CCLASS:
6508
    fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6509
    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6510
    if (NCCLASS(node)->mbuf) {
6511
      BBuf* bbuf = NCCLASS(node)->mbuf;
6512
      OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6513
      OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6514
      fprintf(f, "%d", *data++);
6515
      for (; data < end; data+=2) {
6516
  fprintf(f, ",");
6517
  fprintf(f, "%04x-%04x", data[0], data[1]);
6518
      }
6519
    }
6520
    break;
6521
6522
  case NT_CTYPE:
6523
    fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6524
    switch (NCTYPE(node)->ctype) {
6525
    case ONIGENC_CTYPE_WORD:
6526
      if (NCTYPE(node)->not != 0)
6527
  fputs("not word",       f);
6528
      else
6529
  fputs("word",           f);
6530
      break;
6531
6532
    default:
6533
      fprintf(f, "ERROR: undefined ctype.\n");
6534
      exit(0);
6535
    }
6536
    break;
6537
6538
  case NT_CANY:
6539
    fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6540
    break;
6541
6542
  case NT_ANCHOR:
6543
    fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6544
    switch (NANCHOR(node)->type) {
6545
    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6546
    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6547
    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6548
    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6549
    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6550
    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6551
6552
    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6553
    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6554
#  ifdef USE_WORD_BEGIN_END
6555
    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6556
    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6557
#  endif
6558
    case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
6559
    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
6560
    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
6561
    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6562
    case ANCHOR_KEEP:            fputs("keep",f);            break;
6563
6564
    default:
6565
      fprintf(f, "ERROR: undefined anchor type.\n");
6566
      break;
6567
    }
6568
    break;
6569
6570
  case NT_BREF:
6571
    {
6572
      int* p;
6573
      BRefNode* br = NBREF(node);
6574
      p = BACKREFS_P(br);
6575
      fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6576
      for (i = 0; i < br->back_num; i++) {
6577
  if (i > 0) fputs(", ", f);
6578
  fprintf(f, "%d", p[i]);
6579
      }
6580
    }
6581
    break;
6582
6583
#  ifdef USE_SUBEXP_CALL
6584
  case NT_CALL:
6585
    {
6586
      CallNode* cn = NCALL(node);
6587
      fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6588
      p_string(f, cn->name_end - cn->name, cn->name);
6589
    }
6590
    break;
6591
#  endif
6592
6593
  case NT_QTFR:
6594
    fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6595
      NQTFR(node)->lower, NQTFR(node)->upper,
6596
      (NQTFR(node)->greedy ? "" : "?"));
6597
    print_indent_tree(f, NQTFR(node)->target, indent + add);
6598
    break;
6599
6600
  case NT_ENCLOSE:
6601
    fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6602
    switch (NENCLOSE(node)->type) {
6603
    case ENCLOSE_OPTION:
6604
      fprintf(f, "option:%d", NENCLOSE(node)->option);
6605
      break;
6606
    case ENCLOSE_MEMORY:
6607
      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6608
      break;
6609
    case ENCLOSE_STOP_BACKTRACK:
6610
      fprintf(f, "stop-bt");
6611
      break;
6612
    case ENCLOSE_CONDITION:
6613
      fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6614
      break;
6615
    case ENCLOSE_ABSENT:
6616
      fprintf(f, "absent");
6617
      break;
6618
6619
    default:
6620
      break;
6621
    }
6622
    fprintf(f, "\n");
6623
    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6624
    break;
6625
6626
  default:
6627
    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6628
    break;
6629
  }
6630
6631
  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6632
      type != NT_ENCLOSE)
6633
    fprintf(f, "\n");
6634
6635
  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6636
6637
  fflush(f);
6638
}
6639
6640
static void
6641
print_tree(FILE* f, Node* node)
6642
{
6643
  print_indent_tree(f, node, 0);
6644
}
6645
# endif /* ONIG_DEBUG_PARSE_TREE */
6646
#endif /* ONIG_DEBUG */