Coverage Report

Created: 2026-03-22 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/fluent-bit/lib/onigmo/regcomp.c
Line
Count
Source
1
/**********************************************************************
2
  regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regparse.h"
32
33
/* File-scope default case-fold flag; read and written by the
 * onig_get/set_default_case_fold_flag() accessors. */
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35
extern OnigCaseFoldType
36
onig_get_default_case_fold_flag(void)
37
0
{
38
0
  return OnigDefaultCaseFoldFlag;
39
0
}
40
41
extern int
42
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43
0
{
44
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
45
0
  return 0;
46
0
}
47
48
49
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50
/* Zero-initialized (static storage) filler bytes; add_multi_byte_cclass()
 * copies from here to pad its output when unaligned word access is not
 * available. */
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51
#endif
52
53
#if 0
54
/* Duplicate the bytes in [s, end) into a freshly xmalloc'ed buffer that is
 * terminated with a zero byte.  Returns NULL when the range is empty or
 * reversed; an allocation failure is handled by CHECK_NULL_RETURN. */
static UChar*
str_dup(UChar* s, UChar* end)
{
  ptrdiff_t len = end - s;
  UChar* copy;

  if (len <= 0)
    return NULL;

  copy = (UChar* )xmalloc(len + 1);
  CHECK_NULL_RETURN(copy);
  xmemcpy(copy, s, len);
  copy[len] = (UChar )0;
  return copy;
}
68
#endif
69
70
static void
71
swap_node(Node* a, Node* b)
72
1.56M
{
73
1.56M
  Node c;
74
1.56M
  c = *a; *a = *b; *b = c;
75
76
1.56M
  if (NTYPE(a) == NT_STR) {
77
16.7k
    StrNode* sn = NSTR(a);
78
16.7k
    if (sn->capa == 0) {
79
16.3k
      size_t len = sn->end - sn->s;
80
16.3k
      sn->s   = sn->buf;
81
16.3k
      sn->end = sn->s + len;
82
16.3k
    }
83
16.7k
  }
84
85
1.56M
  if (NTYPE(b) == NT_STR) {
86
19.8k
    StrNode* sn = NSTR(b);
87
19.8k
    if (sn->capa == 0) {
88
18.2k
      size_t len = sn->end - sn->s;
89
18.2k
      sn->s   = sn->buf;
90
18.2k
      sn->end = sn->s + len;
91
18.2k
    }
92
19.8k
  }
93
1.56M
}
94
95
static OnigDistance
96
distance_add(OnigDistance d1, OnigDistance d2)
97
47.9M
{
98
47.9M
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99
16.6M
    return ONIG_INFINITE_DISTANCE;
100
31.2M
  else {
101
31.2M
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102
1.95k
    else return ONIG_INFINITE_DISTANCE;
103
31.2M
  }
104
47.9M
}
105
106
static OnigDistance
107
distance_multiply(OnigDistance d, int m)
108
24.4M
{
109
24.4M
  if (m == 0) return 0;
110
111
22.1M
  if (d < ONIG_INFINITE_DISTANCE / m)
112
22.1M
    return d * m;
113
35.0k
  else
114
35.0k
    return ONIG_INFINITE_DISTANCE;
115
22.1M
}
116
117
static int
118
bitset_is_empty(BitSetRef bs)
119
17.6M
{
120
17.6M
  int i;
121
146M
  for (i = 0; i < BITSET_SIZE; i++) {
122
130M
    if (bs[i] != 0) return 0;
123
130M
  }
124
16.0M
  return 1;
125
17.6M
}
126
127
#ifdef ONIG_DEBUG
128
static int
129
bitset_on_num(BitSetRef bs)
130
{
131
  int i, n;
132
133
  n = 0;
134
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135
    if (BITSET_AT(bs, i)) n++;
136
  }
137
  return n;
138
}
139
#endif
140
141
extern int
142
onig_bbuf_init(BBuf* buf, OnigDistance size)
143
5.61M
{
144
5.61M
  if (size <= 0) {
145
0
    size   = 0;
146
0
    buf->p = NULL;
147
0
  }
148
5.61M
  else {
149
5.61M
    buf->p = (UChar* )xmalloc(size);
150
5.61M
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151
5.61M
  }
152
153
5.61M
  buf->alloc = (unsigned int )size;
154
5.61M
  buf->used  = 0;
155
5.61M
  return 0;
156
5.61M
}
157
158
159
#ifdef USE_SUBEXP_CALL
160
161
static int
162
unset_addr_list_init(UnsetAddrList* uslist, int size)
163
1.15k
{
164
1.15k
  UnsetAddr* p;
165
166
1.15k
  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167
1.15k
  CHECK_NULL_RETURN_MEMERR(p);
168
1.15k
  uslist->num   = 0;
169
1.15k
  uslist->alloc = size;
170
1.15k
  uslist->us    = p;
171
1.15k
  return 0;
172
1.15k
}
173
174
static void
175
unset_addr_list_end(UnsetAddrList* uslist)
176
1.15k
{
177
1.15k
  if (IS_NOT_NULL(uslist->us))
178
1.15k
    xfree(uslist->us);
179
1.15k
}
180
181
static int
182
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183
5.08k
{
184
5.08k
  UnsetAddr* p;
185
5.08k
  int size;
186
187
5.08k
  if (uslist->num >= uslist->alloc) {
188
1.54k
    size = uslist->alloc * 2;
189
1.54k
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190
1.54k
    CHECK_NULL_RETURN_MEMERR(p);
191
1.54k
    uslist->alloc = size;
192
1.54k
    uslist->us    = p;
193
1.54k
  }
194
195
5.08k
  uslist->us[uslist->num].offset = offset;
196
5.08k
  uslist->us[uslist->num].target = node;
197
5.08k
  uslist->num++;
198
5.08k
  return 0;
199
5.08k
}
200
#endif /* USE_SUBEXP_CALL */
201
202
203
static int
204
add_opcode(regex_t* reg, int opcode)
205
33.4M
{
206
33.4M
  BBUF_ADD1(reg, opcode);
207
33.4M
  return 0;
208
33.4M
}
209
210
#ifdef USE_COMBINATION_EXPLOSION_CHECK
211
static int
212
add_state_check_num(regex_t* reg, int num)
213
{
214
  StateCheckNumType n = (StateCheckNumType )num;
215
216
  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217
  return 0;
218
}
219
#endif
220
221
static int
222
add_rel_addr(regex_t* reg, int addr)
223
11.7M
{
224
11.7M
  RelAddrType ra = (RelAddrType )addr;
225
226
11.7M
  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227
11.7M
  return 0;
228
11.7M
}
229
230
static int
231
add_abs_addr(regex_t* reg, int addr)
232
7.33k
{
233
7.33k
  AbsAddrType ra = (AbsAddrType )addr;
234
235
7.33k
  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236
7.33k
  return 0;
237
7.33k
}
238
239
static int
240
add_length(regex_t* reg, OnigDistance len)
241
4.13M
{
242
4.13M
  LengthType l = (LengthType )len;
243
244
4.13M
  BBUF_ADD(reg, &l, SIZE_LENGTH);
245
4.13M
  return 0;
246
4.13M
}
247
248
static int
249
add_mem_num(regex_t* reg, int num)
250
2.08M
{
251
2.08M
  MemNumType n = (MemNumType )num;
252
253
2.08M
  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254
2.08M
  return 0;
255
2.08M
}
256
257
#if 0
258
static int
259
add_pointer(regex_t* reg, void* addr)
260
{
261
  PointerType ptr = (PointerType )addr;
262
263
  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264
  return 0;
265
}
266
#endif
267
268
static int
269
add_option(regex_t* reg, OnigOptionType option)
270
2.20k
{
271
2.20k
  BBUF_ADD(reg, &option, SIZE_OPTION);
272
2.20k
  return 0;
273
2.20k
}
274
275
static int
276
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277
11.7M
{
278
11.7M
  int r;
279
280
11.7M
  r = add_opcode(reg, opcode);
281
11.7M
  if (r) return r;
282
11.7M
  r = add_rel_addr(reg, addr);
283
11.7M
  return r;
284
11.7M
}
285
286
static int
287
add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288
7.26M
{
289
7.26M
  BBUF_ADD(reg, bytes, len);
290
7.26M
  return 0;
291
7.26M
}
292
293
static int
294
add_bitset(regex_t* reg, BitSetRef bs)
295
4.23M
{
296
4.23M
  BBUF_ADD(reg, bs, SIZE_BITSET);
297
4.23M
  return 0;
298
4.23M
}
299
300
static int
301
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302
0
{
303
0
  int r;
304
0
305
0
  r = add_opcode(reg, opcode);
306
0
  if (r) return r;
307
0
  r = add_option(reg, option);
308
0
  return r;
309
0
}
310
311
/* Forward declarations: the per-node compile helpers in this file call
 * these two functions before their definitions appear. */
static int compile_length_tree(Node *node, regex_t *reg);
static int compile_tree(Node *node, regex_t *reg);
313
314
315
/* True for the exact-string opcodes that are followed by a length operand
 * (see add_compile_string / add_compile_string_length). */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || \
    (op) == OP_EXACTMB2N || \
    (op) == OP_EXACTMB3N || \
    (op) == OP_EXACTMBN  || \
    (op) == OP_EXACTN_IC)
318
319
static int
320
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321
10.7M
{
322
10.7M
  int op;
323
10.7M
  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325
10.7M
  if (ignore_case) {
326
116k
    switch (str_len) {
327
35.4k
    case 1:  op = OP_EXACT1_IC; break;
328
80.5k
    default: op = OP_EXACTN_IC; break;
329
116k
    }
330
116k
  }
331
10.6M
  else {
332
10.6M
    switch (mb_len) {
333
10.5M
    case 1:
334
10.5M
      switch (str_len) {
335
3.82M
      case 1:  op = OP_EXACT1; break;
336
1.11M
      case 2:  op = OP_EXACT2; break;
337
1.40M
      case 3:  op = OP_EXACT3; break;
338
407k
      case 4:  op = OP_EXACT4; break;
339
563k
      case 5:  op = OP_EXACT5; break;
340
3.25M
      default: op = OP_EXACTN; break;
341
10.5M
      }
342
10.5M
      break;
343
344
10.5M
    case 2:
345
51.5k
      switch (str_len) {
346
40.0k
      case 1:  op = OP_EXACTMB2N1; break;
347
455
      case 2:  op = OP_EXACTMB2N2; break;
348
1.31k
      case 3:  op = OP_EXACTMB2N3; break;
349
9.75k
      default: op = OP_EXACTMB2N;  break;
350
51.5k
      }
351
51.5k
      break;
352
353
51.5k
    case 3:
354
22.9k
      op = OP_EXACTMB3N;
355
22.9k
      break;
356
357
17.7k
    default:
358
17.7k
      op = OP_EXACTMBN;
359
17.7k
      break;
360
10.6M
    }
361
10.6M
  }
362
10.7M
  return op;
363
10.7M
}
364
365
static int
366
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367
2.28M
{
368
2.28M
  int r;
369
2.28M
  int saved_num_null_check = reg->num_null_check;
370
371
2.28M
  if (empty_info != 0) {
372
35.9k
    r = add_opcode(reg, OP_NULL_CHECK_START);
373
35.9k
    if (r) return r;
374
35.9k
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375
35.9k
    if (r) return r;
376
35.9k
    reg->num_null_check++;
377
35.9k
  }
378
379
2.28M
  r = compile_tree(node, reg);
380
2.28M
  if (r) return r;
381
382
2.28M
  if (empty_info != 0) {
383
35.9k
    if (empty_info == NQ_TARGET_IS_EMPTY)
384
29.8k
      r = add_opcode(reg, OP_NULL_CHECK_END);
385
6.16k
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386
5.44k
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387
721
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388
721
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390
35.9k
    if (r) return r;
391
35.9k
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392
35.9k
  }
393
2.28M
  return r;
394
2.28M
}
395
396
#ifdef USE_SUBEXP_CALL
397
static int
398
compile_call(CallNode* node, regex_t* reg)
399
5.08k
{
400
5.08k
  int r;
401
402
5.08k
  r = add_opcode(reg, OP_CALL);
403
5.08k
  if (r) return r;
404
5.08k
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405
5.08k
                          node->target);
406
5.08k
  if (r) return r;
407
5.08k
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408
5.08k
  return r;
409
5.08k
}
410
#endif
411
412
static int
413
compile_tree_n_times(Node* node, int n, regex_t* reg)
414
4.62M
{
415
4.62M
  int i, r;
416
417
7.32M
  for (i = 0; i < n; i++) {
418
2.69M
    r = compile_tree(node, reg);
419
2.69M
    if (r) return r;
420
2.69M
  }
421
4.62M
  return 0;
422
4.62M
}
423
424
static int
425
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426
                          regex_t* reg ARG_UNUSED, int ignore_case)
427
6.18M
{
428
6.18M
  int len;
429
6.18M
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431
6.18M
  len = SIZE_OPCODE;
432
433
6.18M
  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
434
6.18M
  if (IS_NEED_STR_LEN_OP_EXACT(op))
435
1.75M
    len += SIZE_LENGTH;
436
437
6.18M
  len += (int )byte_len;
438
6.18M
  return len;
439
6.18M
}
440
441
static int
442
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443
                   regex_t* reg, int ignore_case)
444
4.59M
{
445
4.59M
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446
4.59M
  add_opcode(reg, op);
447
448
4.59M
  if (op == OP_EXACTMBN)
449
7.66k
    add_length(reg, mb_len);
450
451
4.59M
  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452
1.63M
    if (op == OP_EXACTN_IC)
453
21.0k
      add_length(reg, byte_len);
454
1.61M
    else
455
1.61M
      add_length(reg, byte_len / mb_len);
456
1.63M
  }
457
458
4.59M
  add_bytes(reg, s, byte_len);
459
4.59M
  return 0;
460
4.59M
}
461
462
463
static int
464
compile_length_string_node(Node* node, regex_t* reg)
465
5.05M
{
466
5.05M
  int rlen, r, len, prev_len, blen, ambig;
467
5.05M
  OnigEncoding enc = reg->enc;
468
5.05M
  UChar *p, *prev;
469
5.05M
  StrNode* sn;
470
471
5.05M
  sn = NSTR(node);
472
5.05M
  if (sn->end <= sn->s)
473
328k
    return 0;
474
475
4.72M
  ambig = NSTRING_IS_AMBIG(node);
476
477
4.72M
  p = prev = sn->s;
478
4.72M
  prev_len = enclen(enc, p, sn->end);
479
4.72M
  p += prev_len;
480
4.72M
  blen = prev_len;
481
4.72M
  rlen = 0;
482
483
27.1M
  for (; p < sn->end; ) {
484
22.4M
    len = enclen(enc, p, sn->end);
485
22.4M
    if (len == prev_len || ambig) {
486
22.4M
      blen += len;
487
22.4M
    }
488
22.9k
    else {
489
22.9k
      r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490
22.9k
      rlen += r;
491
22.9k
      prev = p;
492
22.9k
      blen = len;
493
22.9k
      prev_len = len;
494
22.9k
    }
495
22.4M
    p += len;
496
22.4M
  }
497
4.72M
  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498
4.72M
  rlen += r;
499
4.72M
  return rlen;
500
5.05M
}
501
502
static int
503
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504
1.43M
{
505
1.43M
  if (sn->end <= sn->s)
506
0
    return 0;
507
508
1.43M
  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509
1.43M
}
510
511
static int
512
compile_string_node(Node* node, regex_t* reg)
513
4.44M
{
514
4.44M
  int r, len, prev_len, blen, ambig;
515
4.44M
  OnigEncoding enc = reg->enc;
516
4.44M
  UChar *p, *prev, *end;
517
4.44M
  StrNode* sn;
518
519
4.44M
  sn = NSTR(node);
520
4.44M
  if (sn->end <= sn->s)
521
121k
    return 0;
522
523
4.32M
  end = sn->end;
524
4.32M
  ambig = NSTRING_IS_AMBIG(node);
525
526
4.32M
  p = prev = sn->s;
527
4.32M
  prev_len = enclen(enc, p, end);
528
4.32M
  p += prev_len;
529
4.32M
  blen = prev_len;
530
531
34.0M
  for (; p < end; ) {
532
29.7M
    len = enclen(enc, p, end);
533
29.7M
    if (len == prev_len || ambig) {
534
29.7M
      blen += len;
535
29.7M
    }
536
21.3k
    else {
537
21.3k
      r = add_compile_string(prev, prev_len, blen, reg, ambig);
538
21.3k
      if (p + len > end) {
539
0
        return 0;
540
0
      }
541
21.3k
      if (r) return r;
542
543
21.3k
      prev  = p;
544
21.3k
      blen  = len;
545
21.3k
      prev_len = len;
546
21.3k
    }
547
548
29.7M
    p += len;
549
29.7M
  }
550
4.32M
  return add_compile_string(prev, prev_len, blen, reg, ambig);
551
4.32M
}
552
553
static int
554
compile_string_raw_node(StrNode* sn, regex_t* reg)
555
249k
{
556
249k
  if (sn->end <= sn->s)
557
0
    return 0;
558
559
249k
  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560
249k
}
561
562
static int
563
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
564
2.48M
{
565
2.48M
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566
2.48M
  add_length(reg, mbuf->used);
567
2.48M
  return add_bytes(reg, mbuf->p, mbuf->used);
568
#else
569
  int r, pad_size;
570
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
571
572
  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573
  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575
576
  r = add_bytes(reg, mbuf->p, mbuf->used);
577
578
  /* padding for return value from compile_length_cclass_node() to be fix. */
579
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581
  return r;
582
#endif
583
2.48M
}
584
585
static int
586
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
587
18.3M
{
588
18.3M
  int len;
589
590
18.3M
  if (IS_NULL(cc->mbuf)) {
591
3.21M
    len = SIZE_OPCODE + SIZE_BITSET;
592
3.21M
  }
593
15.1M
  else {
594
15.1M
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595
13.8M
      len = SIZE_OPCODE;
596
13.8M
    }
597
1.31M
    else {
598
1.31M
      len = SIZE_OPCODE + SIZE_BITSET;
599
1.31M
    }
600
15.1M
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601
15.1M
    len += SIZE_LENGTH + cc->mbuf->used;
602
#else
603
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604
#endif
605
15.1M
  }
606
607
18.3M
  return len;
608
18.3M
}
609
610
static int
611
compile_cclass_node(CClassNode* cc, regex_t* reg)
612
6.47M
{
613
6.47M
  int r;
614
615
6.47M
  if (IS_NULL(cc->mbuf)) {
616
3.98M
    if (IS_NCCLASS_NOT(cc))
617
1.11M
      add_opcode(reg, OP_CCLASS_NOT);
618
2.87M
    else
619
2.87M
      add_opcode(reg, OP_CCLASS);
620
621
3.98M
    r = add_bitset(reg, cc->bs);
622
3.98M
  }
623
2.48M
  else {
624
2.48M
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
625
2.23M
      if (IS_NCCLASS_NOT(cc))
626
1.18k
  add_opcode(reg, OP_CCLASS_MB_NOT);
627
2.23M
      else
628
2.23M
  add_opcode(reg, OP_CCLASS_MB);
629
630
2.23M
      r = add_multi_byte_cclass(cc->mbuf, reg);
631
2.23M
    }
632
250k
    else {
633
250k
      if (IS_NCCLASS_NOT(cc))
634
291
  add_opcode(reg, OP_CCLASS_MIX_NOT);
635
250k
      else
636
250k
  add_opcode(reg, OP_CCLASS_MIX);
637
638
250k
      r = add_bitset(reg, cc->bs);
639
250k
      if (r) return r;
640
250k
      r = add_multi_byte_cclass(cc->mbuf, reg);
641
250k
    }
642
2.48M
  }
643
644
6.47M
  return r;
645
6.47M
}
646
647
static int
648
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
649
24.0k
{
650
24.0k
#define REPEAT_RANGE_ALLOC  4
651
652
24.0k
  OnigRepeatRange* p;
653
654
24.0k
  if (reg->repeat_range_alloc == 0) {
655
1.80k
    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
656
1.80k
    CHECK_NULL_RETURN_MEMERR(p);
657
1.80k
    reg->repeat_range = p;
658
1.80k
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
659
1.80k
  }
660
22.2k
  else if (reg->repeat_range_alloc <= id) {
661
5.46k
    int n;
662
5.46k
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
663
5.46k
    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
664
5.46k
                                    sizeof(OnigRepeatRange) * n);
665
5.46k
    CHECK_NULL_RETURN_MEMERR(p);
666
5.46k
    reg->repeat_range = p;
667
5.46k
    reg->repeat_range_alloc = n;
668
5.46k
  }
669
16.7k
  else {
670
16.7k
    p = reg->repeat_range;
671
16.7k
  }
672
673
24.0k
  p[id].lower = lower;
674
24.0k
  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
675
24.0k
  return 0;
676
24.0k
}
677
678
static int
679
compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
680
                          regex_t* reg)
681
24.0k
{
682
24.0k
  int r;
683
24.0k
  int num_repeat = reg->num_repeat;
684
685
24.0k
  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
686
24.0k
  if (r) return r;
687
24.0k
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
688
24.0k
  reg->num_repeat++;
689
24.0k
  if (r) return r;
690
24.0k
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
691
24.0k
  if (r) return r;
692
693
24.0k
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
694
24.0k
  if (r) return r;
695
696
24.0k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
697
24.0k
  if (r) return r;
698
699
24.0k
  if (
700
24.0k
#ifdef USE_SUBEXP_CALL
701
24.0k
      reg->num_call > 0 ||
702
23.0k
#endif
703
23.0k
      IS_QUANTIFIER_IN_REPEAT(qn)) {
704
20.4k
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
705
20.4k
  }
706
3.64k
  else {
707
3.64k
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
708
3.64k
  }
709
24.0k
  if (r) return r;
710
24.0k
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
711
24.0k
  return r;
712
24.0k
}
713
714
static int
715
is_anychar_star_quantifier(QtfrNode* qn)
716
3.13M
{
717
3.13M
  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
718
2.73M
      NTYPE(qn->target) == NT_CANY)
719
636k
    return 1;
720
2.50M
  else
721
2.50M
    return 0;
722
3.13M
}
723
724
28.4M
/* Threshold, in compiled bytes, that the quantifier compilers compare
 * against when deciding whether to expand a repeated target inline. */
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50

/* True when the local variable `ckn` of the enclosing function is
 * positive; only usable where such a local is in scope. */
#define CKN_ON   (ckn > 0)
726
727
#ifdef USE_COMBINATION_EXPLOSION_CHECK
728
729
static int
730
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
731
{
732
  int len, mod_tlen, cklen;
733
  int ckn;
734
  int infinite = IS_REPEAT_INFINITE(qn->upper);
735
  int empty_info = qn->target_empty_info;
736
  int tlen = compile_length_tree(qn->target, reg);
737
738
  if (tlen < 0) return tlen;
739
740
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
741
742
  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
743
744
  /* anychar repeat */
745
  if (NTYPE(qn->target) == NT_CANY) {
746
    if (qn->greedy && infinite) {
747
      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
748
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
749
      else
750
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
751
    }
752
  }
753
754
  if (empty_info != 0)
755
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
756
  else
757
    mod_tlen = tlen;
758
759
  if (infinite && qn->lower <= 1) {
760
    if (qn->greedy) {
761
      if (qn->lower == 1)
762
  len = SIZE_OP_JUMP;
763
      else
764
  len = 0;
765
766
      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
767
    }
768
    else {
769
      if (qn->lower == 0)
770
  len = SIZE_OP_JUMP;
771
      else
772
  len = 0;
773
774
      len += mod_tlen + SIZE_OP_PUSH + cklen;
775
    }
776
  }
777
  else if (qn->upper == 0) {
778
    if (qn->is_referred != 0) /* /(?<n>..){0}/ */
779
      len = SIZE_OP_JUMP + tlen;
780
    else
781
      len = 0;
782
  }
783
  else if (qn->upper == 1 && qn->greedy) {
784
    if (qn->lower == 0) {
785
      if (CKN_ON) {
786
  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
787
      }
788
      else {
789
  len = SIZE_OP_PUSH + tlen;
790
      }
791
    }
792
    else {
793
      len = tlen;
794
    }
795
  }
796
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
797
    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
798
  }
799
  else {
800
    len = SIZE_OP_REPEAT_INC
801
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
802
    if (CKN_ON)
803
      len += SIZE_OP_STATE_CHECK;
804
  }
805
806
  return len;
807
}
808
809
static int
810
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
811
{
812
  int r, mod_tlen;
813
  int ckn;
814
  int infinite = IS_REPEAT_INFINITE(qn->upper);
815
  int empty_info = qn->target_empty_info;
816
  int tlen = compile_length_tree(qn->target, reg);
817
818
  if (tlen < 0) return tlen;
819
820
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
821
822
  if (is_anychar_star_quantifier(qn)) {
823
    r = compile_tree_n_times(qn->target, qn->lower, reg);
824
    if (r) return r;
825
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
826
      if (IS_MULTILINE(reg->options))
827
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
828
      else
829
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
830
      if (r) return r;
831
      if (CKN_ON) {
832
  r = add_state_check_num(reg, ckn);
833
  if (r) return r;
834
      }
835
836
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
837
    }
838
    else {
839
      if (IS_MULTILINE(reg->options)) {
840
  r = add_opcode(reg, (CKN_ON ?
841
             OP_STATE_CHECK_ANYCHAR_ML_STAR
842
           : OP_ANYCHAR_ML_STAR));
843
      }
844
      else {
845
  r = add_opcode(reg, (CKN_ON ?
846
             OP_STATE_CHECK_ANYCHAR_STAR
847
           : OP_ANYCHAR_STAR));
848
      }
849
      if (r) return r;
850
      if (CKN_ON)
851
  r = add_state_check_num(reg, ckn);
852
853
      return r;
854
    }
855
  }
856
857
  if (empty_info != 0)
858
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
859
  else
860
    mod_tlen = tlen;
861
862
  if (infinite && qn->lower <= 1) {
863
    if (qn->greedy) {
864
      if (qn->lower == 1) {
865
  r = add_opcode_rel_addr(reg, OP_JUMP,
866
      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
867
  if (r) return r;
868
      }
869
870
      if (CKN_ON) {
871
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
872
  if (r) return r;
873
  r = add_state_check_num(reg, ckn);
874
  if (r) return r;
875
  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
876
      }
877
      else {
878
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
879
      }
880
      if (r) return r;
881
      r = compile_tree_empty_check(qn->target, reg, empty_info);
882
      if (r) return r;
883
      r = add_opcode_rel_addr(reg, OP_JUMP,
884
        -(mod_tlen + (int )SIZE_OP_JUMP
885
    + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
886
    }
887
    else {
888
      if (qn->lower == 0) {
889
  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
890
  if (r) return r;
891
      }
892
      r = compile_tree_empty_check(qn->target, reg, empty_info);
893
      if (r) return r;
894
      if (CKN_ON) {
895
  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
896
  if (r) return r;
897
  r = add_state_check_num(reg, ckn);
898
  if (r) return r;
899
  r = add_rel_addr(reg,
900
     -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
901
      }
902
      else
903
  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
904
    }
905
  }
906
  else if (qn->upper == 0) {
907
    if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
908
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
909
      if (r) return r;
910
      r = compile_tree(qn->target, reg);
911
    }
912
    else
913
      r = 0;
914
  }
915
  else if (qn->upper == 1 && qn->greedy) {
916
    if (qn->lower == 0) {
917
      if (CKN_ON) {
918
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
919
  if (r) return r;
920
  r = add_state_check_num(reg, ckn);
921
  if (r) return r;
922
  r = add_rel_addr(reg, tlen);
923
      }
924
      else {
925
  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
926
      }
927
      if (r) return r;
928
    }
929
930
    r = compile_tree(qn->target, reg);
931
  }
932
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
933
    if (CKN_ON) {
934
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
935
      if (r) return r;
936
      r = add_state_check_num(reg, ckn);
937
      if (r) return r;
938
      r = add_rel_addr(reg, SIZE_OP_JUMP);
939
    }
940
    else {
941
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
942
    }
943
944
    if (r) return r;
945
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
946
    if (r) return r;
947
    r = compile_tree(qn->target, reg);
948
  }
949
  else {
950
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
951
    if (CKN_ON) {
952
      if (r) return r;
953
      r = add_opcode(reg, OP_STATE_CHECK);
954
      if (r) return r;
955
      r = add_state_check_num(reg, ckn);
956
    }
957
  }
958
  return r;
959
}
960
961
#else /* USE_COMBINATION_EXPLOSION_CHECK */
962
963
static int
964
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
965
33.0M
{
966
33.0M
  int len, mod_tlen;
967
33.0M
  int infinite = IS_REPEAT_INFINITE(qn->upper);
968
33.0M
  int empty_info = qn->target_empty_info;
969
33.0M
  int tlen = compile_length_tree(qn->target, reg);
970
971
33.0M
  if (tlen < 0) return tlen;
972
973
  /* anychar repeat */
974
33.0M
  if (NTYPE(qn->target) == NT_CANY) {
975
8.45k
    if (qn->greedy && infinite) {
976
7.09k
      if (IS_NOT_NULL(qn->next_head_exact))
977
662
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
978
6.43k
      else
979
6.43k
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
980
7.09k
    }
981
8.45k
  }
982
983
33.0M
  if (empty_info != 0)
984
20.1M
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
985
12.8M
  else
986
12.8M
    mod_tlen = tlen;
987
988
33.0M
  if (infinite &&
989
31.7M
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
990
31.7M
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
991
22.7M
      len = SIZE_OP_JUMP;
992
22.7M
    }
993
9.00M
    else {
994
9.00M
      len = tlen * qn->lower;
995
9.00M
    }
996
997
31.7M
    if (qn->greedy) {
998
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
999
      if (IS_NOT_NULL(qn->head_exact))
1000
  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1001
      else
1002
#endif
1003
31.7M
      if (IS_NOT_NULL(qn->next_head_exact))
1004
1.55M
  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1005
30.2M
      else
1006
30.2M
  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1007
31.7M
    }
1008
8.43k
    else
1009
8.43k
      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1010
31.7M
  }
1011
1.27M
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1012
2.16k
    len = SIZE_OP_JUMP + tlen;
1013
2.16k
  }
1014
1.27M
  else if (!infinite && qn->greedy &&
1015
1.25M
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1016
1.12M
                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1017
913k
    len = tlen * qn->lower;
1018
913k
    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1019
913k
  }
1020
359k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1021
2.23k
    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1022
2.23k
  }
1023
357k
  else {
1024
357k
    len = SIZE_OP_REPEAT_INC
1025
357k
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1026
357k
  }
1027
1028
33.0M
  return len;
1029
33.0M
}
1030
1031
static int
1032
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1033
3.13M
{
1034
3.13M
  int i, r, mod_tlen;
1035
3.13M
  int infinite = IS_REPEAT_INFINITE(qn->upper);
1036
3.13M
  int empty_info = qn->target_empty_info;
1037
3.13M
  int tlen = compile_length_tree(qn->target, reg);
1038
1039
3.13M
  if (tlen < 0) return tlen;
1040
1041
3.13M
  if (is_anychar_star_quantifier(qn)) {
1042
636k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1043
636k
    if (r) return r;
1044
636k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1045
175k
      if (IS_MULTILINE(reg->options))
1046
0
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1047
175k
      else
1048
175k
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1049
175k
      if (r) return r;
1050
175k
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1051
175k
    }
1052
461k
    else {
1053
461k
      if (IS_MULTILINE(reg->options))
1054
0
  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1055
461k
      else
1056
461k
  return add_opcode(reg, OP_ANYCHAR_STAR);
1057
461k
    }
1058
636k
  }
1059
1060
2.50M
  if (empty_info != 0)
1061
36.7k
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1062
2.46M
  else
1063
2.46M
    mod_tlen = tlen;
1064
1065
2.50M
  if (infinite &&
1066
2.25M
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1067
2.25M
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1068
38.0k
      if (qn->greedy) {
1069
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1070
  if (IS_NOT_NULL(qn->head_exact))
1071
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1072
  else
1073
#endif
1074
37.7k
  if (IS_NOT_NULL(qn->next_head_exact))
1075
2.02k
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1076
35.7k
  else
1077
35.7k
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1078
37.7k
      }
1079
286
      else {
1080
286
  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1081
286
      }
1082
38.0k
      if (r) return r;
1083
38.0k
    }
1084
2.21M
    else {
1085
2.21M
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1086
2.21M
      if (r) return r;
1087
2.21M
    }
1088
1089
2.25M
    if (qn->greedy) {
1090
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1091
      if (IS_NOT_NULL(qn->head_exact)) {
1092
  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093
           mod_tlen + SIZE_OP_JUMP);
1094
  if (r) return r;
1095
  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097
  if (r) return r;
1098
  r = add_opcode_rel_addr(reg, OP_JUMP,
1099
  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100
      }
1101
      else
1102
#endif
1103
2.10M
      if (IS_NOT_NULL(qn->next_head_exact)) {
1104
7.56k
  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1105
7.56k
        mod_tlen + SIZE_OP_JUMP);
1106
7.56k
  if (r) return r;
1107
7.56k
  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1108
7.56k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1109
7.56k
  if (r) return r;
1110
7.56k
  r = add_opcode_rel_addr(reg, OP_JUMP,
1111
7.56k
          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1112
7.56k
      }
1113
2.09M
      else {
1114
2.09M
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1115
2.09M
  if (r) return r;
1116
2.09M
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1117
2.09M
  if (r) return r;
1118
2.09M
  r = add_opcode_rel_addr(reg, OP_JUMP,
1119
2.09M
         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1120
2.09M
      }
1121
2.10M
    }
1122
155k
    else {
1123
155k
      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1124
155k
      if (r) return r;
1125
155k
      r = compile_tree_empty_check(qn->target, reg, empty_info);
1126
155k
      if (r) return r;
1127
155k
      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1128
155k
    }
1129
2.25M
  }
1130
244k
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1131
157
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1132
157
    if (r) return r;
1133
157
    r = compile_tree(qn->target, reg);
1134
157
  }
1135
243k
  else if (!infinite && qn->greedy &&
1136
240k
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1137
219k
                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1138
219k
    int n = qn->upper - qn->lower;
1139
1140
219k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1141
219k
    if (r) return r;
1142
1143
313k
    for (i = 0; i < n; i++) {
1144
93.8k
      r = add_opcode_rel_addr(reg, OP_PUSH,
1145
93.8k
         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1146
93.8k
      if (r) return r;
1147
93.8k
      r = compile_tree(qn->target, reg);
1148
93.8k
      if (r) return r;
1149
93.8k
    }
1150
219k
  }
1151
24.7k
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1152
695
    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1153
695
    if (r) return r;
1154
695
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1155
695
    if (r) return r;
1156
695
    r = compile_tree(qn->target, reg);
1157
695
  }
1158
24.0k
  else {
1159
24.0k
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1160
24.0k
  }
1161
2.50M
  return r;
1162
2.50M
}
1163
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164
1165
static int
1166
compile_length_option_node(EncloseNode* node, regex_t* reg)
1167
1.26M
{
1168
1.26M
  int tlen;
1169
1.26M
  OnigOptionType prev = reg->options;
1170
1171
1.26M
  reg->options = node->option;
1172
1.26M
  tlen = compile_length_tree(node->target, reg);
1173
1.26M
  reg->options = prev;
1174
1175
1.26M
  if (tlen < 0) return tlen;
1176
1177
845k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178
0
    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1179
0
           + tlen + SIZE_OP_SET_OPTION;
1180
0
  }
1181
845k
  else
1182
845k
    return tlen;
1183
845k
}
1184
1185
static int
1186
compile_option_node(EncloseNode* node, regex_t* reg)
1187
339k
{
1188
339k
  int r;
1189
339k
  OnigOptionType prev = reg->options;
1190
1191
339k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1192
0
    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1193
0
    if (r) return r;
1194
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1195
0
    if (r) return r;
1196
0
    r = add_opcode(reg, OP_FAIL);
1197
0
    if (r) return r;
1198
0
  }
1199
1200
339k
  reg->options = node->option;
1201
339k
  r = compile_tree(node->target, reg);
1202
339k
  reg->options = prev;
1203
1204
339k
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1205
0
    if (r) return r;
1206
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1207
0
  }
1208
339k
  return r;
1209
339k
}
1210
1211
static int
1212
compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1213
26.2M
{
1214
26.2M
  int len;
1215
26.2M
  int tlen;
1216
1217
26.2M
  if (node->type == ENCLOSE_OPTION)
1218
1.26M
    return compile_length_option_node(node, reg);
1219
1220
24.9M
  if (node->target) {
1221
24.9M
    tlen = compile_length_tree(node->target, reg);
1222
24.9M
    if (tlen < 0) return tlen;
1223
24.9M
  }
1224
0
  else
1225
0
    tlen = 0;
1226
1227
24.9M
  switch (node->type) {
1228
1.09M
  case ENCLOSE_MEMORY:
1229
1.09M
#ifdef USE_SUBEXP_CALL
1230
1.09M
    if (IS_ENCLOSE_CALLED(node)) {
1231
2.43k
      len = SIZE_OP_MEMORY_START_PUSH + tlen
1232
2.43k
    + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1233
2.43k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1234
57
  len += (IS_ENCLOSE_RECURSION(node)
1235
57
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1236
2.37k
      else
1237
2.37k
  len += (IS_ENCLOSE_RECURSION(node)
1238
2.37k
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1239
2.43k
    }
1240
1.08M
    else if (IS_ENCLOSE_RECURSION(node)) {
1241
3.95k
      len = SIZE_OP_MEMORY_START_PUSH;
1242
3.95k
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1243
3.95k
         ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1244
3.95k
    }
1245
1.08M
    else
1246
1.08M
#endif
1247
1.08M
    {
1248
1.08M
      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1249
1.08M
  len = SIZE_OP_MEMORY_START_PUSH;
1250
227
      else
1251
227
  len = SIZE_OP_MEMORY_START;
1252
1253
1.08M
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1254
1.08M
         ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1255
1.08M
    }
1256
1.09M
    break;
1257
1258
23.7M
  case ENCLOSE_STOP_BACKTRACK:
1259
23.7M
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1260
1.57M
      QtfrNode* qn = NQTFR(node->target);
1261
1.57M
      tlen = compile_length_tree(qn->target, reg);
1262
1.57M
      if (tlen < 0) return tlen;
1263
1264
1.57M
      len = tlen * qn->lower
1265
1.57M
    + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1266
1.57M
    }
1267
22.2M
    else {
1268
22.2M
      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1269
22.2M
    }
1270
23.7M
    break;
1271
1272
23.7M
  case ENCLOSE_CONDITION:
1273
6.04k
    len = SIZE_OP_CONDITION;
1274
6.04k
    if (NTYPE(node->target) == NT_ALT) {
1275
6.04k
      Node* x = node->target;
1276
1277
6.04k
      tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1278
6.04k
      if (tlen < 0) return tlen;
1279
6.04k
      len += tlen + SIZE_OP_JUMP;
1280
6.04k
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1281
6.04k
      x = NCDR(x);
1282
6.04k
      tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1283
6.04k
      if (tlen < 0) return tlen;
1284
6.04k
      len += tlen;
1285
6.04k
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1286
6.04k
    }
1287
0
    else {
1288
0
      return ONIGERR_PARSER_BUG;
1289
0
    }
1290
6.04k
    break;
1291
1292
62.8k
  case ENCLOSE_ABSENT:
1293
62.8k
    len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1294
62.8k
    break;
1295
1296
0
  default:
1297
0
    return ONIGERR_TYPE_BUG;
1298
0
    break;
1299
24.9M
  }
1300
1301
24.9M
  return len;
1302
24.9M
}
1303
1304
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1305
1306
static int
1307
compile_enclose_node(EncloseNode* node, regex_t* reg)
1308
2.95M
{
1309
2.95M
  int r, len;
1310
1311
2.95M
  if (node->type == ENCLOSE_OPTION)
1312
339k
    return compile_option_node(node, reg);
1313
1314
2.61M
  switch (node->type) {
1315
882k
  case ENCLOSE_MEMORY:
1316
882k
#ifdef USE_SUBEXP_CALL
1317
882k
    if (IS_ENCLOSE_CALLED(node)) {
1318
2.24k
      r = add_opcode(reg, OP_CALL);
1319
2.24k
      if (r) return r;
1320
2.24k
      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1321
2.24k
      node->state |= NST_ADDR_FIXED;
1322
2.24k
      r = add_abs_addr(reg, (int )node->call_addr);
1323
2.24k
      if (r) return r;
1324
2.24k
      len = compile_length_tree(node->target, reg);
1325
2.24k
      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1326
2.24k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1327
173
  len += (IS_ENCLOSE_RECURSION(node)
1328
173
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1329
2.07k
      else
1330
2.07k
  len += (IS_ENCLOSE_RECURSION(node)
1331
2.07k
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1332
1333
2.24k
      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1334
2.24k
      if (r) return r;
1335
2.24k
    }
1336
882k
#endif
1337
882k
    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1338
23.3k
      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1339
859k
    else
1340
859k
      r = add_opcode(reg, OP_MEMORY_START);
1341
882k
    if (r) return r;
1342
882k
    r = add_mem_num(reg, node->regnum);
1343
882k
    if (r) return r;
1344
882k
    r = compile_tree(node->target, reg);
1345
882k
    if (r) return r;
1346
882k
#ifdef USE_SUBEXP_CALL
1347
882k
    if (IS_ENCLOSE_CALLED(node)) {
1348
2.24k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1349
173
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350
173
           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1351
2.07k
      else
1352
2.07k
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1353
2.07k
           ? OP_MEMORY_END_REC : OP_MEMORY_END));
1354
1355
2.24k
      if (r) return r;
1356
2.24k
      r = add_mem_num(reg, node->regnum);
1357
2.24k
      if (r) return r;
1358
2.24k
      r = add_opcode(reg, OP_RETURN);
1359
2.24k
    }
1360
880k
    else if (IS_ENCLOSE_RECURSION(node)) {
1361
1.99k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1362
1.60k
  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1363
389
      else
1364
389
  r = add_opcode(reg, OP_MEMORY_END_REC);
1365
1.99k
      if (r) return r;
1366
1.99k
      r = add_mem_num(reg, node->regnum);
1367
1.99k
    }
1368
878k
    else
1369
878k
#endif
1370
878k
    {
1371
878k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1372
5.06k
  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1373
873k
      else
1374
873k
  r = add_opcode(reg, OP_MEMORY_END);
1375
878k
      if (r) return r;
1376
878k
      r = add_mem_num(reg, node->regnum);
1377
878k
    }
1378
882k
    break;
1379
1380
1.73M
  case ENCLOSE_STOP_BACKTRACK:
1381
1.73M
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1382
1.55M
      QtfrNode* qn = NQTFR(node->target);
1383
1.55M
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1384
1.55M
      if (r) return r;
1385
1386
1.55M
      len = compile_length_tree(qn->target, reg);
1387
1.55M
      if (len < 0) return len;
1388
1389
1.55M
      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1390
1.55M
      if (r) return r;
1391
1.55M
      r = compile_tree(qn->target, reg);
1392
1.55M
      if (r) return r;
1393
1.55M
      r = add_opcode(reg, OP_POP);
1394
1.55M
      if (r) return r;
1395
1.55M
      r = add_opcode_rel_addr(reg, OP_JUMP,
1396
1.55M
   -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1397
1.55M
    }
1398
174k
    else {
1399
174k
      r = add_opcode(reg, OP_PUSH_STOP_BT);
1400
174k
      if (r) return r;
1401
174k
      r = compile_tree(node->target, reg);
1402
174k
      if (r) return r;
1403
174k
      r = add_opcode(reg, OP_POP_STOP_BT);
1404
174k
    }
1405
1.73M
    break;
1406
1407
1.73M
  case ENCLOSE_CONDITION:
1408
782
    r = add_opcode(reg, OP_CONDITION);
1409
782
    if (r) return r;
1410
782
    r = add_mem_num(reg, node->regnum);
1411
782
    if (r) return r;
1412
1413
782
    if (NTYPE(node->target) == NT_ALT) {
1414
782
      Node* x = node->target;
1415
782
      int len2;
1416
1417
782
      len = compile_length_tree(NCAR(x), reg);  /* yes-node */
1418
782
      if (len < 0) return len;
1419
782
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1420
782
      x = NCDR(x);
1421
782
      len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1422
782
      if (len2 < 0) return len2;
1423
782
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1424
1425
782
      x = node->target;
1426
782
      r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1427
782
      if (r) return r;
1428
782
      r = compile_tree(NCAR(x), reg);   /* yes-node */
1429
782
      if (r) return r;
1430
782
      r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1431
782
      if (r) return r;
1432
782
      x = NCDR(x);
1433
782
      r = compile_tree(NCAR(x), reg);   /* no-node */
1434
782
    }
1435
0
    else {
1436
0
      return ONIGERR_PARSER_BUG;
1437
0
    }
1438
782
    break;
1439
1440
1.06k
  case ENCLOSE_ABSENT:
1441
1.06k
    len = compile_length_tree(node->target, reg);
1442
1.06k
    if (len < 0) return len;
1443
1444
1.06k
    r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1445
1.06k
    if (r) return r;
1446
1.06k
    r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1447
1.06k
    if (r) return r;
1448
1.06k
    r = compile_tree(node->target, reg);
1449
1.06k
    if (r) return r;
1450
1.06k
    r = add_opcode(reg, OP_ABSENT_END);
1451
1.06k
    break;
1452
1453
0
  default:
1454
0
    return ONIGERR_TYPE_BUG;
1455
0
    break;
1456
2.61M
  }
1457
1458
2.61M
  return r;
1459
2.61M
}
1460
1461
static int
1462
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1463
593k
{
1464
593k
  int len;
1465
593k
  int tlen = 0;
1466
1467
593k
  if (node->target) {
1468
21.0k
    tlen = compile_length_tree(node->target, reg);
1469
21.0k
    if (tlen < 0) return tlen;
1470
21.0k
  }
1471
1472
593k
  switch (node->type) {
1473
13.2k
  case ANCHOR_PREC_READ:
1474
13.2k
    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1475
13.2k
    break;
1476
227
  case ANCHOR_PREC_READ_NOT:
1477
227
    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1478
227
    break;
1479
7.49k
  case ANCHOR_LOOK_BEHIND:
1480
7.49k
    len = SIZE_OP_LOOK_BEHIND + tlen;
1481
7.49k
    break;
1482
114
  case ANCHOR_LOOK_BEHIND_NOT:
1483
114
    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1484
114
    break;
1485
1486
572k
  default:
1487
572k
    len = SIZE_OPCODE;
1488
572k
    break;
1489
593k
  }
1490
1491
593k
  return len;
1492
593k
}
1493
1494
static int
1495
compile_anchor_node(AnchorNode* node, regex_t* reg)
1496
2.87M
{
1497
2.87M
  int r, len;
1498
1499
2.87M
  switch (node->type) {
1500
256
  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1501
394
  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1502
1.92M
  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1503
854k
  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1504
3.41k
  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1505
278
  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1506
1507
87.7k
  case ANCHOR_WORD_BOUND:
1508
87.7k
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1509
87.7k
    else                      r = add_opcode(reg, OP_WORD_BOUND);
1510
87.7k
    break;
1511
72
  case ANCHOR_NOT_WORD_BOUND:
1512
72
    if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1513
11
    else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
1514
72
    break;
1515
0
#ifdef USE_WORD_BEGIN_END
1516
0
  case ANCHOR_WORD_BEGIN:
1517
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1518
0
    else                      r = add_opcode(reg, OP_WORD_BEGIN);
1519
0
    break;
1520
0
  case ANCHOR_WORD_END:
1521
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
1522
0
    else                      r = add_opcode(reg, OP_WORD_END);
1523
0
    break;
1524
0
#endif
1525
4
  case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
1526
1527
418
  case ANCHOR_PREC_READ:
1528
418
    r = add_opcode(reg, OP_PUSH_POS);
1529
418
    if (r) return r;
1530
418
    r = compile_tree(node->target, reg);
1531
418
    if (r) return r;
1532
418
    r = add_opcode(reg, OP_POP_POS);
1533
418
    break;
1534
1535
200
  case ANCHOR_PREC_READ_NOT:
1536
200
    len = compile_length_tree(node->target, reg);
1537
200
    if (len < 0) return len;
1538
198
    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1539
198
    if (r) return r;
1540
198
    r = compile_tree(node->target, reg);
1541
198
    if (r) return r;
1542
198
    r = add_opcode(reg, OP_FAIL_POS);
1543
198
    break;
1544
1545
1.15k
  case ANCHOR_LOOK_BEHIND:
1546
1.15k
    {
1547
1.15k
      int n;
1548
1.15k
      r = add_opcode(reg, OP_LOOK_BEHIND);
1549
1.15k
      if (r) return r;
1550
1.15k
      if (node->char_len < 0) {
1551
723
  r = get_char_length_tree(node->target, reg, &n);
1552
723
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1553
723
      }
1554
430
      else
1555
430
  n = node->char_len;
1556
1.15k
      r = add_length(reg, n);
1557
1.15k
      if (r) return r;
1558
1.15k
      r = compile_tree(node->target, reg);
1559
1.15k
    }
1560
0
    break;
1561
1562
55
  case ANCHOR_LOOK_BEHIND_NOT:
1563
55
    {
1564
55
      int n;
1565
55
      len = compile_length_tree(node->target, reg);
1566
55
      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1567
55
         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1568
55
      if (r) return r;
1569
55
      if (node->char_len < 0) {
1570
0
  r = get_char_length_tree(node->target, reg, &n);
1571
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1572
0
      }
1573
55
      else
1574
55
  n = node->char_len;
1575
55
      r = add_length(reg, n);
1576
55
      if (r) return r;
1577
55
      r = compile_tree(node->target, reg);
1578
55
      if (r) return r;
1579
55
      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1580
55
    }
1581
0
    break;
1582
1583
0
  default:
1584
0
    return ONIGERR_TYPE_BUG;
1585
0
    break;
1586
2.87M
  }
1587
1588
2.87M
  return r;
1589
2.87M
}
1590
1591
static int
1592
compile_length_tree(Node* node, regex_t* reg)
1593
93.1M
{
1594
93.1M
  int len, type, r;
1595
1596
93.1M
  type = NTYPE(node);
1597
93.1M
  switch (type) {
1598
5.08M
  case NT_LIST:
1599
5.08M
    len = 0;
1600
14.0M
    do {
1601
14.0M
      r = compile_length_tree(NCAR(node), reg);
1602
14.0M
      if (r < 0) return r;
1603
13.9M
      len += r;
1604
13.9M
    } while (IS_NOT_NULL(node = NCDR(node)));
1605
5.00M
    r = len;
1606
5.00M
    break;
1607
1608
1.99M
  case NT_ALT:
1609
1.99M
    {
1610
1.99M
      int n = 0;
1611
1.99M
      len = 0;
1612
7.80M
      do {
1613
7.80M
  r = compile_length_tree(NCAR(node), reg);
1614
7.80M
  if (r < 0) return r;
1615
7.45M
  len += r;
1616
7.45M
  n++;
1617
7.45M
      } while (IS_NOT_NULL(node = NCDR(node)));
1618
1.63M
      r = len;
1619
1.63M
      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1620
1.63M
    }
1621
0
    break;
1622
1623
6.49M
  case NT_STR:
1624
6.49M
    if (NSTRING_IS_RAW(node))
1625
1.43M
      r = compile_length_string_raw_node(NSTR(node), reg);
1626
5.05M
    else
1627
5.05M
      r = compile_length_string_node(node, reg);
1628
6.49M
    break;
1629
1630
18.3M
  case NT_CCLASS:
1631
18.3M
    r = compile_length_cclass_node(NCCLASS(node), reg);
1632
18.3M
    break;
1633
1634
9.12k
  case NT_CTYPE:
1635
1.38M
  case NT_CANY:
1636
1.38M
    r = SIZE_OPCODE;
1637
1.38M
    break;
1638
1639
5.93k
  case NT_BREF:
1640
5.93k
    {
1641
5.93k
      BRefNode* br = NBREF(node);
1642
1643
5.93k
#ifdef USE_BACKREF_WITH_LEVEL
1644
5.93k
      if (IS_BACKREF_NEST_LEVEL(br)) {
1645
2.37k
  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1646
2.37k
            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1647
2.37k
      }
1648
3.56k
      else
1649
3.56k
#endif
1650
3.56k
      if (br->back_num == 1) {
1651
1.85k
  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1652
1.85k
       ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1653
1.85k
      }
1654
1.70k
      else {
1655
1.70k
  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1656
1.70k
      }
1657
5.93k
    }
1658
5.93k
    break;
1659
1660
0
#ifdef USE_SUBEXP_CALL
1661
8.35k
  case NT_CALL:
1662
8.35k
    r = SIZE_OP_CALL;
1663
8.35k
    break;
1664
0
#endif
1665
1666
33.0M
  case NT_QTFR:
1667
33.0M
    r = compile_length_quantifier_node(NQTFR(node), reg);
1668
33.0M
    break;
1669
1670
26.2M
  case NT_ENCLOSE:
1671
26.2M
    r = compile_length_enclose_node(NENCLOSE(node), reg);
1672
26.2M
    break;
1673
1674
593k
  case NT_ANCHOR:
1675
593k
    r = compile_length_anchor_node(NANCHOR(node), reg);
1676
593k
    break;
1677
1678
0
  default:
1679
0
    return ONIGERR_TYPE_BUG;
1680
0
    break;
1681
93.1M
  }
1682
1683
92.7M
  return r;
1684
93.1M
}
1685
1686
static int
1687
compile_tree(Node* node, regex_t* reg)
1688
25.1M
{
1689
25.1M
  int n, type, len, pos, r = 0;
1690
1691
25.1M
  type = NTYPE(node);
1692
25.1M
  switch (type) {
1693
2.99M
  case NT_LIST:
1694
11.9M
    do {
1695
11.9M
      r = compile_tree(NCAR(node), reg);
1696
11.9M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1697
2.99M
    break;
1698
1699
905k
  case NT_ALT:
1700
905k
    {
1701
905k
      Node* x = node;
1702
905k
      len = 0;
1703
2.88M
      do {
1704
2.88M
  len += compile_length_tree(NCAR(x), reg);
1705
2.88M
  if (NCDR(x) != NULL) {
1706
1.97M
    len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1707
1.97M
  }
1708
2.88M
      } while (IS_NOT_NULL(x = NCDR(x)));
1709
905k
      pos = reg->used + len;  /* goal position */
1710
1711
2.88M
      do {
1712
2.88M
  len = compile_length_tree(NCAR(node), reg);
1713
2.88M
  if (IS_NOT_NULL(NCDR(node))) {
1714
1.97M
    r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1715
1.97M
    if (r) break;
1716
1.97M
  }
1717
2.88M
  r = compile_tree(NCAR(node), reg);
1718
2.88M
  if (r) break;
1719
2.88M
  if (IS_NOT_NULL(NCDR(node))) {
1720
1.97M
    len = pos - (reg->used + SIZE_OP_JUMP);
1721
1.97M
    r = add_opcode_rel_addr(reg, OP_JUMP, len);
1722
1.97M
    if (r) break;
1723
1.97M
  }
1724
2.88M
      } while (IS_NOT_NULL(node = NCDR(node)));
1725
905k
    }
1726
905k
    break;
1727
1728
4.69M
  case NT_STR:
1729
4.69M
    if (NSTRING_IS_RAW(node))
1730
249k
      r = compile_string_raw_node(NSTR(node), reg);
1731
4.44M
    else
1732
4.44M
      r = compile_string_node(node, reg);
1733
4.69M
    break;
1734
1735
6.47M
  case NT_CCLASS:
1736
6.47M
    r = compile_cclass_node(NCCLASS(node), reg);
1737
6.47M
    break;
1738
1739
3.03k
  case NT_CTYPE:
1740
3.03k
    {
1741
3.03k
      int op;
1742
1743
3.03k
      switch (NCTYPE(node)->ctype) {
1744
3.03k
      case ONIGENC_CTYPE_WORD:
1745
3.03k
  if (NCTYPE(node)->ascii_range != 0) {
1746
3.03k
    if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
1747
261
    else                         op = OP_ASCII_WORD;
1748
3.03k
  }
1749
0
  else {
1750
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1751
0
    else                         op = OP_WORD;
1752
0
  }
1753
3.03k
  break;
1754
0
      default:
1755
0
  return ONIGERR_TYPE_BUG;
1756
0
  break;
1757
3.03k
      }
1758
3.03k
      r = add_opcode(reg, op);
1759
3.03k
    }
1760
0
    break;
1761
1762
1.09M
  case NT_CANY:
1763
1.09M
    if (IS_MULTILINE(reg->options))
1764
124k
      r = add_opcode(reg, OP_ANYCHAR_ML);
1765
969k
    else
1766
969k
      r = add_opcode(reg, OP_ANYCHAR);
1767
1.09M
    break;
1768
1769
3.72k
  case NT_BREF:
1770
3.72k
    {
1771
3.72k
      BRefNode* br = NBREF(node);
1772
1773
3.72k
#ifdef USE_BACKREF_WITH_LEVEL
1774
3.72k
      if (IS_BACKREF_NEST_LEVEL(br)) {
1775
2.20k
  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1776
2.20k
  if (r) return r;
1777
2.20k
  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1778
2.20k
  if (r) return r;
1779
2.20k
  r = add_length(reg, br->nest_level);
1780
2.20k
  if (r) return r;
1781
1782
2.20k
  goto add_bacref_mems;
1783
2.20k
      }
1784
1.52k
      else
1785
1.52k
#endif
1786
1.52k
      if (br->back_num == 1) {
1787
420
  n = br->back_static[0];
1788
420
  if (IS_IGNORECASE(reg->options)) {
1789
9
    r = add_opcode(reg, OP_BACKREFN_IC);
1790
9
    if (r) return r;
1791
9
    r = add_mem_num(reg, n);
1792
9
  }
1793
411
  else {
1794
411
    switch (n) {
1795
206
    case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1796
117
    case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1797
88
    default:
1798
88
      r = add_opcode(reg, OP_BACKREFN);
1799
88
      if (r) return r;
1800
88
      r = add_mem_num(reg, n);
1801
88
      break;
1802
411
    }
1803
411
  }
1804
420
      }
1805
1.10k
      else {
1806
1.10k
  int i;
1807
1.10k
  int* p;
1808
1809
1.10k
  if (IS_IGNORECASE(reg->options)) {
1810
0
    r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1811
0
  }
1812
1.10k
  else {
1813
1.10k
    r = add_opcode(reg, OP_BACKREF_MULTI);
1814
1.10k
  }
1815
1.10k
  if (r) return r;
1816
1817
1.10k
#ifdef USE_BACKREF_WITH_LEVEL
1818
3.30k
      add_bacref_mems:
1819
3.30k
#endif
1820
3.30k
  r = add_length(reg, br->back_num);
1821
3.30k
  if (r) return r;
1822
3.30k
  p = BACKREFS_P(br);
1823
205k
  for (i = br->back_num - 1; i >= 0; i--) {
1824
201k
    r = add_mem_num(reg, p[i]);
1825
201k
    if (r) return r;
1826
201k
  }
1827
3.30k
      }
1828
3.72k
    }
1829
3.72k
    break;
1830
1831
3.72k
#ifdef USE_SUBEXP_CALL
1832
5.08k
  case NT_CALL:
1833
5.08k
    r = compile_call(NCALL(node), reg);
1834
5.08k
    break;
1835
0
#endif
1836
1837
3.13M
  case NT_QTFR:
1838
3.13M
    r = compile_quantifier_node(NQTFR(node), reg);
1839
3.13M
    break;
1840
1841
2.95M
  case NT_ENCLOSE:
1842
2.95M
    r = compile_enclose_node(NENCLOSE(node), reg);
1843
2.95M
    break;
1844
1845
2.87M
  case NT_ANCHOR:
1846
2.87M
    r = compile_anchor_node(NANCHOR(node), reg);
1847
2.87M
    break;
1848
1849
0
  default:
1850
#ifdef ONIG_DEBUG
1851
    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1852
#endif
1853
0
    break;
1854
25.1M
  }
1855
1856
25.1M
  return r;
1857
25.1M
}
1858
1859
#ifdef USE_NAMED_GROUP
1860
1861
static int
1862
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1863
131k
{
1864
131k
  int r = 0;
1865
131k
  Node* node = *plink;
1866
1867
131k
  switch (NTYPE(node)) {
1868
14.5k
  case NT_LIST:
1869
18.7k
  case NT_ALT:
1870
85.3k
    do {
1871
85.3k
      r = noname_disable_map(&(NCAR(node)), map, counter);
1872
85.3k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873
18.7k
    break;
1874
1875
26.3k
  case NT_QTFR:
1876
26.3k
    {
1877
26.3k
      Node** ptarget = &(NQTFR(node)->target);
1878
26.3k
      Node*  old = *ptarget;
1879
26.3k
      r = noname_disable_map(ptarget, map, counter);
1880
26.3k
      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1881
1.74k
  onig_reduce_nested_quantifier(node, *ptarget);
1882
1.74k
      }
1883
26.3k
    }
1884
26.3k
    break;
1885
1886
16.5k
  case NT_ENCLOSE:
1887
16.5k
    {
1888
16.5k
      EncloseNode* en = NENCLOSE(node);
1889
16.5k
      if (en->type == ENCLOSE_MEMORY) {
1890
11.7k
  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1891
4.97k
    (*counter)++;
1892
4.97k
    map[en->regnum].new_val = *counter;
1893
4.97k
    en->regnum = *counter;
1894
4.97k
  }
1895
6.73k
  else if (en->regnum != 0) {
1896
6.73k
    *plink = en->target;
1897
6.73k
    en->target = NULL_NODE;
1898
6.73k
    onig_node_free(node);
1899
6.73k
    r = noname_disable_map(plink, map, counter);
1900
6.73k
    break;
1901
6.73k
  }
1902
11.7k
      }
1903
9.81k
      r = noname_disable_map(&(en->target), map, counter);
1904
9.81k
    }
1905
0
    break;
1906
1907
8.80k
  case NT_ANCHOR:
1908
8.80k
    if (NANCHOR(node)->target)
1909
1
      r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1910
8.80k
    break;
1911
1912
60.8k
  default:
1913
60.8k
    break;
1914
131k
  }
1915
1916
131k
  return r;
1917
131k
}
1918
1919
static int
1920
renumber_node_backref(Node* node, GroupNumRemap* map)
1921
1.11k
{
1922
1.11k
  int i, pos, n, old_num;
1923
1.11k
  int *backs;
1924
1.11k
  BRefNode* bn = NBREF(node);
1925
1926
1.11k
  if (! IS_BACKREF_NAME_REF(bn))
1927
1
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1928
1929
1.11k
  old_num = bn->back_num;
1930
1.11k
  if (IS_NULL(bn->back_dynamic))
1931
99
    backs = bn->back_static;
1932
1.01k
  else
1933
1.01k
    backs = bn->back_dynamic;
1934
1935
200k
  for (i = 0, pos = 0; i < old_num; i++) {
1936
199k
    n = map[backs[i]].new_val;
1937
199k
    if (n > 0) {
1938
199k
      backs[pos] = n;
1939
199k
      pos++;
1940
199k
    }
1941
199k
  }
1942
1943
1.11k
  bn->back_num = pos;
1944
1.11k
  return 0;
1945
1.11k
}
1946
1947
static int
1948
renumber_by_map(Node* node, GroupNumRemap* map)
1949
119k
{
1950
119k
  int r = 0;
1951
1952
119k
  switch (NTYPE(node)) {
1953
14.1k
  case NT_LIST:
1954
18.0k
  case NT_ALT:
1955
82.1k
    do {
1956
82.1k
      r = renumber_by_map(NCAR(node), map);
1957
82.1k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1958
18.0k
    break;
1959
24.8k
  case NT_QTFR:
1960
24.8k
    r = renumber_by_map(NQTFR(node)->target, map);
1961
24.8k
    break;
1962
9.55k
  case NT_ENCLOSE:
1963
9.55k
    {
1964
9.55k
      EncloseNode* en = NENCLOSE(node);
1965
9.55k
      if (en->type == ENCLOSE_CONDITION)
1966
128
  en->regnum = map[en->regnum].new_val;
1967
9.55k
      r = renumber_by_map(en->target, map);
1968
9.55k
    }
1969
9.55k
    break;
1970
1971
1.11k
  case NT_BREF:
1972
1.11k
    r = renumber_node_backref(node, map);
1973
1.11k
    break;
1974
1975
8.80k
  case NT_ANCHOR:
1976
8.80k
    if (NANCHOR(node)->target)
1977
1
      r = renumber_by_map(NANCHOR(node)->target, map);
1978
8.80k
    break;
1979
1980
57.2k
  default:
1981
57.2k
    break;
1982
119k
  }
1983
1984
119k
  return r;
1985
119k
}
1986
1987
static int
1988
numbered_ref_check(Node* node)
1989
3.84M
{
1990
3.84M
  int r = 0;
1991
1992
3.84M
  switch (NTYPE(node)) {
1993
278k
  case NT_LIST:
1994
488k
  case NT_ALT:
1995
2.26M
    do {
1996
2.26M
      r = numbered_ref_check(NCAR(node));
1997
2.26M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998
488k
    break;
1999
637k
  case NT_QTFR:
2000
637k
    r = numbered_ref_check(NQTFR(node)->target);
2001
637k
    break;
2002
721k
  case NT_ENCLOSE:
2003
721k
    r = numbered_ref_check(NENCLOSE(node)->target);
2004
721k
    break;
2005
2006
3
  case NT_BREF:
2007
3
    if (! IS_BACKREF_NAME_REF(NBREF(node)))
2008
1
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2009
2
    break;
2010
2011
441k
  case NT_ANCHOR:
2012
441k
    if (NANCHOR(node)->target)
2013
4
      r = numbered_ref_check(NANCHOR(node)->target);
2014
441k
    break;
2015
2016
1.55M
  default:
2017
1.55M
    break;
2018
3.84M
  }
2019
2020
3.84M
  return r;
2021
3.84M
}
2022
2023
static int
2024
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025
3.06k
{
2026
3.06k
  int r, i, pos, counter;
2027
3.06k
  BitStatusType loc;
2028
3.06k
  GroupNumRemap* map;
2029
2030
3.06k
  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2031
3.06k
  CHECK_NULL_RETURN_MEMERR(map);
2032
14.7k
  for (i = 1; i <= env->num_mem; i++) {
2033
11.7k
    map[i].new_val = 0;
2034
11.7k
  }
2035
3.06k
  counter = 0;
2036
3.06k
  r = noname_disable_map(root, map, &counter);
2037
3.06k
  if (r != 0) return r;
2038
2039
3.06k
  r = renumber_by_map(*root, map);
2040
3.06k
  if (r != 0) return r;
2041
2042
14.4k
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043
11.3k
    if (map[i].new_val > 0) {
2044
4.97k
      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2045
4.97k
      pos++;
2046
4.97k
    }
2047
11.3k
  }
2048
2049
3.06k
  loc = env->capture_history;
2050
3.06k
  BIT_STATUS_CLEAR(env->capture_history);
2051
98.0k
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052
95.0k
    if (BIT_STATUS_AT(loc, i)) {
2053
0
      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054
0
    }
2055
95.0k
  }
2056
2057
3.06k
  env->num_mem = env->num_named;
2058
3.06k
  reg->num_mem = env->num_named;
2059
2060
3.06k
  return onig_renumber_name_table(reg, map);
2061
3.06k
}
2062
#endif /* USE_NAMED_GROUP */
2063
2064
#ifdef USE_SUBEXP_CALL
2065
static int
2066
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067
1.14k
{
2068
1.14k
  int i, offset;
2069
1.14k
  EncloseNode* en;
2070
1.14k
  AbsAddrType addr;
2071
2072
6.23k
  for (i = 0; i < uslist->num; i++) {
2073
5.08k
    en = NENCLOSE(uslist->us[i].target);
2074
5.08k
    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075
5.08k
    addr = en->call_addr;
2076
5.08k
    offset = uslist->us[i].offset;
2077
2078
5.08k
    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079
5.08k
  }
2080
1.14k
  return 0;
2081
1.14k
}
2082
#endif
2083
2084
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2085
static int
2086
quantifiers_memory_node_info(Node* node)
2087
41.6M
{
2088
41.6M
  int r = 0;
2089
2090
41.6M
  switch (NTYPE(node)) {
2091
295k
  case NT_LIST:
2092
465k
  case NT_ALT:
2093
465k
    {
2094
465k
      int v;
2095
1.55M
      do {
2096
1.55M
  v = quantifiers_memory_node_info(NCAR(node));
2097
1.55M
  if (v > r) r = v;
2098
1.55M
      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099
465k
    }
2100
465k
    break;
2101
2102
0
# ifdef USE_SUBEXP_CALL
2103
3.92k
  case NT_CALL:
2104
3.92k
    if (IS_CALL_RECURSION(NCALL(node))) {
2105
571
      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106
571
    }
2107
3.35k
    else
2108
3.35k
      r = quantifiers_memory_node_info(NCALL(node)->target);
2109
3.35k
    break;
2110
3.35k
# endif
2111
2112
20.2M
  case NT_QTFR:
2113
20.2M
    {
2114
20.2M
      QtfrNode* qn = NQTFR(node);
2115
20.2M
      if (qn->upper != 0) {
2116
20.2M
  r = quantifiers_memory_node_info(qn->target);
2117
20.2M
      }
2118
20.2M
    }
2119
20.2M
    break;
2120
2121
19.8M
  case NT_ENCLOSE:
2122
19.8M
    {
2123
19.8M
      EncloseNode* en = NENCLOSE(node);
2124
19.8M
      switch (en->type) {
2125
4.49k
      case ENCLOSE_MEMORY:
2126
4.49k
  return NQ_TARGET_IS_EMPTY_MEM;
2127
0
  break;
2128
2129
112k
      case ENCLOSE_OPTION:
2130
19.7M
      case ENCLOSE_STOP_BACKTRACK:
2131
19.7M
      case ENCLOSE_CONDITION:
2132
19.7M
      case ENCLOSE_ABSENT:
2133
19.7M
  r = quantifiers_memory_node_info(en->target);
2134
19.7M
  break;
2135
0
      default:
2136
0
  break;
2137
19.8M
      }
2138
19.8M
    }
2139
19.7M
    break;
2140
2141
19.7M
  case NT_BREF:
2142
144k
  case NT_STR:
2143
145k
  case NT_CTYPE:
2144
1.04M
  case NT_CCLASS:
2145
1.10M
  case NT_CANY:
2146
1.12M
  case NT_ANCHOR:
2147
1.12M
  default:
2148
1.12M
    break;
2149
41.6M
  }
2150
2151
41.6M
  return r;
2152
41.6M
}
2153
#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
2155
static int
2156
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157
45.4M
{
2158
45.4M
  OnigDistance tmin;
2159
45.4M
  int r = 0;
2160
2161
45.4M
  *min = 0;
2162
45.4M
  switch (NTYPE(node)) {
2163
1.33k
  case NT_BREF:
2164
1.33k
    {
2165
1.33k
      int i;
2166
1.33k
      int* backs;
2167
1.33k
      Node** nodes = SCANENV_MEM_NODES(env);
2168
1.33k
      BRefNode* br = NBREF(node);
2169
1.33k
      if (br->state & NST_RECURSION) break;
2170
2171
799
      backs = BACKREFS_P(br);
2172
799
      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2173
791
      r = get_min_match_length(nodes[backs[0]], min, env);
2174
791
      if (r != 0) break;
2175
90.7k
      for (i = 1; i < br->back_num; i++) {
2176
89.9k
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2177
89.9k
  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178
89.9k
  if (r != 0) break;
2179
89.9k
  if (*min > tmin) *min = tmin;
2180
89.9k
      }
2181
791
    }
2182
791
    break;
2183
2184
791
#ifdef USE_SUBEXP_CALL
2185
6.31k
  case NT_CALL:
2186
6.31k
    if (IS_CALL_RECURSION(NCALL(node))) {
2187
1.70k
      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188
1.70k
      if (IS_ENCLOSE_MIN_FIXED(en))
2189
223
  *min = en->min_len;
2190
1.70k
    }
2191
4.60k
    else
2192
4.60k
      r = get_min_match_length(NCALL(node)->target, min, env);
2193
6.31k
    break;
2194
0
#endif
2195
2196
508k
  case NT_LIST:
2197
1.33M
    do {
2198
1.33M
      r = get_min_match_length(NCAR(node), &tmin, env);
2199
1.33M
      if (r == 0) *min += tmin;
2200
1.33M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201
508k
    break;
2202
2203
152k
  case NT_ALT:
2204
152k
    {
2205
152k
      Node *x, *y;
2206
152k
      y = node;
2207
660k
      do {
2208
660k
  x = NCAR(y);
2209
660k
  r = get_min_match_length(x, &tmin, env);
2210
660k
  if (r != 0) break;
2211
659k
  if (y == node) *min = tmin;
2212
506k
  else if (*min > tmin) *min = tmin;
2213
659k
      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214
152k
    }
2215
152k
    break;
2216
2217
555k
  case NT_STR:
2218
555k
    {
2219
555k
      StrNode* sn = NSTR(node);
2220
555k
      *min = sn->end - sn->s;
2221
555k
    }
2222
555k
    break;
2223
2224
1.67k
  case NT_CTYPE:
2225
1.67k
    *min = 1;
2226
1.67k
    break;
2227
2228
4.14M
  case NT_CCLASS:
2229
4.98M
  case NT_CANY:
2230
4.98M
    *min = 1;
2231
4.98M
    break;
2232
2233
19.9M
  case NT_QTFR:
2234
19.9M
    {
2235
19.9M
      QtfrNode* qn = NQTFR(node);
2236
2237
19.9M
      if (qn->lower > 0) {
2238
19.4M
  r = get_min_match_length(qn->target, min, env);
2239
19.4M
  if (r == 0)
2240
19.4M
    *min = distance_multiply(*min, qn->lower);
2241
19.4M
      }
2242
19.9M
    }
2243
19.9M
    break;
2244
2245
19.3M
  case NT_ENCLOSE:
2246
19.3M
    {
2247
19.3M
      EncloseNode* en = NENCLOSE(node);
2248
19.3M
      switch (en->type) {
2249
207k
      case ENCLOSE_MEMORY:
2250
207k
        if (IS_ENCLOSE_MIN_FIXED(en))
2251
194k
          *min = en->min_len;
2252
13.2k
        else {
2253
13.2k
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254
0
      *min = 0;  /* recursive */
2255
13.2k
    else {
2256
13.2k
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2257
13.2k
      r = get_min_match_length(en->target, min, env);
2258
13.2k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2259
13.2k
      if (r == 0) {
2260
13.2k
        en->min_len = *min;
2261
13.2k
        SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2262
13.2k
      }
2263
13.2k
    }
2264
13.2k
        }
2265
207k
        break;
2266
2267
101k
      case ENCLOSE_OPTION:
2268
19.0M
      case ENCLOSE_STOP_BACKTRACK:
2269
19.0M
      case ENCLOSE_CONDITION:
2270
19.0M
  r = get_min_match_length(en->target, min, env);
2271
19.0M
  break;
2272
2273
739
      case ENCLOSE_ABSENT:
2274
739
  break;
2275
19.3M
      }
2276
19.3M
    }
2277
19.3M
    break;
2278
2279
19.3M
  case NT_ANCHOR:
2280
8.33k
  default:
2281
8.33k
    break;
2282
45.4M
  }
2283
2284
45.4M
  return r;
2285
45.4M
}
2286
2287
static int
2288
get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289
227k
{
2290
227k
  OnigDistance tmax;
2291
227k
  int r = 0;
2292
2293
227k
  *max = 0;
2294
227k
  switch (NTYPE(node)) {
2295
4.91k
  case NT_LIST:
2296
19.1k
    do {
2297
19.1k
      r = get_max_match_length(NCAR(node), &tmax, env);
2298
19.1k
      if (r == 0)
2299
19.1k
  *max = distance_add(*max, tmax);
2300
19.1k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301
4.91k
    break;
2302
2303
1.72k
  case NT_ALT:
2304
7.76k
    do {
2305
7.76k
      r = get_max_match_length(NCAR(node), &tmax, env);
2306
7.76k
      if (r == 0 && *max < tmax) *max = tmax;
2307
7.76k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308
1.72k
    break;
2309
2310
9.98k
  case NT_STR:
2311
9.98k
    {
2312
9.98k
      StrNode* sn = NSTR(node);
2313
9.98k
      *max = sn->end - sn->s;
2314
9.98k
    }
2315
9.98k
    break;
2316
2317
0
  case NT_CTYPE:
2318
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319
0
    break;
2320
2321
9.12k
  case NT_CCLASS:
2322
11.0k
  case NT_CANY:
2323
11.0k
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324
11.0k
    break;
2325
2326
997
  case NT_BREF:
2327
997
    {
2328
997
      int i;
2329
997
      int* backs;
2330
997
      Node** nodes = SCANENV_MEM_NODES(env);
2331
997
      BRefNode* br = NBREF(node);
2332
997
      if (br->state & NST_RECURSION) {
2333
465
  *max = ONIG_INFINITE_DISTANCE;
2334
465
  break;
2335
465
      }
2336
532
      backs = BACKREFS_P(br);
2337
90.7k
      for (i = 0; i < br->back_num; i++) {
2338
90.2k
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2339
90.2k
  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340
90.2k
  if (r != 0) break;
2341
90.2k
  if (*max < tmax) *max = tmax;
2342
90.2k
      }
2343
532
    }
2344
532
    break;
2345
2346
532
#ifdef USE_SUBEXP_CALL
2347
532
  case NT_CALL:
2348
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2349
0
      r = get_max_match_length(NCALL(node)->target, max, env);
2350
0
    else
2351
0
      *max = ONIG_INFINITE_DISTANCE;
2352
0
    break;
2353
0
#endif
2354
2355
10.5k
  case NT_QTFR:
2356
10.5k
    {
2357
10.5k
      QtfrNode* qn = NQTFR(node);
2358
2359
10.5k
      if (qn->upper != 0) {
2360
10.5k
  r = get_max_match_length(qn->target, max, env);
2361
10.5k
  if (r == 0 && *max != 0) {
2362
9.94k
    if (! IS_REPEAT_INFINITE(qn->upper))
2363
3.24k
      *max = distance_multiply(*max, qn->upper);
2364
6.70k
    else
2365
6.70k
      *max = ONIG_INFINITE_DISTANCE;
2366
9.94k
  }
2367
10.5k
      }
2368
10.5k
    }
2369
10.5k
    break;
2370
2371
188k
  case NT_ENCLOSE:
2372
188k
    {
2373
188k
      EncloseNode* en = NENCLOSE(node);
2374
188k
      switch (en->type) {
2375
184k
      case ENCLOSE_MEMORY:
2376
184k
  if (IS_ENCLOSE_MAX_FIXED(en))
2377
181k
    *max = en->max_len;
2378
3.12k
  else {
2379
3.12k
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2380
0
      *max = ONIG_INFINITE_DISTANCE;
2381
3.12k
    else {
2382
3.12k
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2383
3.12k
      r = get_max_match_length(en->target, max, env);
2384
3.12k
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2385
3.12k
      if (r == 0) {
2386
3.12k
        en->max_len = *max;
2387
3.12k
        SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2388
3.12k
      }
2389
3.12k
    }
2390
3.12k
  }
2391
184k
  break;
2392
2393
1.13k
      case ENCLOSE_OPTION:
2394
3.49k
      case ENCLOSE_STOP_BACKTRACK:
2395
3.49k
      case ENCLOSE_CONDITION:
2396
3.49k
  r = get_max_match_length(en->target, max, env);
2397
3.49k
  break;
2398
2399
177
      case ENCLOSE_ABSENT:
2400
177
  break;
2401
188k
      }
2402
188k
    }
2403
188k
    break;
2404
2405
188k
  case NT_ANCHOR:
2406
110
  default:
2407
110
    break;
2408
227k
  }
2409
2410
227k
  return r;
2411
227k
}
2412
2413
946
#define GET_CHAR_LEN_VARLEN           -1
2414
1.86k
#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2415
2416
/* fixed size pattern node only */
2417
static int
2418
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419
30.0k
{
2420
30.0k
  int tlen;
2421
30.0k
  int r = 0;
2422
2423
30.0k
  level++;
2424
30.0k
  *len = 0;
2425
30.0k
  switch (NTYPE(node)) {
2426
2.92k
  case NT_LIST:
2427
9.05k
    do {
2428
9.05k
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429
9.05k
      if (r == 0)
2430
8.74k
  *len = (int )distance_add(*len, tlen);
2431
9.05k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432
2.92k
    break;
2433
2434
4.59k
  case NT_ALT:
2435
4.59k
    {
2436
4.59k
      int tlen2;
2437
4.59k
      int varlen = 0;
2438
2439
4.59k
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440
12.9k
      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441
8.32k
  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442
8.32k
  if (r == 0) {
2443
5.10k
    if (tlen != tlen2)
2444
3.04k
      varlen = 1;
2445
5.10k
  }
2446
8.32k
      }
2447
4.59k
      if (r == 0) {
2448
1.36k
  if (varlen != 0) {
2449
937
    if (level == 1)
2450
932
      r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2451
5
    else
2452
5
      r = GET_CHAR_LEN_VARLEN;
2453
937
  }
2454
431
  else
2455
431
    *len = tlen;
2456
1.36k
      }
2457
4.59k
    }
2458
4.59k
    break;
2459
2460
12.6k
  case NT_STR:
2461
12.6k
    {
2462
12.6k
      StrNode* sn = NSTR(node);
2463
12.6k
      UChar *s = sn->s;
2464
107k
      while (s < sn->end) {
2465
94.7k
  s += enclen(reg->enc, s, sn->end);
2466
94.7k
  (*len)++;
2467
94.7k
      }
2468
12.6k
    }
2469
12.6k
    break;
2470
2471
1.73k
  case NT_QTFR:
2472
1.73k
    {
2473
1.73k
      QtfrNode* qn = NQTFR(node);
2474
1.73k
      if (qn->lower == qn->upper) {
2475
1.73k
  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476
1.73k
  if (r == 0)
2477
1.73k
    *len = (int )distance_multiply(tlen, qn->lower);
2478
1.73k
      }
2479
2
      else
2480
2
  r = GET_CHAR_LEN_VARLEN;
2481
1.73k
    }
2482
1.73k
    break;
2483
2484
0
#ifdef USE_SUBEXP_CALL
2485
1
  case NT_CALL:
2486
1
    if (! IS_CALL_RECURSION(NCALL(node)))
2487
1
      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488
0
    else
2489
0
      r = GET_CHAR_LEN_VARLEN;
2490
1
    break;
2491
0
#endif
2492
2493
204
  case NT_CTYPE:
2494
204
    *len = 1;
2495
204
    break;
2496
2497
0
  case NT_CCLASS:
2498
537
  case NT_CANY:
2499
537
    *len = 1;
2500
537
    break;
2501
2502
4.35k
  case NT_ENCLOSE:
2503
4.35k
    {
2504
4.35k
      EncloseNode* en = NENCLOSE(node);
2505
4.35k
      switch (en->type) {
2506
1.13k
      case ENCLOSE_MEMORY:
2507
1.13k
#ifdef USE_SUBEXP_CALL
2508
1.13k
  if (IS_ENCLOSE_CLEN_FIXED(en))
2509
3
    *len = en->char_len;
2510
1.13k
  else {
2511
1.13k
    r = get_char_length_tree1(en->target, reg, len, level);
2512
1.13k
    if (r == 0) {
2513
966
      en->char_len = *len;
2514
966
      SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2515
966
    }
2516
1.13k
  }
2517
1.13k
  break;
2518
0
#endif
2519
3.09k
      case ENCLOSE_OPTION:
2520
3.09k
      case ENCLOSE_STOP_BACKTRACK:
2521
3.09k
      case ENCLOSE_CONDITION:
2522
3.09k
  r = get_char_length_tree1(en->target, reg, len, level);
2523
3.09k
  break;
2524
128
      case ENCLOSE_ABSENT:
2525
128
      default:
2526
128
  break;
2527
4.35k
      }
2528
4.35k
    }
2529
4.35k
    break;
2530
2531
4.35k
  case NT_ANCHOR:
2532
3.06k
    break;
2533
2534
0
  default:
2535
0
    r = GET_CHAR_LEN_VARLEN;
2536
0
    break;
2537
30.0k
  }
2538
2539
30.0k
  return r;
2540
30.0k
}
2541
2542
static int
2543
get_char_length_tree(Node* node, regex_t* reg, int* len)
2544
2.14k
{
2545
2.14k
  return get_char_length_tree1(node, reg, len, 0);
2546
2.14k
}
2547
2548
/* x is not included y ==>  1 : 0 */
2549
static int
2550
is_not_included(Node* x, Node* y, regex_t* reg)
2551
1.54M
{
2552
1.54M
  int i;
2553
1.54M
  OnigDistance len;
2554
1.54M
  OnigCodePoint code;
2555
1.54M
  UChar *p;
2556
1.54M
  int ytype;
2557
2558
3.07M
 retry:
2559
3.07M
  ytype = NTYPE(y);
2560
3.07M
  switch (NTYPE(x)) {
2561
1.08k
  case NT_CTYPE:
2562
1.08k
    {
2563
1.08k
      switch (ytype) {
2564
77
      case NT_CTYPE:
2565
77
  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566
77
      NCTYPE(y)->not   != NCTYPE(x)->not &&
2567
18
      NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568
18
    return 1;
2569
59
  else
2570
59
    return 0;
2571
0
  break;
2572
2573
748
      case NT_CCLASS:
2574
1.53M
      swap:
2575
1.53M
  {
2576
1.53M
    Node* tmp;
2577
1.53M
    tmp = x; x = y; y = tmp;
2578
1.53M
    goto retry;
2579
748
  }
2580
0
  break;
2581
2582
264
      case NT_STR:
2583
264
  goto swap;
2584
0
  break;
2585
2586
0
      default:
2587
0
  break;
2588
1.08k
      }
2589
1.08k
    }
2590
0
    break;
2591
2592
1.53M
  case NT_CCLASS:
2593
1.53M
    {
2594
1.53M
      CClassNode* xc = NCCLASS(x);
2595
1.53M
      switch (ytype) {
2596
1.01k
      case NT_CTYPE:
2597
1.01k
  switch (NCTYPE(y)->ctype) {
2598
1.01k
  case ONIGENC_CTYPE_WORD:
2599
1.01k
    if (NCTYPE(y)->not == 0) {
2600
135
      if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601
10.5k
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602
10.5k
    if (BITSET_AT(xc->bs, i)) {
2603
385
      if (NCTYPE(y)->ascii_range) {
2604
385
        if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605
385
      }
2606
0
      else {
2607
0
        if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608
0
      }
2609
385
    }
2610
10.5k
        }
2611
0
        return 1;
2612
129
      }
2613
6
      return 0;
2614
135
    }
2615
881
    else {
2616
881
      if (IS_NOT_NULL(xc->mbuf)) return 0;
2617
39.2k
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618
39.2k
        int is_word;
2619
39.2k
        if (NCTYPE(y)->ascii_range)
2620
39.2k
    is_word = IS_CODE_SB_WORD(reg->enc, i);
2621
0
        else
2622
0
    is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623
39.2k
        if (! is_word) {
2624
34.1k
    if (!IS_NCCLASS_NOT(xc)) {
2625
33.9k
      if (BITSET_AT(xc->bs, i))
2626
711
        return 0;
2627
33.9k
    }
2628
128
    else {
2629
128
      if (! BITSET_AT(xc->bs, i))
2630
128
        return 0;
2631
128
    }
2632
34.1k
        }
2633
39.2k
      }
2634
1
      return 1;
2635
840
    }
2636
0
    break;
2637
2638
0
  default:
2639
0
    break;
2640
1.01k
  }
2641
0
  break;
2642
2643
593
      case NT_CCLASS:
2644
593
  {
2645
593
    int v;
2646
593
    CClassNode* yc = NCCLASS(y);
2647
2648
109k
    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649
109k
      v = BITSET_AT(xc->bs, i);
2650
109k
      if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651
109k
    (v == 0 && IS_NCCLASS_NOT(xc))) {
2652
52.8k
        v = BITSET_AT(yc->bs, i);
2653
52.8k
        if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654
52.8k
      (v == 0 && IS_NCCLASS_NOT(yc)))
2655
177
    return 0;
2656
52.8k
      }
2657
109k
    }
2658
416
    if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659
391
        (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660
50
      return 1;
2661
366
    return 0;
2662
416
  }
2663
0
  break;
2664
2665
1.53M
      case NT_STR:
2666
1.53M
  goto swap;
2667
0
  break;
2668
2669
0
      default:
2670
0
  break;
2671
1.53M
      }
2672
1.53M
    }
2673
0
    break;
2674
2675
1.54M
  case NT_STR:
2676
1.54M
    {
2677
1.54M
      StrNode* xs = NSTR(x);
2678
1.54M
      if (NSTRING_LEN(x) == 0)
2679
0
  break;
2680
2681
1.54M
      switch (ytype) {
2682
363
      case NT_CTYPE:
2683
363
  switch (NCTYPE(y)->ctype) {
2684
363
  case ONIGENC_CTYPE_WORD:
2685
363
    if (NCTYPE(y)->ascii_range) {
2686
363
      if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687
38
        return NCTYPE(y)->not;
2688
325
      else
2689
325
        return !(NCTYPE(y)->not);
2690
363
    }
2691
0
    else {
2692
0
      if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693
0
        return NCTYPE(y)->not;
2694
0
      else
2695
0
        return !(NCTYPE(y)->not);
2696
0
    }
2697
0
    break;
2698
0
  default:
2699
0
    break;
2700
363
  }
2701
0
  break;
2702
2703
1.53M
      case NT_CCLASS:
2704
1.53M
  {
2705
1.53M
    CClassNode* cc = NCCLASS(y);
2706
2707
1.53M
    code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708
1.53M
             xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709
1.53M
    return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710
363
  }
2711
0
  break;
2712
2713
12.3k
      case NT_STR:
2714
12.3k
  {
2715
12.3k
    UChar *q;
2716
12.3k
    StrNode* ys = NSTR(y);
2717
12.3k
    len = NSTRING_LEN(x);
2718
12.3k
    if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719
12.3k
    if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720
      /* tiny version */
2721
3.13k
      return 0;
2722
3.13k
    }
2723
9.24k
    else {
2724
9.55k
      for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725
9.25k
        if (*p != *q) return 1;
2726
9.25k
      }
2727
9.24k
    }
2728
12.3k
  }
2729
295
  break;
2730
2731
295
      default:
2732
0
  break;
2733
1.54M
      }
2734
1.54M
    }
2735
295
    break;
2736
2737
295
  default:
2738
0
    break;
2739
3.07M
  }
2740
2741
295
  return 0;
2742
3.07M
}
2743
2744
static Node*
2745
get_head_value_node(Node* node, int exact, regex_t* reg)
2746
8.41M
{
2747
8.41M
  Node* n = NULL_NODE;
2748
2749
8.41M
  switch (NTYPE(node)) {
2750
345
  case NT_BREF:
2751
677k
  case NT_ALT:
2752
1.24M
  case NT_CANY:
2753
1.24M
#ifdef USE_SUBEXP_CALL
2754
1.24M
  case NT_CALL:
2755
1.24M
#endif
2756
1.24M
    break;
2757
2758
1.99k
  case NT_CTYPE:
2759
2.57M
  case NT_CCLASS:
2760
2.57M
    if (exact == 0) {
2761
2.39M
      n = node;
2762
2.39M
    }
2763
2.57M
    break;
2764
2765
3.80k
  case NT_LIST:
2766
3.80k
    n = get_head_value_node(NCAR(node), exact, reg);
2767
3.80k
    break;
2768
2769
3.28M
  case NT_STR:
2770
3.28M
    {
2771
3.28M
      StrNode* sn = NSTR(node);
2772
2773
3.28M
      if (sn->end <= sn->s)
2774
61
  break;
2775
2776
3.28M
      if (exact == 0 ||
2777
3.28M
    NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2778
3.28M
  n = node;
2779
3.28M
      }
2780
3.28M
    }
2781
0
    break;
2782
2783
886k
  case NT_QTFR:
2784
886k
    {
2785
886k
      QtfrNode* qn = NQTFR(node);
2786
886k
      if (qn->lower > 0) {
2787
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2788
  if (IS_NOT_NULL(qn->head_exact))
2789
    n = qn->head_exact;
2790
  else
2791
#endif
2792
183k
    n = get_head_value_node(qn->target, exact, reg);
2793
183k
      }
2794
886k
    }
2795
886k
    break;
2796
2797
16.3k
  case NT_ENCLOSE:
2798
16.3k
    {
2799
16.3k
      EncloseNode* en = NENCLOSE(node);
2800
16.3k
      switch (en->type) {
2801
6.95k
      case ENCLOSE_OPTION:
2802
6.95k
  {
2803
6.95k
    OnigOptionType options = reg->options;
2804
2805
6.95k
    reg->options = NENCLOSE(node)->option;
2806
6.95k
    n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2807
6.95k
    reg->options = options;
2808
6.95k
  }
2809
6.95k
  break;
2810
2811
872
      case ENCLOSE_MEMORY:
2812
9.17k
      case ENCLOSE_STOP_BACKTRACK:
2813
9.32k
      case ENCLOSE_CONDITION:
2814
9.32k
  n = get_head_value_node(en->target, exact, reg);
2815
9.32k
  break;
2816
2817
120
      case ENCLOSE_ABSENT:
2818
120
  break;
2819
16.3k
      }
2820
16.3k
    }
2821
16.3k
    break;
2822
2823
396k
  case NT_ANCHOR:
2824
396k
    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2825
94
      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2826
396k
    break;
2827
2828
0
  default:
2829
0
    break;
2830
8.41M
  }
2831
2832
8.41M
  return n;
2833
8.41M
}
2834
2835
static int
2836
check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2837
2.51M
{
2838
2.51M
  int type, r = 0;
2839
2840
2.51M
  type = NTYPE(node);
2841
2.51M
  if ((NTYPE2BIT(type) & type_mask) == 0)
2842
0
    return 1;
2843
2844
2.51M
  switch (type) {
2845
229k
  case NT_LIST:
2846
686k
  case NT_ALT:
2847
1.83M
    do {
2848
1.83M
      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2849
1.83M
        anchor_mask);
2850
1.83M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2851
686k
    break;
2852
2853
227k
  case NT_QTFR:
2854
227k
    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2855
227k
      anchor_mask);
2856
227k
    break;
2857
2858
5.59k
  case NT_ENCLOSE:
2859
5.59k
    {
2860
5.59k
      EncloseNode* en = NENCLOSE(node);
2861
5.59k
      if ((en->type & enclose_mask) == 0)
2862
0
  return 1;
2863
2864
5.59k
      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2865
5.59k
    }
2866
0
    break;
2867
2868
452k
  case NT_ANCHOR:
2869
452k
    type = NANCHOR(node)->type;
2870
452k
    if ((type & anchor_mask) == 0)
2871
0
      return 1;
2872
2873
452k
    if (NANCHOR(node)->target)
2874
450k
      r = check_type_tree(NANCHOR(node)->target,
2875
450k
        type_mask, enclose_mask, anchor_mask);
2876
452k
    break;
2877
2878
1.14M
  default:
2879
1.14M
    break;
2880
2.51M
  }
2881
2.51M
  return r;
2882
2.51M
}
2883
2884
#ifdef USE_SUBEXP_CALL
2885
2886
477k
# define RECURSION_EXIST       1
2887
6.65M
# define RECURSION_INFINITE    2
2888
2889
static int
2890
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2891
16.3M
{
2892
16.3M
  int type;
2893
16.3M
  int r = 0;
2894
2895
16.3M
  type = NTYPE(node);
2896
16.3M
  switch (type) {
2897
2.23M
  case NT_LIST:
2898
2.23M
    {
2899
2.23M
      Node *x;
2900
2.23M
      OnigDistance min;
2901
2.23M
      int ret;
2902
2903
2.23M
      x = node;
2904
6.57M
      do {
2905
6.57M
  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2906
6.57M
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2907
6.57M
  r |= ret;
2908
6.57M
  if (head) {
2909
3.29k
    ret = get_min_match_length(NCAR(x), &min, env);
2910
3.29k
    if (ret != 0) return ret;
2911
3.28k
    if (min != 0) head = 0;
2912
3.28k
  }
2913
6.57M
      } while (IS_NOT_NULL(x = NCDR(x)));
2914
2.23M
    }
2915
2.23M
    break;
2916
2917
2.23M
  case NT_ALT:
2918
17.6k
    {
2919
17.6k
      int ret;
2920
17.6k
      r = RECURSION_EXIST;
2921
76.4k
      do {
2922
76.4k
  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2923
76.4k
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2924
76.2k
  r &= ret;
2925
76.2k
      } while (IS_NOT_NULL(node = NCDR(node)));
2926
17.6k
    }
2927
17.5k
    break;
2928
2929
447k
  case NT_QTFR:
2930
447k
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2931
447k
    if (r == RECURSION_EXIST) {
2932
8.22k
      if (NQTFR(node)->lower == 0) r = 0;
2933
8.22k
    }
2934
447k
    break;
2935
2936
1.92k
  case NT_ANCHOR:
2937
1.92k
    {
2938
1.92k
      AnchorNode* an = NANCHOR(node);
2939
1.92k
      switch (an->type) {
2940
11
      case ANCHOR_PREC_READ:
2941
22
      case ANCHOR_PREC_READ_NOT:
2942
292
      case ANCHOR_LOOK_BEHIND:
2943
348
      case ANCHOR_LOOK_BEHIND_NOT:
2944
348
  r = subexp_inf_recursive_check(an->target, env, head);
2945
348
  break;
2946
1.92k
      }
2947
1.92k
    }
2948
1.92k
    break;
2949
2950
4.59M
  case NT_CALL:
2951
4.59M
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2952
4.59M
    break;
2953
2954
5.05M
  case NT_ENCLOSE:
2955
5.05M
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2956
427k
      return 0;
2957
4.62M
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2958
12.7k
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2959
4.61M
    else {
2960
4.61M
      SET_ENCLOSE_STATUS(node, NST_MARK2);
2961
4.61M
      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962
4.61M
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2963
4.61M
    }
2964
4.61M
    break;
2965
2966
4.61M
  default:
2967
3.96M
    break;
2968
16.3M
  }
2969
2970
15.8M
  return r;
2971
16.3M
}
2972
2973
static int
2974
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2975
1.82M
{
2976
1.82M
  int type;
2977
1.82M
  int r = 0;
2978
2979
1.82M
  type = NTYPE(node);
2980
1.82M
  switch (type) {
2981
211k
  case NT_LIST:
2982
336k
  case NT_ALT:
2983
1.19M
    do {
2984
1.19M
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2985
1.19M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2986
336k
    break;
2987
2988
478k
  case NT_QTFR:
2989
478k
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2990
478k
    break;
2991
2992
4.93k
  case NT_ANCHOR:
2993
4.93k
    {
2994
4.93k
      AnchorNode* an = NANCHOR(node);
2995
4.93k
      switch (an->type) {
2996
229
      case ANCHOR_PREC_READ:
2997
359
      case ANCHOR_PREC_READ_NOT:
2998
492
      case ANCHOR_LOOK_BEHIND:
2999
526
      case ANCHOR_LOOK_BEHIND_NOT:
3000
526
  r = subexp_inf_recursive_check_trav(an->target, env);
3001
526
  break;
3002
4.93k
      }
3003
4.93k
    }
3004
4.93k
    break;
3005
3006
151k
  case NT_ENCLOSE:
3007
151k
    {
3008
151k
      EncloseNode* en = NENCLOSE(node);
3009
3010
151k
      if (IS_ENCLOSE_RECURSION(en)) {
3011
229
  SET_ENCLOSE_STATUS(node, NST_MARK1);
3012
229
  r = subexp_inf_recursive_check(en->target, env, 1);
3013
229
  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014
227
  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3015
227
      }
3016
151k
      r = subexp_inf_recursive_check_trav(en->target, env);
3017
151k
    }
3018
3019
0
    break;
3020
3021
854k
  default:
3022
854k
    break;
3023
1.82M
  }
3024
3025
1.82M
  return r;
3026
1.82M
}
3027
3028
static int
3029
subexp_recursive_check(Node* node)
3030
22.3M
{
3031
22.3M
  int r = 0;
3032
3033
22.3M
  switch (NTYPE(node)) {
3034
3.44M
  case NT_LIST:
3035
3.46M
  case NT_ALT:
3036
10.2M
    do {
3037
10.2M
      r |= subexp_recursive_check(NCAR(node));
3038
10.2M
    } while (IS_NOT_NULL(node = NCDR(node)));
3039
3.46M
    break;
3040
3041
457k
  case NT_QTFR:
3042
457k
    r = subexp_recursive_check(NQTFR(node)->target);
3043
457k
    break;
3044
3045
3.11k
  case NT_ANCHOR:
3046
3.11k
    {
3047
3.11k
      AnchorNode* an = NANCHOR(node);
3048
3.11k
      switch (an->type) {
3049
384
      case ANCHOR_PREC_READ:
3050
399
      case ANCHOR_PREC_READ_NOT:
3051
817
      case ANCHOR_LOOK_BEHIND:
3052
898
      case ANCHOR_LOOK_BEHIND_NOT:
3053
898
  r = subexp_recursive_check(an->target);
3054
898
  break;
3055
3.11k
      }
3056
3.11k
    }
3057
3.11k
    break;
3058
3059
5.82M
  case NT_CALL:
3060
5.82M
    r = subexp_recursive_check(NCALL(node)->target);
3061
5.82M
    if (r != 0) SET_CALL_RECURSION(node);
3062
5.82M
    break;
3063
3064
7.45M
  case NT_ENCLOSE:
3065
7.45M
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3066
1.60M
      return 0;
3067
5.85M
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3068
13.2k
      return 1; /* recursion */
3069
5.83M
    else {
3070
5.83M
      SET_ENCLOSE_STATUS(node, NST_MARK2);
3071
5.83M
      r = subexp_recursive_check(NENCLOSE(node)->target);
3072
5.83M
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3073
5.83M
    }
3074
5.83M
    break;
3075
3076
5.83M
  default:
3077
5.17M
    break;
3078
22.3M
  }
3079
3080
20.7M
  return r;
3081
22.3M
}
3082
3083
3084
static int
3085
subexp_recursive_check_trav(Node* node, ScanEnv* env)
3086
1.82M
{
3087
1.82M
# define FOUND_CALLED_NODE    1
3088
3089
1.82M
  int type;
3090
1.82M
  int r = 0;
3091
3092
1.82M
  type = NTYPE(node);
3093
1.82M
  switch (type) {
3094
212k
  case NT_LIST:
3095
337k
  case NT_ALT:
3096
337k
    {
3097
337k
      int ret;
3098
1.19M
      do {
3099
1.19M
  ret = subexp_recursive_check_trav(NCAR(node), env);
3100
1.19M
  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3101
1.19M
  else if (ret < 0) return ret;
3102
1.19M
      } while (IS_NOT_NULL(node = NCDR(node)));
3103
337k
    }
3104
337k
    break;
3105
3106
478k
  case NT_QTFR:
3107
478k
    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3108
478k
    if (NQTFR(node)->upper == 0) {
3109
622
      if (r == FOUND_CALLED_NODE)
3110
132
  NQTFR(node)->is_referred = 1;
3111
622
    }
3112
478k
    break;
3113
3114
5.09k
  case NT_ANCHOR:
3115
5.09k
    {
3116
5.09k
      AnchorNode* an = NANCHOR(node);
3117
5.09k
      switch (an->type) {
3118
233
      case ANCHOR_PREC_READ:
3119
365
      case ANCHOR_PREC_READ_NOT:
3120
626
      case ANCHOR_LOOK_BEHIND:
3121
681
      case ANCHOR_LOOK_BEHIND_NOT:
3122
681
  r = subexp_recursive_check_trav(an->target, env);
3123
681
  break;
3124
5.09k
      }
3125
5.09k
    }
3126
5.09k
    break;
3127
3128
151k
  case NT_ENCLOSE:
3129
151k
    {
3130
151k
      EncloseNode* en = NENCLOSE(node);
3131
3132
151k
      if (! IS_ENCLOSE_RECURSION(en)) {
3133
151k
  if (IS_ENCLOSE_CALLED(en)) {
3134
2.02k
    SET_ENCLOSE_STATUS(node, NST_MARK1);
3135
2.02k
    r = subexp_recursive_check(en->target);
3136
2.02k
    if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137
2.02k
    CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3138
2.02k
  }
3139
151k
      }
3140
151k
      r = subexp_recursive_check_trav(en->target, env);
3141
151k
      if (IS_ENCLOSE_CALLED(en))
3142
2.02k
  r |= FOUND_CALLED_NODE;
3143
151k
    }
3144
151k
    break;
3145
3146
856k
  default:
3147
856k
    break;
3148
1.82M
  }
3149
3150
1.82M
  return r;
3151
1.82M
}
3152
3153
static int
3154
setup_subexp_call(Node* node, ScanEnv* env)
3155
1.83M
{
3156
1.83M
  int type;
3157
1.83M
  int r = 0;
3158
3159
1.83M
  type = NTYPE(node);
3160
1.83M
  switch (type) {
3161
212k
  case NT_LIST:
3162
648k
    do {
3163
648k
      r = setup_subexp_call(NCAR(node), env);
3164
648k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3165
212k
    break;
3166
3167
125k
  case NT_ALT:
3168
548k
    do {
3169
548k
      r = setup_subexp_call(NCAR(node), env);
3170
548k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3171
125k
    break;
3172
3173
478k
  case NT_QTFR:
3174
478k
    r = setup_subexp_call(NQTFR(node)->target, env);
3175
478k
    break;
3176
151k
  case NT_ENCLOSE:
3177
151k
    r = setup_subexp_call(NENCLOSE(node)->target, env);
3178
151k
    break;
3179
3180
2.47k
  case NT_CALL:
3181
2.47k
    {
3182
2.47k
      CallNode* cn = NCALL(node);
3183
2.47k
      Node** nodes = SCANENV_MEM_NODES(env);
3184
3185
2.47k
      if (cn->group_num != 0) {
3186
2.45k
  int gnum = cn->group_num;
3187
3188
2.45k
# ifdef USE_NAMED_GROUP
3189
2.45k
  if (env->num_named > 0 &&
3190
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3191
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3192
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3193
0
  }
3194
2.45k
# endif
3195
2.45k
  if (gnum > env->num_mem) {
3196
1
    onig_scan_env_set_error_string(env,
3197
1
     ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3198
1
    return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3199
1
  }
3200
3201
2.45k
# ifdef USE_NAMED_GROUP
3202
2.47k
      set_call_attr:
3203
2.47k
# endif
3204
2.47k
  cn->target = nodes[cn->group_num];
3205
2.47k
  if (IS_NULL(cn->target)) {
3206
0
    onig_scan_env_set_error_string(env,
3207
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3208
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3209
0
  }
3210
2.47k
  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3211
2.47k
  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3212
2.47k
  cn->unset_addr_list = env->unset_addr_list;
3213
2.47k
      }
3214
16
# ifdef USE_NAMED_GROUP
3215
16
#  ifdef USE_PERL_SUBEXP_CALL
3216
16
      else if (cn->name == cn->name_end) {
3217
16
  goto set_call_attr;
3218
16
      }
3219
0
#  endif
3220
0
      else {
3221
0
  int *refs;
3222
3223
0
  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3224
0
             &refs);
3225
0
  if (n <= 0) {
3226
0
    onig_scan_env_set_error_string(env,
3227
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3228
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3229
0
  }
3230
0
  else if (n > 1 &&
3231
0
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3232
0
    onig_scan_env_set_error_string(env,
3233
0
      ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3234
0
    return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3235
0
  }
3236
0
  else {
3237
0
    cn->group_num = refs[0];
3238
0
    goto set_call_attr;
3239
0
  }
3240
0
      }
3241
2.47k
# endif
3242
2.47k
    }
3243
2.47k
    break;
3244
3245
5.09k
  case NT_ANCHOR:
3246
5.09k
    {
3247
5.09k
      AnchorNode* an = NANCHOR(node);
3248
3249
5.09k
      switch (an->type) {
3250
233
      case ANCHOR_PREC_READ:
3251
366
      case ANCHOR_PREC_READ_NOT:
3252
627
      case ANCHOR_LOOK_BEHIND:
3253
682
      case ANCHOR_LOOK_BEHIND_NOT:
3254
682
  r = setup_subexp_call(an->target, env);
3255
682
  break;
3256
5.09k
      }
3257
5.09k
    }
3258
5.09k
    break;
3259
3260
854k
  default:
3261
854k
    break;
3262
1.83M
  }
3263
3264
1.83M
  return r;
3265
1.83M
}
3266
#endif
3267
3268
3.77M
/* Bit flags for the `state` argument threaded through the tree setup pass. */
#define IN_ALT          (1 << 0)  /* below an alternation */
#define IN_NOT          (1 << 1)  /* below a negative look-ahead */
#define IN_REPEAT       (1 << 2)  /* below a quantifier */
#define IN_VAR_REPEAT   (1 << 3)  /* below a quantifier with lower != upper */
#define IN_CALL         (1 << 4)  /* below a group that is called */
#define IN_RECCALL      (1 << 5)  /* below a group flagged as recursive */
#define IN_LOOK_BEHIND  (1 << 6)  /* tested by expand_case_fold_string() */
3275
3276
/* divide different length alternatives in look-behind.
3277
  (?<=A|B) ==> (?<=A)|(?<=B)
3278
  (?<!A|B) ==> (?<!A)(?<!B)
3279
*/
3280
static int
3281
divide_look_behind_alternatives(Node* node)
3282
932
{
3283
932
  Node *head, *np, *insert_node;
3284
932
  AnchorNode* an = NANCHOR(node);
3285
932
  int anc_type = an->type;
3286
3287
932
  head = an->target;
3288
932
  np = NCAR(head);
3289
932
  swap_node(node, head);
3290
932
  NCAR(node) = head;
3291
932
  NANCHOR(head)->target = np;
3292
3293
932
  np = node;
3294
2.32k
  while ((np = NCDR(np)) != NULL_NODE) {
3295
1.39k
    insert_node = onig_node_new_anchor(anc_type);
3296
1.39k
    CHECK_NULL_RETURN_MEMERR(insert_node);
3297
1.39k
    NANCHOR(insert_node)->target = NCAR(np);
3298
1.39k
    NCAR(np) = insert_node;
3299
1.39k
  }
3300
3301
932
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3302
799
    np = node;
3303
1.60k
    do {
3304
1.60k
      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3305
1.60k
    } while ((np = NCDR(np)) != NULL_NODE);
3306
799
  }
3307
932
  return 0;
3308
932
}
3309
3310
static int
3311
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3312
1.42k
{
3313
1.42k
  int r, len;
3314
1.42k
  AnchorNode* an = NANCHOR(node);
3315
3316
1.42k
  r = get_char_length_tree(an->target, reg, &len);
3317
1.42k
  if (r == 0)
3318
485
    an->char_len = len;
3319
939
  else if (r == GET_CHAR_LEN_VARLEN)
3320
7
    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3321
932
  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3322
932
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3323
932
      r = divide_look_behind_alternatives(node);
3324
0
    else
3325
0
      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3326
932
  }
3327
3328
1.42k
  return r;
3329
1.42k
}
3330
3331
static int
3332
next_setup(Node* node, Node* next_node, regex_t* reg)
3333
8.95M
{
3334
8.95M
  int type;
3335
3336
9.78M
 retry:
3337
9.78M
  type = NTYPE(node);
3338
9.78M
  if (type == NT_QTFR) {
3339
3.24M
    QtfrNode* qn = NQTFR(node);
3340
3.24M
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3341
2.99M
#ifdef USE_QTFR_PEEK_NEXT
3342
2.99M
      Node* n = get_head_value_node(next_node, 1, reg);
3343
      /* '\0': for UTF-16BE etc... */
3344
2.99M
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3345
1.72M
  qn->next_head_exact = n;
3346
1.72M
      }
3347
2.99M
#endif
3348
      /* automatic possessification a*b ==> (?>a*)b */
3349
2.99M
      if (qn->lower <= 1) {
3350
2.99M
  int ttype = NTYPE(qn->target);
3351
2.99M
  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3352
2.80M
    Node *x, *y;
3353
2.80M
    x = get_head_value_node(qn->target, 0, reg);
3354
2.80M
    if (IS_NOT_NULL(x)) {
3355
2.41M
      y = get_head_value_node(next_node,  0, reg);
3356
2.41M
      if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3357
1.54M
        Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3358
1.54M
        CHECK_NULL_RETURN_MEMERR(en);
3359
1.54M
        SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3360
1.54M
        swap_node(node, en);
3361
1.54M
        NENCLOSE(node)->target = en;
3362
1.54M
      }
3363
2.41M
    }
3364
2.80M
  }
3365
2.99M
      }
3366
2.99M
    }
3367
3.24M
  }
3368
6.54M
  else if (type == NT_ENCLOSE) {
3369
1.04M
    EncloseNode* en = NENCLOSE(node);
3370
1.04M
    if (en->type == ENCLOSE_MEMORY) {
3371
833k
      node = en->target;
3372
833k
      goto retry;
3373
833k
    }
3374
1.04M
  }
3375
8.95M
  return 0;
3376
9.78M
}
3377
3378
3379
static int
3380
update_string_node_case_fold(regex_t* reg, Node *node)
3381
19.0k
{
3382
19.0k
  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3383
19.0k
  UChar *sbuf, *ebuf, *sp;
3384
19.0k
  int r, i, len;
3385
19.0k
  OnigDistance sbuf_size;
3386
19.0k
  StrNode* sn = NSTR(node);
3387
3388
19.0k
  end = sn->end;
3389
19.0k
  sbuf_size = (end - sn->s) * 2;
3390
19.0k
  sbuf = (UChar* )xmalloc(sbuf_size);
3391
19.0k
  CHECK_NULL_RETURN_MEMERR(sbuf);
3392
19.0k
  ebuf = sbuf + sbuf_size;
3393
3394
19.0k
  sp = sbuf;
3395
19.0k
  p = sn->s;
3396
167k
  while (p < end) {
3397
148k
    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3398
331k
    for (i = 0; i < len; i++) {
3399
183k
      if (sp >= ebuf) {
3400
0
  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3401
0
  if (IS_NULL(p)) {
3402
0
    xfree(sbuf);
3403
0
    return ONIGERR_MEMORY;
3404
0
  }
3405
0
  sbuf = p;
3406
0
  sp = sbuf + sbuf_size;
3407
0
  sbuf_size *= 2;
3408
0
  ebuf = sbuf + sbuf_size;
3409
0
      }
3410
3411
183k
      *sp++ = buf[i];
3412
183k
    }
3413
148k
  }
3414
3415
19.0k
  r = onig_node_str_set(node, sbuf, sp);
3416
3417
19.0k
  xfree(sbuf);
3418
19.0k
  return r;
3419
19.0k
}
3420
3421
static int
3422
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3423
         regex_t* reg)
3424
749
{
3425
749
  int r;
3426
749
  Node *node;
3427
3428
749
  node = onig_node_new_str(s, end);
3429
749
  if (IS_NULL(node)) return ONIGERR_MEMORY;
3430
3431
749
  r = update_string_node_case_fold(reg, node);
3432
749
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return r;
3435
0
  }
3436
3437
749
  NSTRING_SET_AMBIG(node);
3438
749
  NSTRING_SET_DONT_GET_OPT_INFO(node);
3439
749
  *rnode = node;
3440
749
  return 0;
3441
749
}
3442
3443
static int
3444
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3445
        int slen)
3446
143k
{
3447
143k
  int i;
3448
3449
229k
  for (i = 0; i < item_num; i++) {
3450
87.5k
    if (items[i].byte_len != slen) {
3451
754
      return 1;
3452
754
    }
3453
86.7k
    if (items[i].code_len != 1) {
3454
441
      return 1;
3455
441
    }
3456
86.7k
  }
3457
142k
  return 0;
3458
143k
}
3459
3460
static int
3461
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3462
          UChar *p, int slen, UChar *end,
3463
          regex_t* reg, Node **rnode)
3464
993
{
3465
993
  int r, i, j, len, varlen;
3466
993
  Node *anode, *var_anode, *snode, *xnode, *an;
3467
993
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3468
3469
993
  *rnode = var_anode = NULL_NODE;
3470
3471
993
  varlen = 0;
3472
3.55k
  for (i = 0; i < item_num; i++) {
3473
3.20k
    if (items[i].byte_len != slen) {
3474
639
      varlen = 1;
3475
639
      break;
3476
639
    }
3477
3.20k
  }
3478
3479
993
  if (varlen != 0) {
3480
639
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3481
639
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3482
3483
639
    xnode = onig_node_new_list(NULL, NULL);
3484
639
    if (IS_NULL(xnode)) goto mem_err;
3485
639
    NCAR(var_anode) = xnode;
3486
3487
639
    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3488
639
    if (IS_NULL(anode)) goto mem_err;
3489
639
    NCAR(xnode) = anode;
3490
639
  }
3491
354
  else {
3492
354
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3493
354
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3494
354
  }
3495
3496
993
  snode = onig_node_new_str(p, p + slen);
3497
993
  if (IS_NULL(snode)) goto mem_err;
3498
3499
993
  NCAR(anode) = snode;
3500
3501
4.29k
  for (i = 0; i < item_num; i++) {
3502
3.29k
    snode = onig_node_new_str(NULL, NULL);
3503
3.29k
    if (IS_NULL(snode)) goto mem_err;
3504
3505
8.73k
    for (j = 0; j < items[i].code_len; j++) {
3506
5.43k
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3507
5.43k
      if (len < 0) {
3508
0
  r = len;
3509
0
  goto mem_err2;
3510
0
      }
3511
3512
5.43k
      r = onig_node_str_cat(snode, buf, buf + len);
3513
5.43k
      if (r != 0) goto mem_err2;
3514
5.43k
    }
3515
3516
3.29k
    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517
3.29k
    if (IS_NULL(an)) {
3518
0
      goto mem_err2;
3519
0
    }
3520
3521
3.29k
    if (items[i].byte_len != slen) {
3522
738
      Node *rem;
3523
738
      UChar *q = p + items[i].byte_len;
3524
3525
738
      if (q < end) {
3526
592
  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3527
592
  if (r != 0) {
3528
0
    onig_node_free(an);
3529
0
    goto mem_err2;
3530
0
  }
3531
3532
592
  xnode = onig_node_list_add(NULL_NODE, snode);
3533
592
  if (IS_NULL(xnode)) {
3534
0
    onig_node_free(an);
3535
0
    onig_node_free(rem);
3536
0
    goto mem_err2;
3537
0
  }
3538
592
  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3539
0
    onig_node_free(an);
3540
0
    onig_node_free(xnode);
3541
0
    onig_node_free(rem);
3542
0
    goto mem_err;
3543
0
  }
3544
3545
592
  NCAR(an) = xnode;
3546
592
      }
3547
146
      else {
3548
146
  NCAR(an) = snode;
3549
146
      }
3550
3551
738
      NCDR(var_anode) = an;
3552
738
      var_anode = an;
3553
738
    }
3554
2.56k
    else {
3555
2.56k
      NCAR(an)     = snode;
3556
2.56k
      NCDR(anode) = an;
3557
2.56k
      anode = an;
3558
2.56k
    }
3559
3.29k
  }
3560
3561
993
  return varlen;
3562
3563
0
 mem_err2:
3564
0
  onig_node_free(snode);
3565
3566
0
 mem_err:
3567
0
  onig_node_free(*rnode);
3568
3569
0
  return ONIGERR_MEMORY;
3570
0
}
3571
3572
1.15k
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3573
3574
static int
3575
expand_case_fold_string(Node* node, regex_t* reg, int state)
3576
26.1k
{
3577
26.1k
  int r, n, len, alt_num;
3578
26.1k
  int varlen = 0;
3579
26.1k
  int is_in_look_behind;
3580
26.1k
  UChar *start, *end, *p;
3581
26.1k
  Node *top_root, *root, *snode, *prev_node;
3582
26.1k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3583
26.1k
  StrNode* sn;
3584
3585
26.1k
  if (NSTRING_IS_AMBIG(node)) return 0;
3586
3587
17.9k
  sn = NSTR(node);
3588
3589
17.9k
  start = sn->s;
3590
17.9k
  end   = sn->end;
3591
17.9k
  if (start >= end) return 0;
3592
3593
17.6k
  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3594
3595
17.6k
  r = 0;
3596
17.6k
  top_root = root = prev_node = snode = NULL_NODE;
3597
17.6k
  alt_num = 1;
3598
17.6k
  p = start;
3599
160k
  while (p < end) {
3600
143k
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3601
143k
             p, end, items);
3602
143k
    if (n < 0) {
3603
0
      r = n;
3604
0
      goto err;
3605
0
    }
3606
3607
143k
    len = enclen(reg->enc, p, end);
3608
3609
143k
    varlen = is_case_fold_variable_len(n, items, len);
3610
143k
    if (n == 0 || varlen == 0 || is_in_look_behind) {
3611
142k
      if (IS_NULL(snode)) {
3612
18.3k
  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3613
4
          onig_node_free(top_root);
3614
4
    top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3615
4
    if (IS_NULL(root)) {
3616
0
      onig_node_free(prev_node);
3617
0
      goto mem_err;
3618
0
    }
3619
4
  }
3620
3621
18.3k
  prev_node = snode = onig_node_new_str(NULL, NULL);
3622
18.3k
  if (IS_NULL(snode)) goto mem_err;
3623
18.3k
  if (IS_NOT_NULL(root)) {
3624
884
    if (IS_NULL(onig_node_list_add(root, snode))) {
3625
0
      onig_node_free(snode);
3626
0
      goto mem_err;
3627
0
    }
3628
884
  }
3629
18.3k
      }
3630
3631
142k
      r = onig_node_str_cat(snode, p, p + len);
3632
142k
      if (r != 0) goto err;
3633
142k
    }
3634
1.15k
    else {
3635
1.15k
      alt_num *= (n + 1);
3636
1.15k
      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3637
3638
993
      if (IS_NOT_NULL(snode)) {
3639
819
  r = update_string_node_case_fold(reg, snode);
3640
819
  if (r == 0) {
3641
819
    NSTRING_SET_AMBIG(snode);
3642
819
  }
3643
819
      }
3644
993
      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3645
691
        onig_node_free(top_root);
3646
691
  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3647
691
  if (IS_NULL(root)) {
3648
0
    onig_node_free(prev_node);
3649
0
    goto mem_err;
3650
0
  }
3651
691
      }
3652
3653
993
      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3654
993
      if (r < 0) goto mem_err;
3655
993
      if (r == 1) {
3656
639
  if (IS_NULL(root)) {
3657
81
    top_root = prev_node;
3658
81
  }
3659
558
  else {
3660
558
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3661
0
      onig_node_free(prev_node);
3662
0
      goto mem_err;
3663
0
    }
3664
558
  }
3665
3666
639
  root = NCAR(prev_node);
3667
639
      }
3668
354
      else { /* r == 0 */
3669
354
  if (IS_NOT_NULL(root)) {
3670
261
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671
0
      onig_node_free(prev_node);
3672
0
      goto mem_err;
3673
0
    }
3674
261
  }
3675
354
      }
3676
3677
993
      snode = NULL_NODE;
3678
993
    }
3679
3680
143k
    p += len;
3681
143k
  }
3682
17.6k
  if (IS_NOT_NULL(snode)) {
3683
17.4k
    r = update_string_node_case_fold(reg, snode);
3684
17.4k
    if (r == 0) {
3685
17.4k
      NSTRING_SET_AMBIG(snode);
3686
17.4k
    }
3687
17.4k
  }
3688
3689
17.6k
  if (p < end) {
3690
157
    Node *srem;
3691
3692
157
    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3693
157
    if (r != 0) goto mem_err;
3694
3695
157
    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3696
33
      onig_node_free(top_root);
3697
33
      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3698
33
      if (IS_NULL(root)) {
3699
0
  onig_node_free(srem);
3700
0
  onig_node_free(prev_node);
3701
0
  goto mem_err;
3702
0
      }
3703
33
    }
3704
3705
157
    if (IS_NULL(root)) {
3706
11
      prev_node = srem;
3707
11
    }
3708
146
    else {
3709
146
      if (IS_NULL(onig_node_list_add(root, srem))) {
3710
0
  onig_node_free(srem);
3711
0
  goto mem_err;
3712
0
      }
3713
146
    }
3714
157
  }
3715
3716
  /* ending */
3717
17.6k
  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3718
17.6k
  swap_node(node, top_root);
3719
17.6k
  onig_node_free(top_root);
3720
17.6k
  return 0;
3721
3722
0
 mem_err:
3723
0
  r = ONIGERR_MEMORY;
3724
3725
0
 err:
3726
0
  onig_node_free(top_root);
3727
0
  return r;
3728
0
}
3729
3730
3731
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3732
3733
# define CEC_THRES_NUM_BIG_REPEAT         512
3734
# define CEC_INFINITE_NUM          0x7fffffff
3735
3736
# define CEC_IN_INFINITE_REPEAT    (1<<0)
3737
# define CEC_IN_FINITE_REPEAT      (1<<1)
3738
# define CEC_CONT_BIG_REPEAT       (1<<2)
3739
3740
static int
3741
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3742
{
3743
  int type;
3744
  int r = state;
3745
3746
  type = NTYPE(node);
3747
  switch (type) {
3748
  case NT_LIST:
3749
    {
3750
      do {
3751
  r = setup_comb_exp_check(NCAR(node), r, env);
3752
      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3753
    }
3754
    break;
3755
3756
  case NT_ALT:
3757
    {
3758
      int ret;
3759
      do {
3760
  ret = setup_comb_exp_check(NCAR(node), state, env);
3761
  r |= ret;
3762
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3763
    }
3764
    break;
3765
3766
  case NT_QTFR:
3767
    {
3768
      int child_state = state;
3769
      int add_state = 0;
3770
      QtfrNode* qn = NQTFR(node);
3771
      Node* target = qn->target;
3772
      int var_num;
3773
3774
      if (! IS_REPEAT_INFINITE(qn->upper)) {
3775
  if (qn->upper > 1) {
3776
    /* {0,1}, {1,1} are allowed */
3777
    child_state |= CEC_IN_FINITE_REPEAT;
3778
3779
    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3780
    if (env->backrefed_mem == 0) {
3781
      if (NTYPE(qn->target) == NT_ENCLOSE) {
3782
        EncloseNode* en = NENCLOSE(qn->target);
3783
        if (en->type == ENCLOSE_MEMORY) {
3784
    if (NTYPE(en->target) == NT_QTFR) {
3785
      QtfrNode* q = NQTFR(en->target);
3786
      if (IS_REPEAT_INFINITE(q->upper)
3787
          && q->greedy == qn->greedy) {
3788
        qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3789
        if (qn->upper == 1)
3790
          child_state = state;
3791
      }
3792
    }
3793
        }
3794
      }
3795
    }
3796
  }
3797
      }
3798
3799
      if (state & CEC_IN_FINITE_REPEAT) {
3800
  qn->comb_exp_check_num = -1;
3801
      }
3802
      else {
3803
  if (IS_REPEAT_INFINITE(qn->upper)) {
3804
    var_num = CEC_INFINITE_NUM;
3805
    child_state |= CEC_IN_INFINITE_REPEAT;
3806
  }
3807
  else {
3808
    var_num = qn->upper - qn->lower;
3809
  }
3810
3811
  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3812
    add_state |= CEC_CONT_BIG_REPEAT;
3813
3814
  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3815
      ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3816
       var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3817
    if (qn->comb_exp_check_num == 0) {
3818
      env->num_comb_exp_check++;
3819
      qn->comb_exp_check_num = env->num_comb_exp_check;
3820
      if (env->curr_max_regnum > env->comb_exp_max_regnum)
3821
        env->comb_exp_max_regnum = env->curr_max_regnum;
3822
    }
3823
  }
3824
      }
3825
3826
      r = setup_comb_exp_check(target, child_state, env);
3827
      r |= add_state;
3828
    }
3829
    break;
3830
3831
  case NT_ENCLOSE:
3832
    {
3833
      EncloseNode* en = NENCLOSE(node);
3834
3835
      switch (en->type) {
3836
      case ENCLOSE_MEMORY:
3837
  {
3838
    if (env->curr_max_regnum < en->regnum)
3839
      env->curr_max_regnum = en->regnum;
3840
3841
    r = setup_comb_exp_check(en->target, state, env);
3842
  }
3843
  break;
3844
3845
      default:
3846
  r = setup_comb_exp_check(en->target, state, env);
3847
  break;
3848
      }
3849
    }
3850
    break;
3851
3852
# ifdef USE_SUBEXP_CALL
3853
  case NT_CALL:
3854
    if (IS_CALL_RECURSION(NCALL(node)))
3855
      env->has_recursion = 1;
3856
    else
3857
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
3858
    break;
3859
# endif
3860
3861
  default:
3862
    break;
3863
  }
3864
3865
  return r;
3866
}
3867
#endif
3868
3869
/* setup_tree does the following work.
3870
 1. check empty loop. (set qn->target_empty_info)
3871
 2. expand ignore-case in char class.
3872
 3. set memory status bit flags. (reg->mem_stats)
3873
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3874
 5. find invalid patterns in look-behind.
3875
 6. expand repeated string.
3876
 */
3877
static int
3878
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3879
23.2M
{
3880
23.2M
  int type;
3881
23.2M
  int r = 0;
3882
3883
23.2M
restart:
3884
23.2M
  type = NTYPE(node);
3885
23.2M
  switch (type) {
3886
2.99M
  case NT_LIST:
3887
2.99M
    {
3888
2.99M
      Node* prev = NULL_NODE;
3889
11.9M
      do {
3890
11.9M
  r = setup_tree(NCAR(node), reg, state, env);
3891
11.9M
  if (IS_NOT_NULL(prev) && r == 0) {
3892
8.95M
    r = next_setup(prev, NCAR(node), reg);
3893
8.95M
  }
3894
11.9M
  prev = NCAR(node);
3895
11.9M
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3896
2.99M
    }
3897
2.99M
    break;
3898
3899
910k
  case NT_ALT:
3900
2.89M
    do {
3901
2.89M
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3902
2.89M
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3903
910k
    break;
3904
3905
4.40M
  case NT_CCLASS:
3906
4.40M
    break;
3907
3908
4.66M
  case NT_STR:
3909
4.66M
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3910
26.1k
      r = expand_case_fold_string(node, reg, state);
3911
26.1k
    }
3912
4.66M
    break;
3913
3914
2.29k
  case NT_CTYPE:
3915
1.28M
  case NT_CANY:
3916
1.28M
    break;
3917
3918
0
#ifdef USE_SUBEXP_CALL
3919
2.20k
  case NT_CALL:
3920
2.20k
    break;
3921
0
#endif
3922
3923
3.65k
  case NT_BREF:
3924
3.65k
    {
3925
3.65k
      int i;
3926
3.65k
      int* p;
3927
3.65k
      Node** nodes = SCANENV_MEM_NODES(env);
3928
3.65k
      BRefNode* br = NBREF(node);
3929
3.65k
      p = BACKREFS_P(br);
3930
205k
      for (i = 0; i < br->back_num; i++) {
3931
202k
  if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3932
202k
  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3933
202k
  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3934
202k
#ifdef USE_BACKREF_WITH_LEVEL
3935
202k
  if (IS_BACKREF_NEST_LEVEL(br)) {
3936
2.18k
    BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3937
2.18k
  }
3938
202k
#endif
3939
202k
  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3940
202k
      }
3941
3.65k
    }
3942
3.65k
    break;
3943
3944
4.68M
  case NT_QTFR:
3945
4.68M
    {
3946
4.68M
      OnigDistance d;
3947
4.68M
      QtfrNode* qn = NQTFR(node);
3948
4.68M
      Node* target = qn->target;
3949
3950
4.68M
      if ((state & IN_REPEAT) != 0) {
3951
690k
  qn->state |= NST_IN_REPEAT;
3952
690k
      }
3953
3954
4.68M
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3955
4.68M
  r = get_min_match_length(target, &d, env);
3956
4.68M
  if (r) break;
3957
4.68M
  if (d == 0) {
3958
34.6k
    qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3959
34.6k
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3960
34.6k
    r = quantifiers_memory_node_info(target);
3961
34.6k
    if (r < 0) break;
3962
34.6k
    if (r > 0) {
3963
5.03k
      qn->target_empty_info = r;
3964
5.03k
    }
3965
34.6k
#endif
3966
#if 0
3967
    r = get_max_match_length(target, &d, env);
3968
    if (r == 0 && d == 0) {
3969
      /*  ()* ==> ()?, ()+ ==> ()  */
3970
      qn->upper = 1;
3971
      if (qn->lower > 1) qn->lower = 1;
3972
      if (NTYPE(target) == NT_STR) {
3973
        qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3974
      }
3975
    }
3976
#endif
3977
34.6k
  }
3978
4.68M
      }
3979
3980
4.68M
      state |= IN_REPEAT;
3981
4.68M
      if (qn->lower != qn->upper)
3982
4.53M
  state |= IN_VAR_REPEAT;
3983
4.68M
      r = setup_tree(target, reg, state, env);
3984
4.68M
      if (r) break;
3985
3986
      /* expand string */
3987
4.68M
#define EXPAND_STRING_MAX_LENGTH  100
3988
4.68M
      if (NTYPE(target) == NT_STR) {
3989
127k
  if (qn->lower > 1) {
3990
5.89k
    int i, n = qn->lower;
3991
5.89k
    OnigDistance len = NSTRING_LEN(target);
3992
5.89k
    StrNode* sn = NSTR(target);
3993
5.89k
    Node* np;
3994
3995
5.89k
    np = onig_node_new_str(sn->s, sn->end);
3996
5.89k
    if (IS_NULL(np)) return ONIGERR_MEMORY;
3997
5.89k
    NSTR(np)->flag = sn->flag;
3998
3999
271k
    for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4000
265k
      r = onig_node_str_cat(np, sn->s, sn->end);
4001
265k
      if (r) {
4002
0
        onig_node_free(np);
4003
0
        return r;
4004
0
      }
4005
265k
    }
4006
5.89k
    if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4007
3.61k
      Node *np1, *np2;
4008
4009
3.61k
      qn->lower -= i;
4010
3.61k
      if (! IS_REPEAT_INFINITE(qn->upper))
4011
2.91k
        qn->upper -= i;
4012
4013
3.61k
      np1 = onig_node_new_list(np, NULL);
4014
3.61k
      if (IS_NULL(np1)) {
4015
0
        onig_node_free(np);
4016
0
        return ONIGERR_MEMORY;
4017
0
      }
4018
3.61k
      swap_node(np1, node);
4019
3.61k
      np2 = onig_node_list_add(node, np1);
4020
3.61k
      if (IS_NULL(np2)) {
4021
0
        onig_node_free(np1);
4022
0
        return ONIGERR_MEMORY;
4023
0
      }
4024
3.61k
    }
4025
2.27k
    else {
4026
2.27k
      swap_node(np, node);
4027
2.27k
      onig_node_free(np);
4028
2.27k
    }
4029
5.89k
    break; /* break case NT_QTFR: */
4030
5.89k
  }
4031
127k
      }
4032
4033
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4034
      if (qn->greedy && (qn->target_empty_info != 0)) {
4035
  if (NTYPE(target) == NT_QTFR) {
4036
    QtfrNode* tqn = NQTFR(target);
4037
    if (IS_NOT_NULL(tqn->head_exact)) {
4038
      qn->head_exact  = tqn->head_exact;
4039
      tqn->head_exact = NULL;
4040
    }
4041
  }
4042
  else {
4043
    qn->head_exact = get_head_value_node(qn->target, 1, reg);
4044
  }
4045
      }
4046
#endif
4047
4.68M
    }
4048
4.67M
    break;
4049
4050
4.67M
  case NT_ENCLOSE:
4051
1.39M
    {
4052
1.39M
      EncloseNode* en = NENCLOSE(node);
4053
4054
1.39M
      switch (en->type) {
4055
343k
      case ENCLOSE_OPTION:
4056
343k
  {
4057
343k
    OnigOptionType options = reg->options;
4058
343k
    reg->options = NENCLOSE(node)->option;
4059
343k
    r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4060
343k
    reg->options = options;
4061
343k
  }
4062
343k
  break;
4063
4064
876k
      case ENCLOSE_MEMORY:
4065
876k
  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4066
14.6k
    BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4067
    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4068
14.6k
  }
4069
876k
  if (IS_ENCLOSE_CALLED(en))
4070
1.77k
    state |= IN_CALL;
4071
876k
  if (IS_ENCLOSE_RECURSION(en))
4072
222
    state |= IN_RECCALL;
4073
876k
  else if ((state & IN_RECCALL) != 0)
4074
832
    SET_CALL_RECURSION(node);
4075
876k
  r = setup_tree(en->target, reg, state, env);
4076
876k
  break;
4077
4078
177k
      case ENCLOSE_STOP_BACKTRACK:
4079
177k
  {
4080
177k
    Node* target = en->target;
4081
177k
    r = setup_tree(target, reg, state, env);
4082
177k
    if (NTYPE(target) == NT_QTFR) {
4083
52.1k
      QtfrNode* tqn = NQTFR(target);
4084
52.1k
      if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4085
50.8k
    tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4086
50.4k
        int qtype = NTYPE(tqn->target);
4087
50.4k
        if (IS_NODE_TYPE_SIMPLE(qtype))
4088
7.75k
    SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4089
50.4k
      }
4090
52.1k
    }
4091
177k
  }
4092
177k
  break;
4093
4094
297
      case ENCLOSE_CONDITION:
4095
297
#ifdef USE_NAMED_GROUP
4096
297
  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4097
0
      env->num_named > 0 &&
4098
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4099
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4100
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4101
0
  }
4102
297
#endif
4103
297
  if (NENCLOSE(node)->regnum > env->num_mem)
4104
0
    return ONIGERR_INVALID_BACKREF;
4105
297
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4106
297
  break;
4107
4108
685
      case ENCLOSE_ABSENT:
4109
685
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4110
685
  break;
4111
1.39M
      }
4112
1.39M
    }
4113
1.39M
    break;
4114
4115
2.86M
  case NT_ANCHOR:
4116
2.86M
    {
4117
2.86M
      AnchorNode* an = NANCHOR(node);
4118
4119
2.86M
      switch (an->type) {
4120
399
      case ANCHOR_PREC_READ:
4121
399
  r = setup_tree(an->target, reg, state, env);
4122
399
  break;
4123
214
      case ANCHOR_PREC_READ_NOT:
4124
214
  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4125
214
  break;
4126
4127
/* allowed node types in look-behind */
4128
0
#define ALLOWED_TYPE_IN_LB  \
4129
1.97k
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4130
1.97k
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4131
4132
568
#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4133
1.40k
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
4134
4135
568
#define ALLOWED_ANCHOR_IN_LB \
4136
568
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4137
568
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4138
568
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4139
568
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4140
1.40k
#define ALLOWED_ANCHOR_IN_LB_NOT \
4141
1.40k
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4142
1.40k
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4143
1.40k
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4144
1.40k
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4145
4146
568
      case ANCHOR_LOOK_BEHIND:
4147
568
  {
4148
568
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4149
568
            ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4150
568
    if (r < 0) return r;
4151
568
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4152
568
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4153
568
    r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4154
568
    if (r != 0) return r;
4155
568
    r = setup_look_behind(node, reg, env);
4156
568
  }
4157
0
  break;
4158
4159
1.40k
      case ANCHOR_LOOK_BEHIND_NOT:
4160
1.40k
  {
4161
1.40k
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4162
1.40k
          ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4163
1.40k
    if (r < 0) return r;
4164
1.40k
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4165
1.40k
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4166
1.40k
    r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4167
1.40k
       env);
4168
1.40k
    if (r != 0) return r;
4169
856
    r = setup_look_behind(node, reg, env);
4170
856
  }
4171
0
  break;
4172
2.86M
      }
4173
2.86M
    }
4174
2.86M
    break;
4175
4176
2.86M
  default:
4177
0
    break;
4178
23.2M
  }
4179
4180
23.2M
  return r;
4181
23.2M
}
4182
4183
/* set skip map for Sunday's quick search */
4184
static int
4185
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4186
      UChar skip[], int ignore_case)
4187
753k
{
4188
753k
  OnigDistance i, len;
4189
753k
  int clen, flen, n, j, k;
4190
753k
  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4191
753k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4192
753k
  OnigEncoding enc = reg->enc;
4193
4194
753k
  len = end - s;
4195
753k
  if (len >= ONIG_CHAR_TABLE_SIZE) {
4196
    /* This should not happen. */
4197
0
    return ONIGERR_TYPE_BUG;
4198
0
  }
4199
4200
753k
  if (ignore_case) {
4201
5.17k
    for (i = 0; i < len; i += clen) {
4202
4.56k
      p = s + i;
4203
4.56k
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4204
4.56k
    p, end, items);
4205
4.56k
      clen = enclen(enc, p, end);
4206
4.56k
      if (p + clen > end)
4207
0
  clen = (int )(end - p);
4208
4209
5.87k
      for (j = 0; j < n; j++) {
4210
2.13k
  if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4211
    /* Different length isn't supported. Stop optimization at here. */
4212
490
    end = p;
4213
490
    goto endcheck;
4214
490
  }
4215
1.64k
  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4216
1.64k
  if (flen != clen) {
4217
    /* Different length isn't supported. Stop optimization at here. */
4218
335
    end = p;
4219
335
    goto endcheck;
4220
335
  }
4221
1.64k
      }
4222
4.56k
    }
4223
1.43k
endcheck:
4224
1.43k
    len = end - s;
4225
1.43k
  }
4226
4227
193M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4228
192M
    skip[i] = (UChar )(len + 1);
4229
753k
  n = 0;
4230
11.1M
  for (i = 0; i < len; i += clen) {
4231
10.4M
    p = s + i;
4232
10.4M
    if (ignore_case)
4233
3.74k
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4234
10.4M
               p, end, items);
4235
10.4M
    clen = enclen(enc, p, end);
4236
10.4M
    if (p + clen > end)
4237
0
      clen = (int )(end - p);
4238
4239
20.8M
    for (j = 0; j < clen; j++) {
4240
10.4M
      skip[s[i + j]] = (UChar )(len - i - j);
4241
10.4M
      for (k = 0; k < n; k++) {
4242
1.28k
  ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4243
1.28k
  skip[buf[j]] = (UChar )(len - i - j);
4244
1.28k
      }
4245
10.4M
    }
4246
10.4M
  }
4247
4248
753k
  return (int )len;
4249
753k
}
4250
4251
typedef struct {
4252
  OnigDistance min;  /* min byte length */
4253
  OnigDistance max;  /* max byte length */
4254
} MinMaxLen;
4255
4256
typedef struct {
4257
  MinMaxLen        mmd;
4258
  OnigEncoding     enc;
4259
  OnigOptionType   options;
4260
  OnigCaseFoldType case_fold_flag;
4261
  ScanEnv*         scan_env;
4262
} OptEnv;
4263
4264
/* Anchor bit sets, kept separately for the left and the right side. */
typedef struct {
  int left_anchor;    /* anchor bits classified as "left" by is_left_anchor() */
  int right_anchor;   /* all other anchor bits */
} OptAncInfo;
4268
4269
typedef struct {
4270
  MinMaxLen  mmd; /* info position */
4271
  OptAncInfo anc;
4272
4273
  int   reach_end;
4274
  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
4275
  int   len;
4276
  UChar s[OPT_EXACT_MAXLEN];
4277
} OptExactInfo;
4278
4279
typedef struct {
4280
  MinMaxLen mmd; /* info position */
4281
  OptAncInfo anc;
4282
4283
  int   value;      /* weighted value */
4284
  UChar map[ONIG_CHAR_TABLE_SIZE];
4285
} OptMapInfo;
4286
4287
typedef struct {
4288
  MinMaxLen    len;
4289
4290
  OptAncInfo   anc;
4291
  OptExactInfo exb;    /* boundary */
4292
  OptExactInfo exm;    /* middle */
4293
  OptExactInfo expr;   /* prec read (?=...) */
4294
4295
  OptMapInfo   map;   /* boundary */
4296
} NodeOptInfo;
4297
4298
4299
static int
4300
map_position_value(OnigEncoding enc, int i)
4301
15.5M
{
4302
15.5M
  static const short int ByteValTable[] = {
4303
15.5M
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4304
15.5M
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4305
15.5M
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4306
15.5M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4307
15.5M
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4308
15.5M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4309
15.5M
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4310
15.5M
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4311
15.5M
  };
4312
4313
15.5M
  if (i < numberof(ByteValTable)) {
4314
15.3M
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4315
0
      return 20;
4316
15.3M
    else
4317
15.3M
      return (int )ByteValTable[i];
4318
15.3M
  }
4319
213k
  else
4320
213k
    return 4;   /* Take it easy. */
4321
15.5M
}
4322
4323
static int
4324
distance_value(MinMaxLen* mm)
4325
10.8M
{
4326
  /* 1000 / (min-max-dist + 1) */
4327
10.8M
  static const short int dist_vals[] = {
4328
10.8M
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4329
10.8M
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4330
10.8M
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4331
10.8M
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4332
10.8M
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4333
10.8M
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4334
10.8M
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4335
10.8M
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4336
10.8M
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4337
10.8M
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4338
10.8M
  };
4339
4340
10.8M
  OnigDistance d;
4341
4342
10.8M
  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4343
4344
4.07M
  d = mm->max - mm->min;
4345
4.07M
  if (d < numberof(dist_vals))
4346
    /* return dist_vals[d] * 16 / (mm->min + 12); */
4347
4.07M
    return (int )dist_vals[d];
4348
3.49k
  else
4349
3.49k
    return 1;
4350
4.07M
}
4351
4352
static int
4353
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4354
5.41M
{
4355
5.41M
  if (v2 <= 0) return -1;
4356
5.41M
  if (v1 <= 0) return  1;
4357
4358
5.41M
  v1 *= distance_value(d1);
4359
5.41M
  v2 *= distance_value(d2);
4360
4361
5.41M
  if (v2 > v1) return  1;
4362
5.01M
  if (v2 < v1) return -1;
4363
4364
2.54M
  if (d2->min < d1->min) return  1;
4365
2.45M
  if (d2->min > d1->min) return -1;
4366
620k
  return 0;
4367
2.45M
}
4368
4369
static int
4370
is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4371
617k
{
4372
617k
  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4373
617k
}
4374
4375
4376
static void
4377
set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4378
15.0M
{
4379
15.0M
  mml->min = min;
4380
15.0M
  mml->max = max;
4381
15.0M
}
4382
4383
static void
4384
clear_mml(MinMaxLen* mml)
4385
106M
{
4386
106M
  mml->min = mml->max = 0;
4387
106M
}
4388
4389
static void
4390
copy_mml(MinMaxLen* to, MinMaxLen* from)
4391
74.3M
{
4392
74.3M
  to->min = from->min;
4393
74.3M
  to->max = from->max;
4394
74.3M
}
4395
4396
static void
4397
add_mml(MinMaxLen* to, MinMaxLen* from)
4398
23.9M
{
4399
23.9M
  to->min = distance_add(to->min, from->min);
4400
23.9M
  to->max = distance_add(to->max, from->max);
4401
23.9M
}
4402
4403
#if 0
/* Disabled: add the same length to both bounds of *to via distance_add(). */
static void
add_len_mml(MinMaxLen* to, OnigDistance len)
{
  OnigDistance lo = distance_add(to->min, len);
  OnigDistance hi = distance_add(to->max, len);

  to->min = lo;
  to->max = hi;
}
#endif
4411
4412
static void
4413
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4414
2.69M
{
4415
2.69M
  if (to->min > from->min) to->min = from->min;
4416
2.69M
  if (to->max < from->max) to->max = from->max;
4417
2.69M
}
4418
4419
static void
4420
copy_opt_env(OptEnv* to, OptEnv* from)
4421
3.00M
{
4422
3.00M
  *to = *from;
4423
3.00M
}
4424
4425
static void
4426
clear_opt_anc_info(OptAncInfo* anc)
4427
144M
{
4428
144M
  anc->left_anchor  = 0;
4429
144M
  anc->right_anchor = 0;
4430
144M
}
4431
4432
static void
4433
copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4434
39.5M
{
4435
39.5M
  *to = *from;
4436
39.5M
}
4437
4438
static void
4439
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4440
        OnigDistance left_len, OnigDistance right_len)
4441
39.5M
{
4442
39.5M
  clear_opt_anc_info(to);
4443
4444
39.5M
  to->left_anchor = left->left_anchor;
4445
39.5M
  if (left_len == 0) {
4446
5.59M
    to->left_anchor |= right->left_anchor;
4447
5.59M
  }
4448
4449
39.5M
  to->right_anchor = right->right_anchor;
4450
39.5M
  if (right_len == 0) {
4451
2.97M
    to->right_anchor |= left->right_anchor;
4452
2.97M
  }
4453
36.5M
  else {
4454
36.5M
    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4455
36.5M
  }
4456
39.5M
}
4457
4458
static int
4459
is_left_anchor(int anc)
4460
2.77M
{
4461
2.77M
  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4462
2.77M
      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4463
1.92M
      anc == ANCHOR_PREC_READ_NOT)
4464
854k
    return 0;
4465
4466
1.92M
  return 1;
4467
2.77M
}
4468
4469
static int
4470
is_set_opt_anc_info(OptAncInfo* to, int anc)
4471
877k
{
4472
877k
  if ((to->left_anchor & anc) != 0) return 1;
4473
4474
877k
  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4475
877k
}
4476
4477
static void
4478
add_opt_anc_info(OptAncInfo* to, int anc)
4479
2.77M
{
4480
2.77M
  if (is_left_anchor(anc))
4481
1.92M
    to->left_anchor |= anc;
4482
854k
  else
4483
854k
    to->right_anchor |= anc;
4484
2.77M
}
4485
4486
static void
4487
remove_opt_anc_info(OptAncInfo* to, int anc)
4488
176
{
4489
176
  if (is_left_anchor(anc))
4490
176
    to->left_anchor &= ~anc;
4491
0
  else
4492
0
    to->right_anchor &= ~anc;
4493
176
}
4494
4495
static void
4496
alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4497
3.22M
{
4498
3.22M
  to->left_anchor  &= add->left_anchor;
4499
3.22M
  to->right_anchor &= add->right_anchor;
4500
3.22M
}
4501
4502
static int
4503
is_full_opt_exact_info(OptExactInfo* ex)
4504
26.8M
{
4505
26.8M
  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4506
26.8M
}
4507
4508
static void
4509
clear_opt_exact_info(OptExactInfo* ex)
4510
79.9M
{
4511
79.9M
  clear_mml(&ex->mmd);
4512
79.9M
  clear_opt_anc_info(&ex->anc);
4513
79.9M
  ex->reach_end   = 0;
4514
79.9M
  ex->ignore_case = -1;   /* unset */
4515
79.9M
  ex->len         = 0;
4516
79.9M
  ex->s[0]        = '\0';
4517
79.9M
}
4518
4519
static void
4520
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4521
3.60M
{
4522
3.60M
  *to = *from;
4523
3.60M
}
4524
4525
static void
4526
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4527
26.9M
{
4528
26.9M
  int i, j, len;
4529
26.9M
  UChar *p, *end;
4530
26.9M
  OptAncInfo tanc;
4531
4532
26.9M
  if (to->ignore_case < 0)
4533
0
    to->ignore_case = add->ignore_case;
4534
26.9M
  else if (to->ignore_case != add->ignore_case)
4535
365
    return ;  /* avoid */
4536
4537
26.9M
  p = add->s;
4538
26.9M
  end = p + add->len;
4539
27.3M
  for (i = to->len; p < end; ) {
4540
27.1M
    len = enclen(enc, p, end);
4541
27.1M
    if (i + len > OPT_EXACT_MAXLEN) break;
4542
775k
    for (j = 0; j < len && p < end; j++)
4543
390k
      to->s[i++] = *p++;
4544
385k
  }
4545
4546
26.9M
  to->len = i;
4547
26.9M
  to->reach_end = (p == end ? add->reach_end : 0);
4548
4549
26.9M
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4550
26.9M
  if (! to->reach_end) tanc.right_anchor = 0;
4551
26.9M
  copy_opt_anc_info(&to->anc, &tanc);
4552
26.9M
}
4553
4554
static void
4555
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4556
        int raw ARG_UNUSED, OnigEncoding enc)
4557
4.67M
{
4558
4.67M
  int i, j, len;
4559
4.67M
  UChar *p;
4560
4561
33.6M
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4562
29.0M
    len = enclen(enc, p, end);
4563
29.0M
    if (i + len > OPT_EXACT_MAXLEN) break;
4564
58.3M
    for (j = 0; j < len && p < end; j++)
4565
29.3M
      to->s[i++] = *p++;
4566
29.0M
  }
4567
4568
4.67M
  to->len = i;
4569
4.67M
}
4570
4571
static void
4572
alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4573
5.95M
{
4574
5.95M
  int i, j, len;
4575
4576
5.95M
  if (add->len == 0 || to->len == 0) {
4577
5.33M
    clear_opt_exact_info(to);
4578
5.33M
    return ;
4579
5.33M
  }
4580
4581
617k
  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4582
88.0k
    clear_opt_exact_info(to);
4583
88.0k
    return ;
4584
88.0k
  }
4585
4586
883k
  for (i = 0; i < to->len && i < add->len; ) {
4587
883k
    if (to->s[i] != add->s[i]) break;
4588
354k
    len = enclen(env->enc, to->s + i, to->s + to->len);
4589
4590
354k
    for (j = 1; j < len; j++) {
4591
429
      if (to->s[i+j] != add->s[i+j]) break;
4592
429
    }
4593
354k
    if (j < len) break;
4594
353k
    i += len;
4595
353k
  }
4596
4597
529k
  if (! add->reach_end || i < add->len || i < to->len) {
4598
529k
    to->reach_end = 0;
4599
529k
  }
4600
529k
  to->len = i;
4601
529k
  if (to->ignore_case < 0)
4602
0
    to->ignore_case = add->ignore_case;
4603
529k
  else if (add->ignore_case >= 0)
4604
529k
    to->ignore_case |= add->ignore_case;
4605
4606
529k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4607
529k
  if (! to->reach_end) to->anc.right_anchor = 0;
4608
529k
}
4609
4610
static void
4611
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4612
25.6M
{
4613
25.6M
  int v1, v2;
4614
4615
25.6M
  v1 = now->len;
4616
25.6M
  v2 = alt->len;
4617
4618
25.6M
  if (v2 == 0) {
4619
20.9M
    return ;
4620
20.9M
  }
4621
4.66M
  else if (v1 == 0) {
4622
3.59M
    copy_opt_exact_info(now, alt);
4623
3.59M
    return ;
4624
3.59M
  }
4625
1.07M
  else if (v1 <= 2 && v2 <= 2) {
4626
    /* ByteValTable[x] is big value --> low price */
4627
177k
    v2 = map_position_value(enc, now->s[0]);
4628
177k
    v1 = map_position_value(enc, alt->s[0]);
4629
4630
177k
    if (now->len > 1) v1 += 5;
4631
177k
    if (alt->len > 1) v2 += 5;
4632
177k
  }
4633
4634
1.07M
  if (now->ignore_case <= 0) v1 *= 2;
4635
1.07M
  if (alt->ignore_case <= 0) v2 *= 2;
4636
4637
1.07M
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4638
4.81k
    copy_opt_exact_info(now, alt);
4639
1.07M
}
4640
4641
static void
4642
clear_opt_map_info(OptMapInfo* map)
4643
24.9M
{
4644
24.9M
  static const OptMapInfo clean_info = {
4645
24.9M
    {0, 0}, {0, 0}, 0,
4646
24.9M
    {
4647
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4648
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4649
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4650
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4651
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4652
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4653
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4654
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4655
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4656
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4657
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4658
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4659
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4660
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4661
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4662
24.9M
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4663
24.9M
    }
4664
24.9M
  };
4665
4666
24.9M
  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4667
24.9M
}
4668
4669
static void
4670
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4671
2.24M
{
4672
2.24M
  *to = *from;
4673
2.24M
}
4674
4675
static void
4676
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4677
13.8M
{
4678
13.8M
  if (map->map[c] == 0) {
4679
13.8M
    map->map[c] = 1;
4680
13.8M
    map->value += map_position_value(enc, c);
4681
13.8M
  }
4682
13.8M
}
4683
4684
static int
4685
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4686
                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4687
31.4k
{
4688
31.4k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4689
31.4k
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4690
31.4k
  int i, n;
4691
4692
31.4k
  add_char_opt_map_info(map, p[0], enc);
4693
4694
31.4k
  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4695
31.4k
  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4696
31.4k
  if (n < 0) return n;
4697
4698
50.8k
  for (i = 0; i < n; i++) {
4699
19.4k
    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4700
19.4k
    add_char_opt_map_info(map, buf[0], enc);
4701
19.4k
  }
4702
4703
31.4k
  return 0;
4704
31.4k
}
4705
4706
static void
4707
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4708
11.9M
{
4709
11.9M
  const int z = 1<<15; /* 32768: something big value */
4710
4711
11.9M
  int v1, v2;
4712
4713
11.9M
  if (alt->value == 0) return ;
4714
4.91M
  if (now->value == 0) {
4715
2.24M
    copy_opt_map_info(now, alt);
4716
2.24M
    return ;
4717
2.24M
  }
4718
4719
2.66M
  v1 = z / now->value;
4720
2.66M
  v2 = z / alt->value;
4721
2.66M
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4722
6.03k
    copy_opt_map_info(now, alt);
4723
2.66M
}
4724
4725
static int
4726
comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4727
1.67M
{
4728
3.34M
#define COMP_EM_BASE  20
4729
1.67M
  int ve, vm;
4730
4731
1.67M
  if (m->value <= 0) return -1;
4732
4733
1.67M
  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4734
1.67M
  vm = COMP_EM_BASE * 5 * 2 / m->value;
4735
1.67M
  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4736
1.67M
}
4737
4738
static void
4739
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4740
1.98M
{
4741
1.98M
  int i, val;
4742
4743
  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4744
1.98M
  if (to->value == 0) return ;
4745
837k
  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4746
126k
    clear_opt_map_info(to);
4747
126k
    return ;
4748
126k
  }
4749
4750
710k
  alt_merge_mml(&to->mmd, &add->mmd);
4751
4752
710k
  val = 0;
4753
182M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4754
181M
    if (add->map[i])
4755
711k
      to->map[i] = 1;
4756
4757
181M
    if (to->map[i])
4758
1.33M
      val += map_position_value(enc, i);
4759
181M
  }
4760
710k
  to->value = val;
4761
4762
710k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4763
710k
}
4764
4765
static void
4766
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4767
24.7M
{
4768
24.7M
  copy_mml(&(opt->exb.mmd),  mmd);
4769
24.7M
  copy_mml(&(opt->expr.mmd), mmd);
4770
24.7M
  copy_mml(&(opt->map.mmd),  mmd);
4771
24.7M
}
4772
4773
static void
4774
clear_node_opt_info(NodeOptInfo* opt)
4775
24.7M
{
4776
24.7M
  clear_mml(&opt->len);
4777
24.7M
  clear_opt_anc_info(&opt->anc);
4778
24.7M
  clear_opt_exact_info(&opt->exb);
4779
24.7M
  clear_opt_exact_info(&opt->exm);
4780
24.7M
  clear_opt_exact_info(&opt->expr);
4781
24.7M
  clear_opt_map_info(&opt->map);
4782
24.7M
}
4783
4784
static void
4785
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4786
3.32M
{
4787
3.32M
  *to = *from;
4788
3.32M
}
4789
4790
static void
4791
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4792
11.9M
{
4793
11.9M
  int exb_reach, exm_reach;
4794
11.9M
  OptAncInfo tanc;
4795
4796
11.9M
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4797
11.9M
  copy_opt_anc_info(&to->anc, &tanc);
4798
4799
11.9M
  if (add->exb.len > 0 && to->len.max == 0) {
4800
587k
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4801
587k
      to->len.max, add->len.max);
4802
587k
    copy_opt_anc_info(&add->exb.anc, &tanc);
4803
587k
  }
4804
4805
11.9M
  if (add->map.value > 0 && to->len.max == 0) {
4806
1.09M
    if (add->map.mmd.max == 0)
4807
1.07M
      add->map.anc.left_anchor |= to->anc.left_anchor;
4808
1.09M
  }
4809
4810
11.9M
  exb_reach = to->exb.reach_end;
4811
11.9M
  exm_reach = to->exm.reach_end;
4812
4813
11.9M
  if (add->len.max != 0)
4814
8.99M
    to->exb.reach_end = to->exm.reach_end = 0;
4815
4816
11.9M
  if (add->exb.len > 0) {
4817
3.19M
    if (exb_reach) {
4818
0
      concat_opt_exact_info(&to->exb, &add->exb, enc);
4819
0
      clear_opt_exact_info(&add->exb);
4820
0
    }
4821
3.19M
    else if (exm_reach) {
4822
116k
      concat_opt_exact_info(&to->exm, &add->exb, enc);
4823
116k
      clear_opt_exact_info(&add->exb);
4824
116k
    }
4825
3.19M
  }
4826
11.9M
  select_opt_exact_info(enc, &to->exm, &add->exb);
4827
11.9M
  select_opt_exact_info(enc, &to->exm, &add->exm);
4828
4829
11.9M
  if (to->expr.len > 0) {
4830
2.45k
    if (add->len.max > 0) {
4831
2.33k
      if (to->expr.len > (int )add->len.max)
4832
626
  to->expr.len = (int )add->len.max;
4833
4834
2.33k
      if (to->expr.mmd.max == 0)
4835
0
  select_opt_exact_info(enc, &to->exb, &to->expr);
4836
2.33k
      else
4837
2.33k
  select_opt_exact_info(enc, &to->exm, &to->expr);
4838
2.33k
    }
4839
2.45k
  }
4840
11.9M
  else if (add->expr.len > 0) {
4841
551
    copy_opt_exact_info(&to->expr, &add->expr);
4842
551
  }
4843
4844
11.9M
  select_opt_map_info(&to->map, &add->map);
4845
4846
11.9M
  add_mml(&to->len, &add->len);
4847
11.9M
}
4848
4849
static void
4850
alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4851
1.98M
{
4852
1.98M
  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4853
1.98M
  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4854
1.98M
  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4855
1.98M
  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4856
1.98M
  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4857
4858
1.98M
  alt_merge_mml(&to->len, &add->len);
4859
1.98M
}
4860
4861
4862
878k
#define MAX_NODE_OPT_INFO_REF_COUNT    5
4863
4864
static int
4865
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4866
24.7M
{
4867
24.7M
  int type;
4868
24.7M
  int r = 0;
4869
4870
24.7M
  clear_node_opt_info(opt);
4871
24.7M
  set_bound_node_opt_info(opt, &env->mmd);
4872
4873
24.7M
  type = NTYPE(node);
4874
24.7M
  switch (type) {
4875
3.00M
  case NT_LIST:
4876
3.00M
    {
4877
3.00M
      OptEnv nenv;
4878
3.00M
      NodeOptInfo nopt;
4879
3.00M
      Node* nd = node;
4880
4881
3.00M
      copy_opt_env(&nenv, env);
4882
11.9M
      do {
4883
11.9M
  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4884
11.9M
  if (r == 0) {
4885
11.9M
    add_mml(&nenv.mmd, &nopt.len);
4886
11.9M
    concat_left_node_opt_info(env->enc, opt, &nopt);
4887
11.9M
  }
4888
11.9M
      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4889
3.00M
    }
4890
3.00M
    break;
4891
4892
908k
  case NT_ALT:
4893
908k
    {
4894
908k
      NodeOptInfo nopt;
4895
908k
      Node* nd = node;
4896
4897
2.89M
      do {
4898
2.89M
  r = optimize_node_left(NCAR(nd), &nopt, env);
4899
2.89M
  if (r == 0) {
4900
2.89M
    if (nd == node) copy_node_opt_info(opt, &nopt);
4901
1.98M
    else            alt_merge_node_opt_info(opt, &nopt, env);
4902
2.89M
  }
4903
2.89M
      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4904
908k
    }
4905
908k
    break;
4906
4907
4.67M
  case NT_STR:
4908
4.67M
    {
4909
4.67M
      StrNode* sn = NSTR(node);
4910
4.67M
      OnigDistance slen = sn->end - sn->s;
4911
4.67M
      int is_raw = NSTRING_IS_RAW(node);
4912
4913
4.67M
      if (! NSTRING_IS_AMBIG(node)) {
4914
4.63M
  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4915
4.63M
          is_raw, env->enc);
4916
4.63M
  opt->exb.ignore_case = 0;
4917
4.63M
  if (slen > 0) {
4918
4.51M
    add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4919
4.51M
  }
4920
4.63M
  set_mml(&opt->len, slen, slen);
4921
4.63M
      }
4922
32.1k
      else {
4923
32.1k
  OnigDistance max;
4924
4925
32.1k
  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4926
761
    int n = onigenc_strlen(env->enc, sn->s, sn->end);
4927
761
    max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4928
761
  }
4929
31.4k
  else {
4930
31.4k
    concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4931
31.4k
            is_raw, env->enc);
4932
31.4k
    opt->exb.ignore_case = 1;
4933
4934
31.4k
    if (slen > 0) {
4935
31.4k
      r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4936
31.4k
            env->enc, env->case_fold_flag);
4937
31.4k
      if (r != 0) break;
4938
31.4k
    }
4939
4940
31.4k
    max = slen;
4941
31.4k
  }
4942
4943
32.1k
  set_mml(&opt->len, slen, max);
4944
32.1k
      }
4945
4946
4.67M
      if ((OnigDistance )opt->exb.len == slen)
4947
4.31M
  opt->exb.reach_end = 1;
4948
4.67M
    }
4949
0
    break;
4950
4951
4.41M
  case NT_CCLASS:
4952
4.41M
    {
4953
4.41M
      int i, z;
4954
4.41M
      CClassNode* cc = NCCLASS(node);
4955
4956
      /* no need to check ignore case. (set in setup_tree()) */
4957
4958
4.41M
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4959
2.66M
  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4960
2.66M
  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4961
4962
2.66M
  set_mml(&opt->len, min, max);
4963
2.66M
      }
4964
1.74M
      else {
4965
448M
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4966
446M
    z = BITSET_AT(cc->bs, i);
4967
446M
    if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4968
9.32M
      add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4969
9.32M
    }
4970
446M
  }
4971
1.74M
  set_mml(&opt->len, 1, 1);
4972
1.74M
      }
4973
4.41M
    }
4974
4.41M
    break;
4975
4976
2.78k
  case NT_CTYPE:
4977
2.78k
    {
4978
2.78k
      int i, min, max;
4979
2.78k
      int maxcode;
4980
4981
2.78k
      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4982
4983
2.78k
      if (max == 1) {
4984
0
  min = 1;
4985
4986
0
  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4987
0
  switch (NCTYPE(node)->ctype) {
4988
0
  case ONIGENC_CTYPE_WORD:
4989
0
    if (NCTYPE(node)->not != 0) {
4990
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4991
0
        if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
4992
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4993
0
        }
4994
0
      }
4995
0
    }
4996
0
    else {
4997
0
      for (i = 0; i < maxcode; i++) {
4998
0
        if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4999
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5000
0
        }
5001
0
      }
5002
0
    }
5003
0
    break;
5004
0
  }
5005
0
      }
5006
2.78k
      else {
5007
2.78k
  min = ONIGENC_MBC_MINLEN(env->enc);
5008
2.78k
      }
5009
2.78k
      set_mml(&opt->len, min, max);
5010
2.78k
    }
5011
0
    break;
5012
5013
1.28M
  case NT_CANY:
5014
1.28M
    {
5015
1.28M
      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5016
1.28M
      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5017
1.28M
      set_mml(&opt->len, min, max);
5018
1.28M
    }
5019
1.28M
    break;
5020
5021
2.86M
  case NT_ANCHOR:
5022
2.86M
    switch (NANCHOR(node)->type) {
5023
171
    case ANCHOR_BEGIN_BUF:
5024
465
    case ANCHOR_BEGIN_POSITION:
5025
1.92M
    case ANCHOR_BEGIN_LINE:
5026
1.92M
    case ANCHOR_END_BUF:
5027
1.92M
    case ANCHOR_SEMI_END_BUF:
5028
2.77M
    case ANCHOR_END_LINE:
5029
2.77M
    case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5030
2.77M
    case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5031
2.77M
      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5032
2.77M
      break;
5033
5034
1.04k
    case ANCHOR_PREC_READ:
5035
1.04k
      {
5036
1.04k
  NodeOptInfo nopt;
5037
5038
1.04k
  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5039
1.04k
  if (r == 0) {
5040
1.04k
    if (nopt.exb.len > 0)
5041
197
      copy_opt_exact_info(&opt->expr, &nopt.exb);
5042
843
    else if (nopt.exm.len > 0)
5043
351
      copy_opt_exact_info(&opt->expr, &nopt.exm);
5044
5045
1.04k
    opt->expr.reach_end = 0;
5046
5047
1.04k
    if (nopt.map.value > 0)
5048
552
      copy_opt_map_info(&opt->map, &nopt.map);
5049
1.04k
  }
5050
1.04k
      }
5051
1.04k
      break;
5052
5053
16
    case ANCHOR_LOOK_BEHIND_NOT:
5054
16
      break;
5055
2.86M
    }
5056
2.86M
    break;
5057
5058
2.86M
  case NT_BREF:
5059
3.73k
    {
5060
3.73k
      int i;
5061
3.73k
      int* backs;
5062
3.73k
      OnigDistance min, max, tmin, tmax;
5063
3.73k
      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5064
3.73k
      BRefNode* br = NBREF(node);
5065
5066
3.73k
      if (br->state & NST_RECURSION) {
5067
615
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5068
615
  break;
5069
615
      }
5070
3.11k
      backs = BACKREFS_P(br);
5071
3.11k
      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5072
3.11k
      if (r != 0) break;
5073
3.11k
      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5074
3.11k
      if (r != 0) break;
5075
93.5k
      for (i = 1; i < br->back_num; i++) {
5076
90.3k
  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5077
90.3k
  if (r != 0) break;
5078
90.3k
  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5079
90.3k
  if (r != 0) break;
5080
90.3k
  if (min > tmin) min = tmin;
5081
90.3k
  if (max < tmax) max = tmax;
5082
90.3k
      }
5083
3.11k
      if (r == 0) set_mml(&opt->len, min, max);
5084
3.11k
    }
5085
0
    break;
5086
5087
0
#ifdef USE_SUBEXP_CALL
5088
2.92k
  case NT_CALL:
5089
2.92k
    if (IS_CALL_RECURSION(NCALL(node)))
5090
416
      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5091
2.50k
    else {
5092
2.50k
      OnigOptionType save = env->options;
5093
2.50k
      env->options = NENCLOSE(NCALL(node)->target)->option;
5094
2.50k
      r = optimize_node_left(NCALL(node)->target, opt, env);
5095
2.50k
      env->options = save;
5096
2.50k
    }
5097
2.92k
    break;
5098
0
#endif
5099
5100
4.69M
  case NT_QTFR:
5101
4.69M
    {
5102
4.69M
      int i;
5103
4.69M
      OnigDistance min, max;
5104
4.69M
      NodeOptInfo nopt;
5105
4.69M
      QtfrNode* qn = NQTFR(node);
5106
5107
4.69M
      r = optimize_node_left(qn->target, &nopt, env);
5108
4.69M
      if (r) break;
5109
5110
4.69M
      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5111
2.17M
  if (env->mmd.max == 0 &&
5112
663k
      NTYPE(qn->target) == NT_CANY && qn->greedy) {
5113
596
    if (IS_MULTILINE(env->options))
5114
      /* implicit anchor: /.*a/ ==> /\A.*a/ */
5115
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5116
596
    else
5117
596
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5118
596
  }
5119
2.17M
      }
5120
2.51M
      else {
5121
2.51M
  if (qn->lower > 0) {
5122
2.41M
    copy_node_opt_info(opt, &nopt);
5123
2.41M
    if (nopt.exb.len > 0) {
5124
46.0k
      if (nopt.exb.reach_end) {
5125
26.8M
        for (i = 2; i <= qn->lower &&
5126
26.8M
        ! is_full_opt_exact_info(&opt->exb); i++) {
5127
26.8M
    concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5128
26.8M
        }
5129
27.5k
        if (i < qn->lower) {
5130
2.97k
    opt->exb.reach_end = 0;
5131
2.97k
        }
5132
27.5k
      }
5133
46.0k
    }
5134
5135
2.41M
    if (qn->lower != qn->upper) {
5136
2.27M
      opt->exb.reach_end = 0;
5137
2.27M
      opt->exm.reach_end = 0;
5138
2.27M
    }
5139
2.41M
    if (qn->lower > 1)
5140
148k
      opt->exm.reach_end = 0;
5141
2.41M
  }
5142
2.51M
      }
5143
5144
4.69M
      min = distance_multiply(nopt.len.min, qn->lower);
5145
4.69M
      if (IS_REPEAT_INFINITE(qn->upper))
5146
4.44M
  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5147
244k
      else
5148
244k
  max = distance_multiply(nopt.len.max, qn->upper);
5149
5150
4.69M
      set_mml(&opt->len, min, max);
5151
4.69M
    }
5152
0
    break;
5153
5154
2.94M
  case NT_ENCLOSE:
5155
2.94M
    {
5156
2.94M
      EncloseNode* en = NENCLOSE(node);
5157
5158
2.94M
      switch (en->type) {
5159
340k
      case ENCLOSE_OPTION:
5160
340k
  {
5161
340k
    OnigOptionType save = env->options;
5162
5163
340k
    env->options = en->option;
5164
340k
    r = optimize_node_left(en->target, opt, env);
5165
340k
    env->options = save;
5166
340k
  }
5167
340k
  break;
5168
5169
878k
      case ENCLOSE_MEMORY:
5170
878k
#ifdef USE_SUBEXP_CALL
5171
878k
  en->opt_count++;
5172
878k
  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5173
387
    OnigDistance min, max;
5174
5175
387
    min = 0;
5176
387
    max = ONIG_INFINITE_DISTANCE;
5177
387
    if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5178
387
    if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5179
387
    set_mml(&opt->len, min, max);
5180
387
  }
5181
877k
  else
5182
877k
#endif
5183
877k
  {
5184
877k
    r = optimize_node_left(en->target, opt, env);
5185
5186
877k
    if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5187
514
      if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5188
176
        remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5189
514
    }
5190
877k
  }
5191
878k
  break;
5192
5193
1.72M
      case ENCLOSE_STOP_BACKTRACK:
5194
1.72M
      case ENCLOSE_CONDITION:
5195
1.72M
  r = optimize_node_left(en->target, opt, env);
5196
1.72M
  break;
5197
5198
332
      case ENCLOSE_ABSENT:
5199
332
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5200
332
  break;
5201
2.94M
      }
5202
2.94M
    }
5203
2.94M
    break;
5204
5205
2.94M
  default:
5206
#ifdef ONIG_DEBUG
5207
    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5208
      NTYPE(node));
5209
#endif
5210
0
    r = ONIGERR_TYPE_BUG;
5211
0
    break;
5212
24.7M
  }
5213
5214
24.7M
  return r;
5215
24.7M
}
5216
5217
static int
5218
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5219
1.19M
{
5220
1.19M
  int allow_reverse;
5221
5222
1.19M
  if (e->len == 0) return 0;
5223
5224
1.19M
  reg->exact = (UChar* )xmalloc(e->len);
5225
1.19M
  CHECK_NULL_RETURN_MEMERR(reg->exact);
5226
1.19M
  xmemcpy(reg->exact, e->s, e->len);
5227
1.19M
  reg->exact_end = reg->exact + e->len;
5228
5229
1.19M
  allow_reverse =
5230
1.19M
  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5231
5232
1.19M
  if (e->ignore_case > 0) {
5233
1.56k
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5234
1.43k
      e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5235
1.43k
          reg->map, 1);
5236
1.43k
      reg->exact_end = reg->exact + e->len;
5237
1.43k
      if (e->len >= 3) {
5238
638
  reg->optimize = (allow_reverse != 0
5239
638
       ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5240
638
      }
5241
793
      else if (e->len > 0) {
5242
644
  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5243
644
      }
5244
149
      else
5245
149
  return 0;
5246
1.43k
    }
5247
130
    else {
5248
130
      reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5249
130
    }
5250
1.56k
  }
5251
1.19M
  else {
5252
1.19M
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5253
751k
      set_bm_skip(reg->exact, reg->exact_end, reg,
5254
751k
      reg->map, 0);
5255
751k
      reg->optimize = (allow_reverse != 0
5256
751k
         ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5257
751k
    }
5258
440k
    else {
5259
440k
      reg->optimize = ONIG_OPTIMIZE_EXACT;
5260
440k
    }
5261
1.19M
  }
5262
5263
1.19M
  reg->dmin = e->mmd.min;
5264
1.19M
  reg->dmax = e->mmd.max;
5265
5266
1.19M
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5267
1.19M
    reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5268
1.19M
  }
5269
5270
1.19M
  return 0;
5271
1.19M
}
5272
5273
static void
5274
set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5275
742k
{
5276
742k
  int i;
5277
5278
190M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5279
189M
    reg->map[i] = m->map[i];
5280
5281
742k
  reg->optimize   = ONIG_OPTIMIZE_MAP;
5282
742k
  reg->dmin       = m->mmd.min;
5283
742k
  reg->dmax       = m->mmd.max;
5284
5285
742k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5286
742k
    reg->threshold_len = (int )(reg->dmin + 1);
5287
742k
  }
5288
742k
}
5289
5290
static void
5291
set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5292
1.93M
{
5293
1.93M
  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
5294
1.93M
  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5295
1.93M
}
5296
5297
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5298
static void print_optimize_info(FILE* f, regex_t* reg);
5299
#endif
5300
5301
static int
5302
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5303
2.29M
{
5304
5305
2.29M
  int r;
5306
2.29M
  NodeOptInfo opt;
5307
2.29M
  OptEnv env;
5308
5309
2.29M
  env.enc            = reg->enc;
5310
2.29M
  env.options        = reg->options;
5311
2.29M
  env.case_fold_flag = reg->case_fold_flag;
5312
2.29M
  env.scan_env   = scan_env;
5313
2.29M
  clear_mml(&env.mmd);
5314
5315
2.29M
  r = optimize_node_left(node, &opt, &env);
5316
2.29M
  if (r) return r;
5317
5318
2.29M
  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5319
2.29M
        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5320
2.29M
        ANCHOR_LOOK_BEHIND);
5321
5322
2.29M
  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5323
161
    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5324
5325
2.29M
  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5326
2.29M
  ANCHOR_PREC_READ_NOT);
5327
5328
2.29M
  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5329
1.36k
    reg->anchor_dmin = opt.len.min;
5330
1.36k
    reg->anchor_dmax = opt.len.max;
5331
1.36k
  }
5332
5333
2.29M
  if (opt.exb.len > 0 || opt.exm.len > 0) {
5334
1.67M
    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5335
1.67M
    if (opt.map.value > 0 &&
5336
1.67M
  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5337
477k
      goto set_map;
5338
477k
    }
5339
1.19M
    else {
5340
1.19M
      r = set_optimize_exact_info(reg, &opt.exb);
5341
1.19M
      set_sub_anchor(reg, &opt.exb.anc);
5342
1.19M
    }
5343
1.67M
  }
5344
618k
  else if (opt.map.value > 0) {
5345
742k
  set_map:
5346
742k
    set_optimize_map_info(reg, &opt.map);
5347
742k
    set_sub_anchor(reg, &opt.map.anc);
5348
742k
  }
5349
354k
  else {
5350
354k
    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5351
354k
    if (opt.len.max == 0)
5352
176k
      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5353
354k
  }
5354
5355
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5356
  print_optimize_info(stderr, reg);
5357
#endif
5358
2.29M
  return r;
5359
2.29M
}
5360
5361
static void
5362
clear_optimize_info(regex_t* reg)
5363
2.29M
{
5364
2.29M
  reg->optimize      = ONIG_OPTIMIZE_NONE;
5365
2.29M
  reg->anchor        = 0;
5366
2.29M
  reg->anchor_dmin   = 0;
5367
2.29M
  reg->anchor_dmax   = 0;
5368
2.29M
  reg->sub_anchor    = 0;
5369
2.29M
  reg->exact_end     = (UChar* )NULL;
5370
2.29M
  reg->threshold_len = 0;
5371
2.29M
  if (IS_NOT_NULL(reg->exact)) {
5372
0
    xfree(reg->exact);
5373
0
    reg->exact = (UChar* )NULL;
5374
0
  }
5375
2.29M
}
5376
5377
#ifdef ONIG_DEBUG
5378
5379
/* Debug helper: write the pattern bytes [s, end) to fp as
 * "PATTERN: /.../ (encoding-name)".
 *
 * For encodings whose minimum character length is above one byte the
 * pattern is decoded code point by code point and values >= 0x80 are
 * printed in hex; otherwise the bytes are written unchanged. */
static void print_enc_string(FILE* fp, OnigEncoding enc,
                             const UChar *s, const UChar *end)
{
  const UChar *cur;

  fprintf(fp, "\nPATTERN: /");

  if (ONIGENC_MBC_MINLEN(enc) <= 1) {
    for (cur = s; cur < end; cur++)
      fputc((int )*cur, fp);
  }
  else {
    OnigCodePoint cp;

    for (cur = s; cur < end; cur += enclen(enc, cur, end)) {
      cp = ONIGENC_MBC_TO_CODE(enc, cur, end);
      if (cp < 0x80)
        fputc((int )cp, fp);
      else
        fprintf(fp, " 0x%04x ", (int )cp);
    }
  }

  fprintf(fp, "/ (%s)\n", enc->name);
}
5410
#endif  /* ONIG_DEBUG */
5411
5412
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413
static void
5414
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5415
{
5416
  if (a == ONIG_INFINITE_DISTANCE)
5417
    fputs("inf", f);
5418
  else
5419
    fprintf(f, "(%"PRIuPTR")", a);
5420
5421
  fputs("-", f);
5422
5423
  if (b == ONIG_INFINITE_DISTANCE)
5424
    fputs("inf", f);
5425
  else
5426
    fprintf(f, "(%"PRIuPTR")", b);
5427
}
5428
5429
static void
5430
print_anchor(FILE* f, int anchor)
5431
{
5432
  int q = 0;
5433
5434
  fprintf(f, "[");
5435
5436
  if (anchor & ANCHOR_BEGIN_BUF) {
5437
    fprintf(f, "begin-buf");
5438
    q = 1;
5439
  }
5440
  if (anchor & ANCHOR_BEGIN_LINE) {
5441
    if (q) fprintf(f, ", ");
5442
    q = 1;
5443
    fprintf(f, "begin-line");
5444
  }
5445
  if (anchor & ANCHOR_BEGIN_POSITION) {
5446
    if (q) fprintf(f, ", ");
5447
    q = 1;
5448
    fprintf(f, "begin-pos");
5449
  }
5450
  if (anchor & ANCHOR_END_BUF) {
5451
    if (q) fprintf(f, ", ");
5452
    q = 1;
5453
    fprintf(f, "end-buf");
5454
  }
5455
  if (anchor & ANCHOR_SEMI_END_BUF) {
5456
    if (q) fprintf(f, ", ");
5457
    q = 1;
5458
    fprintf(f, "semi-end-buf");
5459
  }
5460
  if (anchor & ANCHOR_END_LINE) {
5461
    if (q) fprintf(f, ", ");
5462
    q = 1;
5463
    fprintf(f, "end-line");
5464
  }
5465
  if (anchor & ANCHOR_ANYCHAR_STAR) {
5466
    if (q) fprintf(f, ", ");
5467
    q = 1;
5468
    fprintf(f, "anychar-star");
5469
  }
5470
  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5471
    if (q) fprintf(f, ", ");
5472
    fprintf(f, "anychar-star-ml");
5473
  }
5474
5475
  fprintf(f, "]");
5476
}
5477
5478
static void
5479
print_optimize_info(FILE* f, regex_t* reg)
5480
{
5481
  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5482
                              "EXACT_IC", "MAP",
5483
                              "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5484
5485
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5486
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5487
  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5488
    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5489
  fprintf(f, "\n");
5490
5491
  if (reg->optimize) {
5492
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5493
    fprintf(f, "\n");
5494
  }
5495
  fprintf(f, "\n");
5496
5497
  if (reg->exact) {
5498
    UChar *p;
5499
    fprintf(f, "exact: [");
5500
    for (p = reg->exact; p < reg->exact_end; p++) {
5501
      fputc(*p, f);
5502
    }
5503
    fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5504
  }
5505
  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5506
    int c, i, n = 0;
5507
5508
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5509
      if (reg->map[i]) n++;
5510
5511
    fprintf(f, "map: n=%d\n", n);
5512
    if (n > 0) {
5513
      c = 0;
5514
      fputc('[', f);
5515
      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5516
  if (reg->map[i] != 0) {
5517
    if (c > 0)  fputs(", ", f);
5518
    c++;
5519
    if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5520
        ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5521
      fputc(i, f);
5522
    else
5523
      fprintf(f, "%d", i);
5524
  }
5525
      }
5526
      fprintf(f, "]\n");
5527
    }
5528
  }
5529
}
5530
#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5531
5532
5533
extern void
5534
onig_free_body(regex_t* reg)
5535
2.29M
{
5536
2.29M
  if (IS_NOT_NULL(reg)) {
5537
2.29M
    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5538
2.29M
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5539
2.29M
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5540
2.29M
    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5541
5542
2.29M
#ifdef USE_NAMED_GROUP
5543
2.29M
    onig_names_free(reg);
5544
2.29M
#endif
5545
2.29M
  }
5546
2.29M
}
5547
5548
extern void
5549
onig_free(regex_t* reg)
5550
2.29M
{
5551
2.29M
  if (IS_NOT_NULL(reg)) {
5552
2.29M
    onig_free_body(reg);
5553
2.29M
    xfree(reg);
5554
2.29M
  }
5555
2.29M
}
5556
5557
#ifdef RUBY
5558
size_t
5559
onig_memsize(const regex_t *reg)
5560
{
5561
    size_t size = sizeof(regex_t);
5562
    if (IS_NULL(reg)) return 0;
5563
    if (IS_NOT_NULL(reg->p))                size += reg->alloc;
5564
    if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
5565
    if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5566
    if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
5567
5568
    return size;
5569
}
5570
5571
size_t
5572
onig_region_memsize(const OnigRegion *regs)
5573
{
5574
    size_t size = sizeof(*regs);
5575
    if (IS_NULL(regs)) return 0;
5576
    size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5577
    return size;
5578
}
5579
#endif
5580
5581
/* Move the regex `from` into `to`: release what `to` currently owns, copy
 * the whole regex_t over it, then free the now-empty `from` shell. */
#define REGEX_TRANSFER(to,from) do {      \
  onig_free_body(to);                     \
  xmemcpy(to, from, sizeof(regex_t));     \
  xfree(from);                            \
} while (0)
5586
5587
#if 0
5588
extern void
5589
onig_transfer(regex_t* to, regex_t* from)
5590
{
5591
  REGEX_TRANSFER(to, from);
5592
}
5593
#endif
5594
5595
#ifdef ONIG_DEBUG_COMPILE
5596
static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5597
#endif
5598
#ifdef ONIG_DEBUG_PARSE_TREE
5599
static void print_tree(FILE* f, Node* node);
5600
#endif
5601
5602
#ifdef RUBY
5603
extern int
5604
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5605
       OnigErrorInfo* einfo)
5606
{
5607
  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5608
}
5609
#endif
5610
5611
#ifdef RUBY
5612
extern int
5613
onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5614
        OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5615
#else
5616
extern int
5617
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5618
       OnigErrorInfo* einfo)
5619
#endif
5620
2.29M
{
5621
2.29M
#define COMPILE_INIT_SIZE  20
5622
5623
2.29M
  int r;
5624
2.29M
  OnigDistance init_size;
5625
2.29M
  Node*  root;
5626
2.29M
  ScanEnv  scan_env = {0};
5627
2.29M
#ifdef USE_SUBEXP_CALL
5628
2.29M
  UnsetAddrList  uslist;
5629
2.29M
#endif
5630
5631
2.29M
  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5632
5633
#ifdef RUBY
5634
  scan_env.sourcefile = sourcefile;
5635
  scan_env.sourceline = sourceline;
5636
#endif
5637
5638
#ifdef ONIG_DEBUG
5639
  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5640
#endif
5641
5642
2.29M
  if (reg->alloc == 0) {
5643
2.29M
    init_size = (pattern_end - pattern) * 2;
5644
2.29M
    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5645
2.29M
    r = BBUF_INIT(reg, init_size);
5646
2.29M
    if (r != 0) goto end;
5647
2.29M
  }
5648
0
  else
5649
0
    reg->used = 0;
5650
5651
2.29M
  reg->num_mem            = 0;
5652
2.29M
  reg->num_repeat         = 0;
5653
2.29M
  reg->num_null_check     = 0;
5654
2.29M
  reg->repeat_range_alloc = 0;
5655
2.29M
  reg->repeat_range       = (OnigRepeatRange* )NULL;
5656
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5657
  reg->num_comb_exp_check = 0;
5658
#endif
5659
5660
2.29M
  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5661
2.29M
  if (r != 0) goto err;
5662
5663
#ifdef ONIG_DEBUG_PARSE_TREE
5664
# if 0
5665
  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5666
  print_tree(stderr, root);
5667
# endif
5668
#endif
5669
5670
2.29M
#ifdef USE_NAMED_GROUP
5671
  /* mixed use named group and no-named group */
5672
2.29M
  if (scan_env.num_named > 0 &&
5673
223k
      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5674
223k
      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5675
223k
    if (scan_env.num_named != scan_env.num_mem)
5676
3.06k
      r = disable_noname_group_capture(&root, reg, &scan_env);
5677
220k
    else
5678
220k
      r = numbered_ref_check(root);
5679
5680
223k
    if (r != 0) goto err;
5681
223k
  }
5682
2.29M
#endif
5683
5684
2.29M
#ifdef USE_SUBEXP_CALL
5685
2.29M
  if (scan_env.num_call > 0) {
5686
1.15k
    r = unset_addr_list_init(&uslist, scan_env.num_call);
5687
1.15k
    if (r != 0) goto err;
5688
1.15k
    scan_env.unset_addr_list = &uslist;
5689
1.15k
    r = setup_subexp_call(root, &scan_env);
5690
1.15k
    if (r != 0) goto err_unset;
5691
1.15k
    r = subexp_recursive_check_trav(root, &scan_env);
5692
1.15k
    if (r  < 0) goto err_unset;
5693
1.15k
    r = subexp_inf_recursive_check_trav(root, &scan_env);
5694
1.15k
    if (r != 0) goto err_unset;
5695
5696
1.15k
    reg->num_call = scan_env.num_call;
5697
1.15k
  }
5698
2.28M
  else
5699
2.28M
    reg->num_call = 0;
5700
2.29M
#endif
5701
5702
2.29M
  r = setup_tree(root, reg, 0, &scan_env);
5703
2.29M
  if (r != 0) goto err_unset;
5704
5705
#ifdef ONIG_DEBUG_PARSE_TREE
5706
  print_tree(stderr, root);
5707
#endif
5708
5709
2.29M
  reg->capture_history  = scan_env.capture_history;
5710
2.29M
  reg->bt_mem_start     = scan_env.bt_mem_start;
5711
2.29M
  reg->bt_mem_start    |= reg->capture_history;
5712
2.29M
  if (IS_FIND_CONDITION(reg->options))
5713
0
    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5714
2.29M
  else {
5715
2.29M
    reg->bt_mem_end  = scan_env.bt_mem_end;
5716
2.29M
    reg->bt_mem_end |= reg->capture_history;
5717
2.29M
  }
5718
5719
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5720
  if (scan_env.backrefed_mem == 0
5721
# ifdef USE_SUBEXP_CALL
5722
      || scan_env.num_call == 0
5723
# endif
5724
      ) {
5725
    setup_comb_exp_check(root, 0, &scan_env);
5726
# ifdef USE_SUBEXP_CALL
5727
    if (scan_env.has_recursion != 0) {
5728
      scan_env.num_comb_exp_check = 0;
5729
    }
5730
    else
5731
# endif
5732
    if (scan_env.comb_exp_max_regnum > 0) {
5733
      int i;
5734
      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5735
  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5736
    scan_env.num_comb_exp_check = 0;
5737
    break;
5738
  }
5739
      }
5740
    }
5741
  }
5742
5743
  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5744
#endif
5745
5746
2.29M
  clear_optimize_info(reg);
5747
2.29M
#ifndef ONIG_DONT_OPTIMIZE
5748
2.29M
  r = set_optimize_info_from_tree(root, reg, &scan_env);
5749
2.29M
  if (r != 0) goto err_unset;
5750
2.29M
#endif
5751
5752
2.29M
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5753
757
    xfree(scan_env.mem_nodes_dynamic);
5754
757
    scan_env.mem_nodes_dynamic = (Node** )NULL;
5755
757
  }
5756
5757
2.29M
  r = compile_tree(root, reg);
5758
2.29M
  if (r == 0) {
5759
2.29M
    r = add_opcode(reg, OP_END);
5760
2.29M
#ifdef USE_SUBEXP_CALL
5761
2.29M
    if (scan_env.num_call > 0) {
5762
1.14k
      r = unset_addr_list_fix(&uslist, reg);
5763
1.14k
      unset_addr_list_end(&uslist);
5764
1.14k
      if (r) goto err;
5765
1.14k
    }
5766
2.29M
#endif
5767
5768
2.29M
    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5769
2.75k
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5770
2.28M
    else {
5771
2.28M
      if (reg->bt_mem_start != 0)
5772
2.63k
  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5773
2.28M
      else
5774
2.28M
  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5775
2.28M
    }
5776
2.29M
  }
5777
3
#ifdef USE_SUBEXP_CALL
5778
3
  else if (scan_env.num_call > 0) {
5779
0
    unset_addr_list_end(&uslist);
5780
0
  }
5781
2.29M
#endif
5782
2.29M
  onig_node_free(root);
5783
5784
#ifdef ONIG_DEBUG_COMPILE
5785
# ifdef USE_NAMED_GROUP
5786
  onig_print_names(stderr, reg);
5787
# endif
5788
  print_compiled_byte_code_list(stderr, reg);
5789
#endif
5790
5791
2.29M
 end:
5792
2.29M
  return r;
5793
5794
16
 err_unset:
5795
16
#ifdef USE_SUBEXP_CALL
5796
16
  if (scan_env.num_call > 0) {
5797
6
    unset_addr_list_end(&uslist);
5798
6
  }
5799
16
#endif
5800
133
 err:
5801
133
  if (IS_NOT_NULL(scan_env.error)) {
5802
16
    if (IS_NOT_NULL(einfo)) {
5803
16
      einfo->enc     = scan_env.enc;
5804
16
      einfo->par     = scan_env.error;
5805
16
      einfo->par_end = scan_env.error_end;
5806
16
    }
5807
16
  }
5808
5809
133
  onig_node_free(root);
5810
133
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5811
41
      xfree(scan_env.mem_nodes_dynamic);
5812
133
  return r;
5813
16
}
5814
5815
static int onig_inited = 0;
5816
5817
extern int
5818
onig_reg_init(regex_t* reg, OnigOptionType option,
5819
        OnigCaseFoldType case_fold_flag,
5820
        OnigEncoding enc, const OnigSyntaxType* syntax)
5821
2.29M
{
5822
2.29M
  if (! onig_inited)
5823
16
    onig_init();
5824
5825
2.29M
  if (IS_NULL(reg))
5826
0
    return ONIGERR_INVALID_ARGUMENT;
5827
5828
2.29M
  (reg)->exact            = (UChar* )NULL;
5829
2.29M
  (reg)->chain            = (regex_t* )NULL;
5830
2.29M
  (reg)->p                = (UChar* )NULL;
5831
2.29M
  (reg)->name_table       = (void* )NULL;
5832
2.29M
  (reg)->repeat_range     = (OnigRepeatRange* )NULL;
5833
5834
2.29M
  if (ONIGENC_IS_UNDEF(enc))
5835
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5836
5837
2.29M
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5838
2.29M
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5839
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5840
0
  }
5841
5842
2.29M
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5843
0
    option |= syntax->options;
5844
0
    option &= ~ONIG_OPTION_SINGLELINE;
5845
0
  }
5846
2.29M
  else
5847
2.29M
    option |= syntax->options;
5848
5849
2.29M
  (reg)->enc              = enc;
5850
2.29M
  (reg)->options          = option;
5851
2.29M
  (reg)->syntax           = syntax;
5852
2.29M
  (reg)->optimize         = 0;
5853
5854
2.29M
  (reg)->alloc            = 0;
5855
2.29M
  (reg)->used             = 0;
5856
5857
2.29M
  (reg)->case_fold_flag   = case_fold_flag;
5858
2.29M
  return 0;
5859
2.29M
}
5860
5861
extern int
5862
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5863
          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5864
          const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5865
0
{
5866
0
  int r;
5867
5868
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5869
0
  if (r) return r;
5870
5871
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
5872
0
  return r;
5873
0
}
5874
5875
extern int
5876
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5877
    OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5878
    OnigErrorInfo* einfo)
5879
2.29M
{
5880
2.29M
  int r;
5881
5882
2.29M
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5883
2.29M
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5884
5885
2.29M
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5886
2.29M
  if (r) goto err;
5887
5888
2.29M
  r = onig_compile(*reg, pattern, pattern_end, einfo);
5889
2.29M
  if (r) {
5890
136
  err:
5891
136
    onig_free(*reg);
5892
136
    *reg = NULL;
5893
136
  }
5894
2.29M
  return r;
5895
2.29M
}
5896
5897
extern int
5898
onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5899
0
{
5900
0
  return onig_init();
5901
0
}
5902
5903
extern int
5904
onig_init(void)
5905
88.7k
{
5906
88.7k
  if (onig_inited != 0)
5907
86.9k
    return 0;
5908
5909
1.86k
  onig_inited = 1;
5910
5911
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5912
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
5913
#endif
5914
5915
1.86k
  onigenc_init();
5916
  /* onigenc_set_default_caseconv_table((UChar* )0); */
5917
5918
#ifdef ONIG_DEBUG_STATISTICS
5919
  onig_statistics_init();
5920
#endif
5921
5922
1.86k
  return 0;
5923
88.7k
}
5924
5925
5926
static OnigEndCallListItemType* EndCallTop;
5927
5928
extern void onig_add_end_call(void (*func)(void))
5929
0
{
5930
0
  OnigEndCallListItemType* item;
5931
5932
0
  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5933
0
  if (item == 0) return ;
5934
5935
0
  item->next = EndCallTop;
5936
0
  item->func = func;
5937
5938
0
  EndCallTop = item;
5939
0
}
5940
5941
static void
5942
exec_end_call_list(void)
5943
1.84k
{
5944
1.84k
  OnigEndCallListItemType* prev;
5945
1.84k
  void (*func)(void);
5946
5947
1.84k
  while (EndCallTop != 0) {
5948
0
    func = EndCallTop->func;
5949
0
    (*func)();
5950
5951
0
    prev = EndCallTop;
5952
0
    EndCallTop = EndCallTop->next;
5953
0
    xfree(prev);
5954
0
  }
5955
1.84k
}
5956
5957
extern int
5958
onig_end(void)
5959
1.84k
{
5960
1.84k
  exec_end_call_list();
5961
5962
#ifdef ONIG_DEBUG_STATISTICS
5963
  onig_print_statistics(stderr);
5964
#endif
5965
5966
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5967
  _CrtDumpMemoryLeaks();
5968
#endif
5969
5970
1.84k
  onig_inited = 0;
5971
5972
1.84k
  return 0;
5973
1.84k
}
5974
5975
extern int
5976
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5977
2.86M
{
5978
2.86M
  OnigCodePoint n, *data;
5979
2.86M
  OnigCodePoint low, high, x;
5980
5981
2.86M
  GET_CODE_POINT(n, p);
5982
2.86M
  data = (OnigCodePoint* )p;
5983
2.86M
  data++;
5984
5985
13.1M
  for (low = 0, high = n; low < high; ) {
5986
10.3M
    x = (low + high) >> 1;
5987
10.3M
    if (code > data[x * 2 + 1])
5988
5.41M
      low = x + 1;
5989
4.90M
    else
5990
4.90M
      high = x;
5991
10.3M
  }
5992
5993
2.86M
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5994
2.86M
}
5995
5996
extern int
5997
onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5998
8.72M
{
5999
8.72M
  int found;
6000
6001
8.72M
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6002
7.17M
    if (IS_NULL(cc->mbuf)) {
6003
4.31M
      found = 0;
6004
4.31M
    }
6005
2.85M
    else {
6006
2.85M
      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6007
2.85M
    }
6008
7.17M
  }
6009
1.54M
  else {
6010
1.54M
    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6011
1.54M
  }
6012
6013
8.72M
  if (IS_NCCLASS_NOT(cc))
6014
2.10M
    return !found;
6015
6.62M
  else
6016
6.62M
    return found;
6017
8.72M
}
6018
6019
extern int
6020
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6021
8.72M
{
6022
8.72M
  int len;
6023
6024
8.72M
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6025
0
    len = 2;
6026
0
  }
6027
8.72M
  else {
6028
8.72M
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6029
8.72M
  }
6030
8.72M
  return onig_is_code_in_cc_len(len, code, cc);
6031
8.72M
}
6032
6033
6034
#ifdef ONIG_DEBUG
6035
6036
/* arguments type */
6037
# define ARG_SPECIAL     -1
6038
# define ARG_NON          0
6039
# define ARG_RELADDR      1
6040
# define ARG_ABSADDR      2
6041
# define ARG_LENGTH       3
6042
# define ARG_MEMNUM       4
6043
# define ARG_OPTION       5
6044
# define ARG_STATE_CHECK  6
6045
6046
OnigOpInfoType OnigOpInfo[] = {
6047
  { OP_FINISH,            "finish",          ARG_NON },
6048
  { OP_END,               "end",             ARG_NON },
6049
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
6050
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
6051
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
6052
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
6053
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
6054
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
6055
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
6056
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
6057
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
6058
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
6059
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
6060
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
6061
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
6062
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
6063
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
6064
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
6065
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
6066
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
6067
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
6068
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
6069
  { OP_ANYCHAR,           "anychar",         ARG_NON },
6070
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
6071
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
6072
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
6073
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6074
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6075
  { OP_WORD,                "word",            ARG_NON },
6076
  { OP_NOT_WORD,            "not-word",        ARG_NON },
6077
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
6078
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
6079
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
6080
  { OP_WORD_END,            "word-end",        ARG_NON },
6081
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
6082
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
6083
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
6084
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6085
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
6086
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
6087
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
6088
  { OP_END_BUF,             "end-buf",         ARG_NON },
6089
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
6090
  { OP_END_LINE,            "end-line",        ARG_NON },
6091
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
6092
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
6093
  { OP_BACKREF1,            "backref1",             ARG_NON },
6094
  { OP_BACKREF2,            "backref2",             ARG_NON },
6095
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
6096
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
6097
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
6098
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
6099
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
6100
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
6101
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
6102
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
6103
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
6104
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
6105
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
6106
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
6107
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
6108
  { OP_KEEP,                "keep",                 ARG_NON },
6109
  { OP_FAIL,                "fail",                 ARG_NON },
6110
  { OP_JUMP,                "jump",                 ARG_RELADDR },
6111
  { OP_PUSH,                "push",                 ARG_RELADDR },
6112
  { OP_POP,                 "pop",                  ARG_NON },
6113
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
6114
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
6115
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
6116
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
6117
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
6118
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
6119
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
6120
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
6121
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
6122
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
6123
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
6124
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
6125
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
6126
  { OP_POP_POS,              "pop-pos",              ARG_NON },
6127
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
6128
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
6129
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
6130
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
6131
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
6132
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6133
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6134
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
6135
  { OP_ABSENT,               "absent",               ARG_RELADDR },
6136
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
6137
  { OP_CALL,                 "call",                 ARG_ABSADDR },
6138
  { OP_RETURN,               "return",               ARG_NON },
6139
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
6140
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
6141
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6142
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
6143
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
6144
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6145
    "state-check-anychar-ml*", ARG_STATE_CHECK },
6146
  { -1, "", ARG_NON }
6147
};
6148
6149
static const char*
6150
op2name(int opcode)
6151
{
6152
  int i;
6153
6154
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6155
    if (opcode == OnigOpInfo[i].opcode)
6156
      return OnigOpInfo[i].name;
6157
  }
6158
  return "";
6159
}
6160
6161
static int
6162
op2arg_type(int opcode)
6163
{
6164
  int i;
6165
6166
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6167
    if (opcode == OnigOpInfo[i].opcode)
6168
      return OnigOpInfo[i].arg_type;
6169
  }
6170
  return ARG_SPECIAL;
6171
}
6172
6173
# ifdef ONIG_DEBUG_PARSE_TREE
6174
/*
 * Write `indent` space characters to f.
 * A zero or negative `indent` writes nothing.
 */
static void
Indent(FILE* f, int indent)
{
  int remaining = indent;

  while (remaining-- > 0) {
    putc(' ', f);
  }
}
6180
# endif /* ONIG_DEBUG_PARSE_TREE */
6181
6182
static void
6183
p_string(FILE* f, ptrdiff_t len, UChar* s)
6184
{
6185
  fputs(":", f);
6186
  while (len-- > 0) { fputc(*s++, f); }
6187
}
6188
6189
static void
6190
p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6191
{
6192
  int x = len * mb_len;
6193
6194
  fprintf(f, ":%d:", len);
6195
  while (x-- > 0) { fputc(*s++, f); }
6196
}
6197
6198
extern void
6199
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6200
                              OnigEncoding enc)
6201
{
6202
  int i, n, arg_type;
6203
  RelAddrType addr;
6204
  LengthType len;
6205
  MemNumType mem;
6206
  StateCheckNumType scn;
6207
  OnigCodePoint code;
6208
  UChar *q;
6209
6210
  fprintf(f, "[%s", op2name(*bp));
6211
  arg_type = op2arg_type(*bp);
6212
  if (arg_type != ARG_SPECIAL) {
6213
    bp++;
6214
    switch (arg_type) {
6215
    case ARG_NON:
6216
      break;
6217
    case ARG_RELADDR:
6218
      GET_RELADDR_INC(addr, bp);
6219
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6220
      break;
6221
    case ARG_ABSADDR:
6222
      GET_ABSADDR_INC(addr, bp);
6223
      fprintf(f, ":(%d)", addr);
6224
      break;
6225
    case ARG_LENGTH:
6226
      GET_LENGTH_INC(len, bp);
6227
      fprintf(f, ":%d", len);
6228
      break;
6229
    case ARG_MEMNUM:
6230
      mem = *((MemNumType* )bp);
6231
      bp += SIZE_MEMNUM;
6232
      fprintf(f, ":%d", mem);
6233
      break;
6234
    case ARG_OPTION:
6235
      {
6236
  OnigOptionType option = *((OnigOptionType* )bp);
6237
  bp += SIZE_OPTION;
6238
  fprintf(f, ":%d", option);
6239
      }
6240
      break;
6241
6242
    case ARG_STATE_CHECK:
6243
      scn = *((StateCheckNumType* )bp);
6244
      bp += SIZE_STATE_CHECK_NUM;
6245
      fprintf(f, ":%d", scn);
6246
      break;
6247
    }
6248
  }
6249
  else {
6250
    switch (*bp++) {
6251
    case OP_EXACT1:
6252
    case OP_ANYCHAR_STAR_PEEK_NEXT:
6253
    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6254
      p_string(f, 1, bp++); break;
6255
    case OP_EXACT2:
6256
      p_string(f, 2, bp); bp += 2; break;
6257
    case OP_EXACT3:
6258
      p_string(f, 3, bp); bp += 3; break;
6259
    case OP_EXACT4:
6260
      p_string(f, 4, bp); bp += 4; break;
6261
    case OP_EXACT5:
6262
      p_string(f, 5, bp); bp += 5; break;
6263
    case OP_EXACTN:
6264
      GET_LENGTH_INC(len, bp);
6265
      p_len_string(f, len, 1, bp);
6266
      bp += len;
6267
      break;
6268
6269
    case OP_EXACTMB2N1:
6270
      p_string(f, 2, bp); bp += 2; break;
6271
    case OP_EXACTMB2N2:
6272
      p_string(f, 4, bp); bp += 4; break;
6273
    case OP_EXACTMB2N3:
6274
      p_string(f, 6, bp); bp += 6; break;
6275
    case OP_EXACTMB2N:
6276
      GET_LENGTH_INC(len, bp);
6277
      p_len_string(f, len, 2, bp);
6278
      bp += len * 2;
6279
      break;
6280
    case OP_EXACTMB3N:
6281
      GET_LENGTH_INC(len, bp);
6282
      p_len_string(f, len, 3, bp);
6283
      bp += len * 3;
6284
      break;
6285
    case OP_EXACTMBN:
6286
      {
6287
  int mb_len;
6288
6289
  GET_LENGTH_INC(mb_len, bp);
6290
  GET_LENGTH_INC(len, bp);
6291
  fprintf(f, ":%d:%d:", mb_len, len);
6292
  n = len * mb_len;
6293
  while (n-- > 0) { fputc(*bp++, f); }
6294
      }
6295
      break;
6296
6297
    case OP_EXACT1_IC:
6298
      len = enclen(enc, bp, bpend);
6299
      p_string(f, len, bp);
6300
      bp += len;
6301
      break;
6302
    case OP_EXACTN_IC:
6303
      GET_LENGTH_INC(len, bp);
6304
      p_len_string(f, len, 1, bp);
6305
      bp += len;
6306
      break;
6307
6308
    case OP_CCLASS:
6309
      n = bitset_on_num((BitSetRef )bp);
6310
      bp += SIZE_BITSET;
6311
      fprintf(f, ":%d", n);
6312
      break;
6313
6314
    case OP_CCLASS_NOT:
6315
      n = bitset_on_num((BitSetRef )bp);
6316
      bp += SIZE_BITSET;
6317
      fprintf(f, ":%d", n);
6318
      break;
6319
6320
    case OP_CCLASS_MB:
6321
    case OP_CCLASS_MB_NOT:
6322
      GET_LENGTH_INC(len, bp);
6323
      q = bp;
6324
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6325
      ALIGNMENT_RIGHT(q);
6326
# endif
6327
      GET_CODE_POINT(code, q);
6328
      bp += len;
6329
      fprintf(f, ":%d:%d", (int )code, len);
6330
      break;
6331
6332
    case OP_CCLASS_MIX:
6333
    case OP_CCLASS_MIX_NOT:
6334
      n = bitset_on_num((BitSetRef )bp);
6335
      bp += SIZE_BITSET;
6336
      GET_LENGTH_INC(len, bp);
6337
      q = bp;
6338
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6339
      ALIGNMENT_RIGHT(q);
6340
# endif
6341
      GET_CODE_POINT(code, q);
6342
      bp += len;
6343
      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6344
      break;
6345
6346
    case OP_BACKREFN_IC:
6347
      mem = *((MemNumType* )bp);
6348
      bp += SIZE_MEMNUM;
6349
      fprintf(f, ":%d", mem);
6350
      break;
6351
6352
    case OP_BACKREF_MULTI_IC:
6353
    case OP_BACKREF_MULTI:
6354
      fputs(" ", f);
6355
      GET_LENGTH_INC(len, bp);
6356
      for (i = 0; i < len; i++) {
6357
  GET_MEMNUM_INC(mem, bp);
6358
  if (i > 0) fputs(", ", f);
6359
  fprintf(f, "%d", mem);
6360
      }
6361
      break;
6362
6363
    case OP_BACKREF_WITH_LEVEL:
6364
      {
6365
  OnigOptionType option;
6366
  LengthType level;
6367
6368
  GET_OPTION_INC(option, bp);
6369
  fprintf(f, ":%d", option);
6370
  GET_LENGTH_INC(level, bp);
6371
  fprintf(f, ":%d", level);
6372
6373
  fputs(" ", f);
6374
  GET_LENGTH_INC(len, bp);
6375
  for (i = 0; i < len; i++) {
6376
    GET_MEMNUM_INC(mem, bp);
6377
    if (i > 0) fputs(", ", f);
6378
    fprintf(f, "%d", mem);
6379
  }
6380
      }
6381
      break;
6382
6383
    case OP_REPEAT:
6384
    case OP_REPEAT_NG:
6385
      {
6386
  mem = *((MemNumType* )bp);
6387
  bp += SIZE_MEMNUM;
6388
  addr = *((RelAddrType* )bp);
6389
  bp += SIZE_RELADDR;
6390
  fprintf(f, ":%d:%d", mem, addr);
6391
      }
6392
      break;
6393
6394
    case OP_PUSH_OR_JUMP_EXACT1:
6395
    case OP_PUSH_IF_PEEK_NEXT:
6396
      addr = *((RelAddrType* )bp);
6397
      bp += SIZE_RELADDR;
6398
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6399
      p_string(f, 1, bp);
6400
      bp += 1;
6401
      break;
6402
6403
    case OP_LOOK_BEHIND:
6404
      GET_LENGTH_INC(len, bp);
6405
      fprintf(f, ":%d", len);
6406
      break;
6407
6408
    case OP_PUSH_LOOK_BEHIND_NOT:
6409
      GET_RELADDR_INC(addr, bp);
6410
      GET_LENGTH_INC(len, bp);
6411
      fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6412
      break;
6413
6414
    case OP_STATE_CHECK_PUSH:
6415
    case OP_STATE_CHECK_PUSH_OR_JUMP:
6416
      scn = *((StateCheckNumType* )bp);
6417
      bp += SIZE_STATE_CHECK_NUM;
6418
      addr = *((RelAddrType* )bp);
6419
      bp += SIZE_RELADDR;
6420
      fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6421
      break;
6422
6423
    case OP_CONDITION:
6424
      GET_MEMNUM_INC(mem, bp);
6425
      GET_RELADDR_INC(addr, bp);
6426
      fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6427
      break;
6428
6429
    default:
6430
      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6431
        bp[-1]);
6432
    }
6433
  }
6434
  fputs("]", f);
6435
  if (nextp) *nextp = bp;
6436
}
6437
6438
# ifdef ONIG_DEBUG_COMPILE
6439
static void
6440
print_compiled_byte_code_list(FILE* f, regex_t* reg)
6441
{
6442
  int ncode;
6443
  UChar* bp = reg->p;
6444
  UChar* end = reg->p + reg->used;
6445
6446
  fprintf(f, "code length: %d", reg->used);
6447
6448
  ncode = -1;
6449
  while (bp < end) {
6450
    ncode++;
6451
    if (ncode % 5 == 0)
6452
      fprintf(f, "\n%ld:", bp - reg->p);
6453
    else
6454
      fprintf(f, " %ld:", bp - reg->p);
6455
    onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6456
  }
6457
6458
  fprintf(f, "\n");
6459
}
6460
# endif /* ONIG_DEBUG_COMPILE */
6461
6462
# ifdef ONIG_DEBUG_PARSE_TREE
6463
static void
6464
print_indent_tree(FILE* f, Node* node, int indent)
6465
{
6466
  int i, type, container_p = 0;
6467
  int add = 3;
6468
  UChar* p;
6469
6470
  Indent(f, indent);
6471
  if (IS_NULL(node)) {
6472
    fprintf(f, "ERROR: null node!!!\n");
6473
    exit (0);
6474
  }
6475
6476
  type = NTYPE(node);
6477
  switch (type) {
6478
  case NT_LIST:
6479
  case NT_ALT:
6480
    if (NTYPE(node) == NT_LIST)
6481
      fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6482
    else
6483
      fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6484
6485
    print_indent_tree(f, NCAR(node), indent + add);
6486
    while (IS_NOT_NULL(node = NCDR(node))) {
6487
      if (NTYPE(node) != type) {
6488
  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6489
  exit(0);
6490
      }
6491
      print_indent_tree(f, NCAR(node), indent + add);
6492
    }
6493
    break;
6494
6495
  case NT_STR:
6496
    fprintf(f, "<string%s:%"PRIxPTR">",
6497
      (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6498
    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6499
      if (*p >= 0x20 && *p < 0x7f)
6500
  fputc(*p, f);
6501
      else {
6502
  fprintf(f, " 0x%02x", *p);
6503
      }
6504
    }
6505
    break;
6506
6507
  case NT_CCLASS:
6508
    fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6509
    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6510
    if (NCCLASS(node)->mbuf) {
6511
      BBuf* bbuf = NCCLASS(node)->mbuf;
6512
      OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6513
      OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6514
      fprintf(f, "%d", *data++);
6515
      for (; data < end; data+=2) {
6516
  fprintf(f, ",");
6517
  fprintf(f, "%04x-%04x", data[0], data[1]);
6518
      }
6519
    }
6520
    break;
6521
6522
  case NT_CTYPE:
6523
    fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6524
    switch (NCTYPE(node)->ctype) {
6525
    case ONIGENC_CTYPE_WORD:
6526
      if (NCTYPE(node)->not != 0)
6527
  fputs("not word",       f);
6528
      else
6529
  fputs("word",           f);
6530
      break;
6531
6532
    default:
6533
      fprintf(f, "ERROR: undefined ctype.\n");
6534
      exit(0);
6535
    }
6536
    break;
6537
6538
  case NT_CANY:
6539
    fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6540
    break;
6541
6542
  case NT_ANCHOR:
6543
    fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6544
    switch (NANCHOR(node)->type) {
6545
    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6546
    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6547
    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6548
    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6549
    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6550
    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6551
6552
    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6553
    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6554
#  ifdef USE_WORD_BEGIN_END
6555
    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6556
    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6557
#  endif
6558
    case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
6559
    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
6560
    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
6561
    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6562
    case ANCHOR_KEEP:            fputs("keep",f);            break;
6563
6564
    default:
6565
      fprintf(f, "ERROR: undefined anchor type.\n");
6566
      break;
6567
    }
6568
    break;
6569
6570
  case NT_BREF:
6571
    {
6572
      int* p;
6573
      BRefNode* br = NBREF(node);
6574
      p = BACKREFS_P(br);
6575
      fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6576
      for (i = 0; i < br->back_num; i++) {
6577
  if (i > 0) fputs(", ", f);
6578
  fprintf(f, "%d", p[i]);
6579
      }
6580
    }
6581
    break;
6582
6583
#  ifdef USE_SUBEXP_CALL
6584
  case NT_CALL:
6585
    {
6586
      CallNode* cn = NCALL(node);
6587
      fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6588
      p_string(f, cn->name_end - cn->name, cn->name);
6589
    }
6590
    break;
6591
#  endif
6592
6593
  case NT_QTFR:
6594
    fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6595
      NQTFR(node)->lower, NQTFR(node)->upper,
6596
      (NQTFR(node)->greedy ? "" : "?"));
6597
    print_indent_tree(f, NQTFR(node)->target, indent + add);
6598
    break;
6599
6600
  case NT_ENCLOSE:
6601
    fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6602
    switch (NENCLOSE(node)->type) {
6603
    case ENCLOSE_OPTION:
6604
      fprintf(f, "option:%d", NENCLOSE(node)->option);
6605
      break;
6606
    case ENCLOSE_MEMORY:
6607
      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6608
      break;
6609
    case ENCLOSE_STOP_BACKTRACK:
6610
      fprintf(f, "stop-bt");
6611
      break;
6612
    case ENCLOSE_CONDITION:
6613
      fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6614
      break;
6615
    case ENCLOSE_ABSENT:
6616
      fprintf(f, "absent");
6617
      break;
6618
6619
    default:
6620
      break;
6621
    }
6622
    fprintf(f, "\n");
6623
    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6624
    break;
6625
6626
  default:
6627
    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6628
    break;
6629
  }
6630
6631
  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6632
      type != NT_ENCLOSE)
6633
    fprintf(f, "\n");
6634
6635
  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6636
6637
  fflush(f);
6638
}
6639
6640
static void
6641
print_tree(FILE* f, Node* node)
6642
{
6643
  print_indent_tree(f, node, 0);
6644
}
6645
# endif /* ONIG_DEBUG_PARSE_TREE */
6646
#endif /* ONIG_DEBUG */