Coverage Report

Created: 2023-01-10 06:17

/src/fluent-bit/lib/onigmo/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regparse.h"
32
33
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35
extern OnigCaseFoldType
36
onig_get_default_case_fold_flag(void)
37
0
{
38
0
  return OnigDefaultCaseFoldFlag;
39
0
}
40
41
extern int
42
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43
0
{
44
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
45
0
  return 0;
46
0
}
47
48
49
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51
#endif
52
53
#if 0
54
static UChar*
55
str_dup(UChar* s, UChar* end)
56
{
57
  ptrdiff_t len = end - s;
58
59
  if (len > 0) {
60
    UChar* r = (UChar* )xmalloc(len + 1);
61
    CHECK_NULL_RETURN(r);
62
    xmemcpy(r, s, len);
63
    r[len] = (UChar )0;
64
    return r;
65
  }
66
  else return NULL;
67
}
68
#endif
69
70
static void
71
swap_node(Node* a, Node* b)
72
5.14k
{
73
5.14k
  Node c;
74
5.14k
  c = *a; *a = *b; *b = c;
75
76
5.14k
  if (NTYPE(a) == NT_STR) {
77
0
    StrNode* sn = NSTR(a);
78
0
    if (sn->capa == 0) {
79
0
      size_t len = sn->end - sn->s;
80
0
      sn->s   = sn->buf;
81
0
      sn->end = sn->s + len;
82
0
    }
83
0
  }
84
85
5.14k
  if (NTYPE(b) == NT_STR) {
86
0
    StrNode* sn = NSTR(b);
87
0
    if (sn->capa == 0) {
88
0
      size_t len = sn->end - sn->s;
89
0
      sn->s   = sn->buf;
90
0
      sn->end = sn->s + len;
91
0
    }
92
0
  }
93
5.14k
}
94
95
static OnigDistance
96
distance_add(OnigDistance d1, OnigDistance d2)
97
156k
{
98
156k
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99
49.1k
    return ONIG_INFINITE_DISTANCE;
100
107k
  else {
101
107k
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102
0
    else return ONIG_INFINITE_DISTANCE;
103
107k
  }
104
156k
}
105
106
static OnigDistance
107
distance_multiply(OnigDistance d, int m)
108
13.0k
{
109
13.0k
  if (m == 0) return 0;
110
111
8.31k
  if (d < ONIG_INFINITE_DISTANCE / m)
112
8.31k
    return d * m;
113
0
  else
114
0
    return ONIG_INFINITE_DISTANCE;
115
8.31k
}
116
117
static int
118
bitset_is_empty(BitSetRef bs)
119
0
{
120
0
  int i;
121
0
  for (i = 0; i < BITSET_SIZE; i++) {
122
0
    if (bs[i] != 0) return 0;
123
0
  }
124
0
  return 1;
125
0
}
126
127
#ifdef ONIG_DEBUG
128
static int
129
bitset_on_num(BitSetRef bs)
130
{
131
  int i, n;
132
133
  n = 0;
134
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135
    if (BITSET_AT(bs, i)) n++;
136
  }
137
  return n;
138
}
139
#endif
140
141
extern int
142
onig_bbuf_init(BBuf* buf, OnigDistance size)
143
14.2k
{
144
14.2k
  if (size <= 0) {
145
0
    size   = 0;
146
0
    buf->p = NULL;
147
0
  }
148
14.2k
  else {
149
14.2k
    buf->p = (UChar* )xmalloc(size);
150
14.2k
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151
14.2k
  }
152
153
14.2k
  buf->alloc = (unsigned int )size;
154
14.2k
  buf->used  = 0;
155
14.2k
  return 0;
156
14.2k
}
157
158
159
#ifdef USE_SUBEXP_CALL
160
161
static int
162
unset_addr_list_init(UnsetAddrList* uslist, int size)
163
0
{
164
0
  UnsetAddr* p;
165
166
0
  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167
0
  CHECK_NULL_RETURN_MEMERR(p);
168
0
  uslist->num   = 0;
169
0
  uslist->alloc = size;
170
0
  uslist->us    = p;
171
0
  return 0;
172
0
}
173
174
static void
175
unset_addr_list_end(UnsetAddrList* uslist)
176
0
{
177
0
  if (IS_NOT_NULL(uslist->us))
178
0
    xfree(uslist->us);
179
0
}
180
181
static int
182
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183
0
{
184
0
  UnsetAddr* p;
185
0
  int size;
186
187
0
  if (uslist->num >= uslist->alloc) {
188
0
    size = uslist->alloc * 2;
189
0
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190
0
    CHECK_NULL_RETURN_MEMERR(p);
191
0
    uslist->alloc = size;
192
0
    uslist->us    = p;
193
0
  }
194
195
0
  uslist->us[uslist->num].offset = offset;
196
0
  uslist->us[uslist->num].target = node;
197
0
  uslist->num++;
198
0
  return 0;
199
0
}
200
#endif /* USE_SUBEXP_CALL */
201
202
203
static int
204
add_opcode(regex_t* reg, int opcode)
205
92.6k
{
206
92.6k
  BBUF_ADD1(reg, opcode);
207
92.6k
  return 0;
208
92.6k
}
209
210
#ifdef USE_COMBINATION_EXPLOSION_CHECK
211
static int
212
add_state_check_num(regex_t* reg, int num)
213
{
214
  StateCheckNumType n = (StateCheckNumType )num;
215
216
  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217
  return 0;
218
}
219
#endif
220
221
static int
222
add_rel_addr(regex_t* reg, int addr)
223
25.7k
{
224
25.7k
  RelAddrType ra = (RelAddrType )addr;
225
226
25.7k
  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227
25.7k
  return 0;
228
25.7k
}
229
230
static int
231
add_abs_addr(regex_t* reg, int addr)
232
0
{
233
0
  AbsAddrType ra = (AbsAddrType )addr;
234
235
0
  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236
0
  return 0;
237
0
}
238
239
static int
240
add_length(regex_t* reg, OnigDistance len)
241
7.12k
{
242
7.12k
  LengthType l = (LengthType )len;
243
244
7.12k
  BBUF_ADD(reg, &l, SIZE_LENGTH);
245
7.12k
  return 0;
246
7.12k
}
247
248
static int
249
add_mem_num(regex_t* reg, int num)
250
3.96k
{
251
3.96k
  MemNumType n = (MemNumType )num;
252
253
3.96k
  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254
3.96k
  return 0;
255
3.96k
}
256
257
#if 0
258
static int
259
add_pointer(regex_t* reg, void* addr)
260
{
261
  PointerType ptr = (PointerType )addr;
262
263
  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264
  return 0;
265
}
266
#endif
267
268
static int
269
add_option(regex_t* reg, OnigOptionType option)
270
0
{
271
0
  BBUF_ADD(reg, &option, SIZE_OPTION);
272
0
  return 0;
273
0
}
274
275
static int
276
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277
25.7k
{
278
25.7k
  int r;
279
280
25.7k
  r = add_opcode(reg, opcode);
281
25.7k
  if (r) return r;
282
25.7k
  r = add_rel_addr(reg, addr);
283
25.7k
  return r;
284
25.7k
}
285
286
static int
287
add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288
17.4k
{
289
17.4k
  BBUF_ADD(reg, bytes, len);
290
17.4k
  return 0;
291
17.4k
}
292
293
static int
294
add_bitset(regex_t* reg, BitSetRef bs)
295
15.8k
{
296
15.8k
  BBUF_ADD(reg, bs, SIZE_BITSET);
297
15.8k
  return 0;
298
15.8k
}
299
300
static int
301
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302
0
{
303
0
  int r;
304
0
305
0
  r = add_opcode(reg, opcode);
306
0
  if (r) return r;
307
0
  r = add_option(reg, option);
308
0
  return r;
309
0
}
310
311
static int compile_length_tree(Node* node, regex_t* reg);
312
static int compile_tree(Node* node, regex_t* reg);
313
314
315
#define IS_NEED_STR_LEN_OP_EXACT(op) \
316
29.7k
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
317
29.7k
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
318
319
static int
320
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321
29.7k
{
322
29.7k
  int op;
323
29.7k
  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325
29.7k
  if (ignore_case) {
326
0
    switch (str_len) {
327
0
    case 1:  op = OP_EXACT1_IC; break;
328
0
    default: op = OP_EXACTN_IC; break;
329
0
    }
330
0
  }
331
29.7k
  else {
332
29.7k
    switch (mb_len) {
333
29.7k
    case 1:
334
29.7k
      switch (str_len) {
335
9.50k
      case 1:  op = OP_EXACT1; break;
336
792
      case 2:  op = OP_EXACT2; break;
337
1.18k
      case 3:  op = OP_EXACT3; break;
338
1.58k
      case 4:  op = OP_EXACT4; break;
339
2.37k
      case 5:  op = OP_EXACT5; break;
340
14.2k
      default: op = OP_EXACTN; break;
341
29.7k
      }
342
29.7k
      break;
343
344
29.7k
    case 2:
345
0
      switch (str_len) {
346
0
      case 1:  op = OP_EXACTMB2N1; break;
347
0
      case 2:  op = OP_EXACTMB2N2; break;
348
0
      case 3:  op = OP_EXACTMB2N3; break;
349
0
      default: op = OP_EXACTMB2N;  break;
350
0
      }
351
0
      break;
352
353
0
    case 3:
354
0
      op = OP_EXACTMB3N;
355
0
      break;
356
357
0
    default:
358
0
      op = OP_EXACTMBN;
359
0
      break;
360
29.7k
    }
361
29.7k
  }
362
29.7k
  return op;
363
29.7k
}
364
365
static int
366
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367
4.35k
{
368
4.35k
  int r;
369
4.35k
  int saved_num_null_check = reg->num_null_check;
370
371
4.35k
  if (empty_info != 0) {
372
0
    r = add_opcode(reg, OP_NULL_CHECK_START);
373
0
    if (r) return r;
374
0
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375
0
    if (r) return r;
376
0
    reg->num_null_check++;
377
0
  }
378
379
4.35k
  r = compile_tree(node, reg);
380
4.35k
  if (r) return r;
381
382
4.35k
  if (empty_info != 0) {
383
0
    if (empty_info == NQ_TARGET_IS_EMPTY)
384
0
      r = add_opcode(reg, OP_NULL_CHECK_END);
385
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390
0
    if (r) return r;
391
0
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392
0
  }
393
4.35k
  return r;
394
4.35k
}
395
396
#ifdef USE_SUBEXP_CALL
397
static int
398
compile_call(CallNode* node, regex_t* reg)
399
0
{
400
0
  int r;
401
402
0
  r = add_opcode(reg, OP_CALL);
403
0
  if (r) return r;
404
0
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405
0
                          node->target);
406
0
  if (r) return r;
407
0
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408
0
  return r;
409
0
}
410
#endif
411
412
static int
413
compile_tree_n_times(Node* node, int n, regex_t* reg)
414
11.8k
{
415
11.8k
  int i, r;
416
417
19.0k
  for (i = 0; i < n; i++) {
418
7.12k
    r = compile_tree(node, reg);
419
7.12k
    if (r) return r;
420
7.12k
  }
421
11.8k
  return 0;
422
11.8k
}
423
424
static int
425
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426
                          regex_t* reg ARG_UNUSED, int ignore_case)
427
13.0k
{
428
13.0k
  int len;
429
13.0k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431
13.0k
  len = SIZE_OPCODE;
432
433
13.0k
  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
434
13.0k
  if (IS_NEED_STR_LEN_OP_EXACT(op))
435
7.12k
    len += SIZE_LENGTH;
436
437
13.0k
  len += (int )byte_len;
438
13.0k
  return len;
439
13.0k
}
440
441
static int
442
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443
                   regex_t* reg, int ignore_case)
444
16.6k
{
445
16.6k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446
16.6k
  add_opcode(reg, op);
447
448
16.6k
  if (op == OP_EXACTMBN)
449
0
    add_length(reg, mb_len);
450
451
16.6k
  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452
7.12k
    if (op == OP_EXACTN_IC)
453
0
      add_length(reg, byte_len);
454
7.12k
    else
455
7.12k
      add_length(reg, byte_len / mb_len);
456
7.12k
  }
457
458
16.6k
  add_bytes(reg, s, byte_len);
459
16.6k
  return 0;
460
16.6k
}
461
462
463
static int
464
compile_length_string_node(Node* node, regex_t* reg)
465
13.0k
{
466
13.0k
  int rlen, r, len, prev_len, blen, ambig;
467
13.0k
  OnigEncoding enc = reg->enc;
468
13.0k
  UChar *p, *prev;
469
13.0k
  StrNode* sn;
470
471
13.0k
  sn = NSTR(node);
472
13.0k
  if (sn->end <= sn->s)
473
0
    return 0;
474
475
13.0k
  ambig = NSTRING_IS_AMBIG(node);
476
477
13.0k
  p = prev = sn->s;
478
13.0k
  prev_len = enclen(enc, p, sn->end);
479
13.0k
  p += prev_len;
480
13.0k
  blen = prev_len;
481
13.0k
  rlen = 0;
482
483
93.8k
  for (; p < sn->end; ) {
484
80.7k
    len = enclen(enc, p, sn->end);
485
80.7k
    if (len == prev_len || ambig) {
486
80.7k
      blen += len;
487
80.7k
    }
488
0
    else {
489
0
      r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490
0
      rlen += r;
491
0
      prev = p;
492
0
      blen = len;
493
0
      prev_len = len;
494
0
    }
495
80.7k
    p += len;
496
80.7k
  }
497
13.0k
  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498
13.0k
  rlen += r;
499
13.0k
  return rlen;
500
13.0k
}
501
502
static int
503
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504
0
{
505
0
  if (sn->end <= sn->s)
506
0
    return 0;
507
508
0
  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509
0
}
510
511
static int
512
compile_string_node(Node* node, regex_t* reg)
513
17.0k
{
514
17.0k
  int r, len, prev_len, blen, ambig;
515
17.0k
  OnigEncoding enc = reg->enc;
516
17.0k
  UChar *p, *prev, *end;
517
17.0k
  StrNode* sn;
518
519
17.0k
  sn = NSTR(node);
520
17.0k
  if (sn->end <= sn->s)
521
396
    return 0;
522
523
16.6k
  end = sn->end;
524
16.6k
  ambig = NSTRING_IS_AMBIG(node);
525
526
16.6k
  p = prev = sn->s;
527
16.6k
  prev_len = enclen(enc, p, end);
528
16.6k
  p += prev_len;
529
16.6k
  blen = prev_len;
530
531
145k
  for (; p < end; ) {
532
129k
    len = enclen(enc, p, end);
533
129k
    if (len == prev_len || ambig) {
534
129k
      blen += len;
535
129k
    }
536
0
    else {
537
0
      r = add_compile_string(prev, prev_len, blen, reg, ambig);
538
0
      if (p + len > end) {
539
0
        return 0;
540
0
      }
541
0
      if (r) return r;
542
543
0
      prev  = p;
544
0
      blen  = len;
545
0
      prev_len = len;
546
0
    }
547
548
129k
    p += len;
549
129k
  }
550
16.6k
  return add_compile_string(prev, prev_len, blen, reg, ambig);
551
16.6k
}
552
553
static int
554
compile_string_raw_node(StrNode* sn, regex_t* reg)
555
0
{
556
0
  if (sn->end <= sn->s)
557
0
    return 0;
558
559
0
  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560
0
}
561
562
static int
563
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
564
0
{
565
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566
0
  add_length(reg, mbuf->used);
567
0
  return add_bytes(reg, mbuf->p, mbuf->used);
568
#else
569
  int r, pad_size;
570
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
571
572
  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573
  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575
576
  r = add_bytes(reg, mbuf->p, mbuf->used);
577
578
  /* padding for return value from compile_length_cclass_node() to be fix. */
579
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581
  return r;
582
#endif
583
0
}
584
585
static int
586
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
587
13.0k
{
588
13.0k
  int len;
589
590
13.0k
  if (IS_NULL(cc->mbuf)) {
591
13.0k
    len = SIZE_OPCODE + SIZE_BITSET;
592
13.0k
  }
593
0
  else {
594
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595
0
      len = SIZE_OPCODE;
596
0
    }
597
0
    else {
598
0
      len = SIZE_OPCODE + SIZE_BITSET;
599
0
    }
600
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601
0
    len += SIZE_LENGTH + cc->mbuf->used;
602
#else
603
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604
#endif
605
0
  }
606
607
13.0k
  return len;
608
13.0k
}
609
610
static int
611
compile_cclass_node(CClassNode* cc, regex_t* reg)
612
15.8k
{
613
15.8k
  int r;
614
615
15.8k
  if (IS_NULL(cc->mbuf)) {
616
15.8k
    if (IS_NCCLASS_NOT(cc))
617
4.35k
      add_opcode(reg, OP_CCLASS_NOT);
618
11.4k
    else
619
11.4k
      add_opcode(reg, OP_CCLASS);
620
621
15.8k
    r = add_bitset(reg, cc->bs);
622
15.8k
  }
623
0
  else {
624
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
625
0
      if (IS_NCCLASS_NOT(cc))
626
0
  add_opcode(reg, OP_CCLASS_MB_NOT);
627
0
      else
628
0
  add_opcode(reg, OP_CCLASS_MB);
629
630
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
631
0
    }
632
0
    else {
633
0
      if (IS_NCCLASS_NOT(cc))
634
0
  add_opcode(reg, OP_CCLASS_MIX_NOT);
635
0
      else
636
0
  add_opcode(reg, OP_CCLASS_MIX);
637
638
0
      r = add_bitset(reg, cc->bs);
639
0
      if (r) return r;
640
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
641
0
    }
642
0
  }
643
644
15.8k
  return r;
645
15.8k
}
646
647
static int
648
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
649
0
{
650
0
#define REPEAT_RANGE_ALLOC  4
651
652
0
  OnigRepeatRange* p;
653
654
0
  if (reg->repeat_range_alloc == 0) {
655
0
    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
656
0
    CHECK_NULL_RETURN_MEMERR(p);
657
0
    reg->repeat_range = p;
658
0
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
659
0
  }
660
0
  else if (reg->repeat_range_alloc <= id) {
661
0
    int n;
662
0
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
663
0
    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
664
0
                                    sizeof(OnigRepeatRange) * n);
665
0
    CHECK_NULL_RETURN_MEMERR(p);
666
0
    reg->repeat_range = p;
667
0
    reg->repeat_range_alloc = n;
668
0
  }
669
0
  else {
670
0
    p = reg->repeat_range;
671
0
  }
672
673
0
  p[id].lower = lower;
674
0
  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
675
0
  return 0;
676
0
}
677
678
static int
679
compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
680
                          regex_t* reg)
681
0
{
682
0
  int r;
683
0
  int num_repeat = reg->num_repeat;
684
685
0
  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
686
0
  if (r) return r;
687
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
688
0
  reg->num_repeat++;
689
0
  if (r) return r;
690
0
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
691
0
  if (r) return r;
692
693
0
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
694
0
  if (r) return r;
695
696
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
697
0
  if (r) return r;
698
699
0
  if (
700
0
#ifdef USE_SUBEXP_CALL
701
0
      reg->num_call > 0 ||
702
0
#endif
703
0
      IS_QUANTIFIER_IN_REPEAT(qn)) {
704
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
705
0
  }
706
0
  else {
707
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
708
0
  }
709
0
  if (r) return r;
710
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
711
0
  return r;
712
0
}
713
714
static int
715
is_anychar_star_quantifier(QtfrNode* qn)
716
6.73k
{
717
6.73k
  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
718
6.73k
      NTYPE(qn->target) == NT_CANY)
719
1.98k
    return 1;
720
4.75k
  else
721
4.75k
    return 0;
722
6.73k
}
723
724
4.35k
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
725
#define CKN_ON   (ckn > 0)
726
727
#ifdef USE_COMBINATION_EXPLOSION_CHECK
728
729
static int
730
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
731
{
732
  int len, mod_tlen, cklen;
733
  int ckn;
734
  int infinite = IS_REPEAT_INFINITE(qn->upper);
735
  int empty_info = qn->target_empty_info;
736
  int tlen = compile_length_tree(qn->target, reg);
737
738
  if (tlen < 0) return tlen;
739
740
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
741
742
  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
743
744
  /* anychar repeat */
745
  if (NTYPE(qn->target) == NT_CANY) {
746
    if (qn->greedy && infinite) {
747
      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
748
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
749
      else
750
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
751
    }
752
  }
753
754
  if (empty_info != 0)
755
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
756
  else
757
    mod_tlen = tlen;
758
759
  if (infinite && qn->lower <= 1) {
760
    if (qn->greedy) {
761
      if (qn->lower == 1)
762
  len = SIZE_OP_JUMP;
763
      else
764
  len = 0;
765
766
      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
767
    }
768
    else {
769
      if (qn->lower == 0)
770
  len = SIZE_OP_JUMP;
771
      else
772
  len = 0;
773
774
      len += mod_tlen + SIZE_OP_PUSH + cklen;
775
    }
776
  }
777
  else if (qn->upper == 0) {
778
    if (qn->is_referred != 0) /* /(?<n>..){0}/ */
779
      len = SIZE_OP_JUMP + tlen;
780
    else
781
      len = 0;
782
  }
783
  else if (qn->upper == 1 && qn->greedy) {
784
    if (qn->lower == 0) {
785
      if (CKN_ON) {
786
  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
787
      }
788
      else {
789
  len = SIZE_OP_PUSH + tlen;
790
      }
791
    }
792
    else {
793
      len = tlen;
794
    }
795
  }
796
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
797
    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
798
  }
799
  else {
800
    len = SIZE_OP_REPEAT_INC
801
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
802
    if (CKN_ON)
803
      len += SIZE_OP_STATE_CHECK;
804
  }
805
806
  return len;
807
}
808
809
static int
810
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
811
{
812
  int r, mod_tlen;
813
  int ckn;
814
  int infinite = IS_REPEAT_INFINITE(qn->upper);
815
  int empty_info = qn->target_empty_info;
816
  int tlen = compile_length_tree(qn->target, reg);
817
818
  if (tlen < 0) return tlen;
819
820
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
821
822
  if (is_anychar_star_quantifier(qn)) {
823
    r = compile_tree_n_times(qn->target, qn->lower, reg);
824
    if (r) return r;
825
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
826
      if (IS_MULTILINE(reg->options))
827
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
828
      else
829
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
830
      if (r) return r;
831
      if (CKN_ON) {
832
  r = add_state_check_num(reg, ckn);
833
  if (r) return r;
834
      }
835
836
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
837
    }
838
    else {
839
      if (IS_MULTILINE(reg->options)) {
840
  r = add_opcode(reg, (CKN_ON ?
841
             OP_STATE_CHECK_ANYCHAR_ML_STAR
842
           : OP_ANYCHAR_ML_STAR));
843
      }
844
      else {
845
  r = add_opcode(reg, (CKN_ON ?
846
             OP_STATE_CHECK_ANYCHAR_STAR
847
           : OP_ANYCHAR_STAR));
848
      }
849
      if (r) return r;
850
      if (CKN_ON)
851
  r = add_state_check_num(reg, ckn);
852
853
      return r;
854
    }
855
  }
856
857
  if (empty_info != 0)
858
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
859
  else
860
    mod_tlen = tlen;
861
862
  if (infinite && qn->lower <= 1) {
863
    if (qn->greedy) {
864
      if (qn->lower == 1) {
865
  r = add_opcode_rel_addr(reg, OP_JUMP,
866
      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
867
  if (r) return r;
868
      }
869
870
      if (CKN_ON) {
871
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
872
  if (r) return r;
873
  r = add_state_check_num(reg, ckn);
874
  if (r) return r;
875
  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
876
      }
877
      else {
878
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
879
      }
880
      if (r) return r;
881
      r = compile_tree_empty_check(qn->target, reg, empty_info);
882
      if (r) return r;
883
      r = add_opcode_rel_addr(reg, OP_JUMP,
884
        -(mod_tlen + (int )SIZE_OP_JUMP
885
    + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
886
    }
887
    else {
888
      if (qn->lower == 0) {
889
  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
890
  if (r) return r;
891
      }
892
      r = compile_tree_empty_check(qn->target, reg, empty_info);
893
      if (r) return r;
894
      if (CKN_ON) {
895
  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
896
  if (r) return r;
897
  r = add_state_check_num(reg, ckn);
898
  if (r) return r;
899
  r = add_rel_addr(reg,
900
     -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
901
      }
902
      else
903
  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
904
    }
905
  }
906
  else if (qn->upper == 0) {
907
    if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
908
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
909
      if (r) return r;
910
      r = compile_tree(qn->target, reg);
911
    }
912
    else
913
      r = 0;
914
  }
915
  else if (qn->upper == 1 && qn->greedy) {
916
    if (qn->lower == 0) {
917
      if (CKN_ON) {
918
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
919
  if (r) return r;
920
  r = add_state_check_num(reg, ckn);
921
  if (r) return r;
922
  r = add_rel_addr(reg, tlen);
923
      }
924
      else {
925
  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
926
      }
927
      if (r) return r;
928
    }
929
930
    r = compile_tree(qn->target, reg);
931
  }
932
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
933
    if (CKN_ON) {
934
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
935
      if (r) return r;
936
      r = add_state_check_num(reg, ckn);
937
      if (r) return r;
938
      r = add_rel_addr(reg, SIZE_OP_JUMP);
939
    }
940
    else {
941
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
942
    }
943
944
    if (r) return r;
945
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
946
    if (r) return r;
947
    r = compile_tree(qn->target, reg);
948
  }
949
  else {
950
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
951
    if (CKN_ON) {
952
      if (r) return r;
953
      r = add_opcode(reg, OP_STATE_CHECK);
954
      if (r) return r;
955
      r = add_state_check_num(reg, ckn);
956
    }
957
  }
958
  return r;
959
}
960
961
#else /* USE_COMBINATION_EXPLOSION_CHECK */
962
963
static int
964
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
965
3.16k
{
966
3.16k
  int len, mod_tlen;
967
3.16k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
968
3.16k
  int empty_info = qn->target_empty_info;
969
3.16k
  int tlen = compile_length_tree(qn->target, reg);
970
971
3.16k
  if (tlen < 0) return tlen;
972
973
  /* anychar repeat */
974
3.16k
  if (NTYPE(qn->target) == NT_CANY) {
975
0
    if (qn->greedy && infinite) {
976
0
      if (IS_NOT_NULL(qn->next_head_exact))
977
0
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
978
0
      else
979
0
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
980
0
    }
981
0
  }
982
983
3.16k
  if (empty_info != 0)
984
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
985
3.16k
  else
986
3.16k
    mod_tlen = tlen;
987
988
3.16k
  if (infinite &&
989
3.16k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
990
3.16k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
991
0
      len = SIZE_OP_JUMP;
992
0
    }
993
3.16k
    else {
994
3.16k
      len = tlen * qn->lower;
995
3.16k
    }
996
997
3.16k
    if (qn->greedy) {
998
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
999
      if (IS_NOT_NULL(qn->head_exact))
1000
  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1001
      else
1002
#endif
1003
3.16k
      if (IS_NOT_NULL(qn->next_head_exact))
1004
2.37k
  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1005
792
      else
1006
792
  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1007
3.16k
    }
1008
0
    else
1009
0
      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1010
3.16k
  }
1011
0
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1012
0
    len = SIZE_OP_JUMP + tlen;
1013
0
  }
1014
0
  else if (!infinite && qn->greedy &&
1015
0
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1016
0
                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1017
0
    len = tlen * qn->lower;
1018
0
    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1019
0
  }
1020
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1021
0
    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1022
0
  }
1023
0
  else {
1024
0
    len = SIZE_OP_REPEAT_INC
1025
0
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1026
0
  }
1027
1028
3.16k
  return len;
1029
3.16k
}
1030
1031
static int
1032
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1033
6.73k
{
1034
6.73k
  int i, r, mod_tlen;
1035
6.73k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
1036
6.73k
  int empty_info = qn->target_empty_info;
1037
6.73k
  int tlen = compile_length_tree(qn->target, reg);
1038
1039
6.73k
  if (tlen < 0) return tlen;
1040
1041
6.73k
  if (is_anychar_star_quantifier(qn)) {
1042
1.98k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1043
1.98k
    if (r) return r;
1044
1.98k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1045
792
      if (IS_MULTILINE(reg->options))
1046
0
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1047
792
      else
1048
792
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1049
792
      if (r) return r;
1050
792
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1051
792
    }
1052
1.18k
    else {
1053
1.18k
      if (IS_MULTILINE(reg->options))
1054
0
  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1055
1.18k
      else
1056
1.18k
  return add_opcode(reg, OP_ANYCHAR_STAR);
1057
1.18k
    }
1058
1.98k
  }
1059
1060
4.75k
  if (empty_info != 0)
1061
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1062
4.75k
  else
1063
4.75k
    mod_tlen = tlen;
1064
1065
4.75k
  if (infinite &&
1066
4.75k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1067
4.35k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1068
0
      if (qn->greedy) {
1069
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1070
  if (IS_NOT_NULL(qn->head_exact))
1071
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1072
  else
1073
#endif
1074
0
  if (IS_NOT_NULL(qn->next_head_exact))
1075
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1076
0
  else
1077
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1078
0
      }
1079
0
      else {
1080
0
  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1081
0
      }
1082
0
      if (r) return r;
1083
0
    }
1084
4.35k
    else {
1085
4.35k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1086
4.35k
      if (r) return r;
1087
4.35k
    }
1088
1089
4.35k
    if (qn->greedy) {
1090
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1091
      if (IS_NOT_NULL(qn->head_exact)) {
1092
  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093
           mod_tlen + SIZE_OP_JUMP);
1094
  if (r) return r;
1095
  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097
  if (r) return r;
1098
  r = add_opcode_rel_addr(reg, OP_JUMP,
1099
  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100
      }
1101
      else
1102
#endif
1103
3.96k
      if (IS_NOT_NULL(qn->next_head_exact)) {
1104
0
  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1105
0
        mod_tlen + SIZE_OP_JUMP);
1106
0
  if (r) return r;
1107
0
  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1108
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1109
0
  if (r) return r;
1110
0
  r = add_opcode_rel_addr(reg, OP_JUMP,
1111
0
          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1112
0
      }
1113
3.96k
      else {
1114
3.96k
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1115
3.96k
  if (r) return r;
1116
3.96k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1117
3.96k
  if (r) return r;
1118
3.96k
  r = add_opcode_rel_addr(reg, OP_JUMP,
1119
3.96k
         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1120
3.96k
      }
1121
3.96k
    }
1122
396
    else {
1123
396
      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1124
396
      if (r) return r;
1125
396
      r = compile_tree_empty_check(qn->target, reg, empty_info);
1126
396
      if (r) return r;
1127
396
      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1128
396
    }
1129
4.35k
  }
1130
396
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1131
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1132
0
    if (r) return r;
1133
0
    r = compile_tree(qn->target, reg);
1134
0
  }
1135
396
  else if (!infinite && qn->greedy &&
1136
396
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1137
396
                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1138
396
    int n = qn->upper - qn->lower;
1139
1140
396
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1141
396
    if (r) return r;
1142
1143
792
    for (i = 0; i < n; i++) {
1144
396
      r = add_opcode_rel_addr(reg, OP_PUSH,
1145
396
         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1146
396
      if (r) return r;
1147
396
      r = compile_tree(qn->target, reg);
1148
396
      if (r) return r;
1149
396
    }
1150
396
  }
1151
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1152
0
    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1153
0
    if (r) return r;
1154
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1155
0
    if (r) return r;
1156
0
    r = compile_tree(qn->target, reg);
1157
0
  }
1158
0
  else {
1159
0
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1160
0
  }
1161
4.75k
  return r;
1162
4.75k
}
1163
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164
1165
static int
1166
compile_length_option_node(EncloseNode* node, regex_t* reg)
1167
0
{
1168
0
  int tlen;
1169
0
  OnigOptionType prev = reg->options;
1170
1171
0
  reg->options = node->option;
1172
0
  tlen = compile_length_tree(node->target, reg);
1173
0
  reg->options = prev;
1174
1175
0
  if (tlen < 0) return tlen;
1176
1177
0
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178
0
    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1179
0
           + tlen + SIZE_OP_SET_OPTION;
1180
0
  }
1181
0
  else
1182
0
    return tlen;
1183
0
}
1184
1185
static int
1186
compile_option_node(EncloseNode* node, regex_t* reg)
1187
396
{
1188
396
  int r;
1189
396
  OnigOptionType prev = reg->options;
1190
1191
396
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1192
0
    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1193
0
    if (r) return r;
1194
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1195
0
    if (r) return r;
1196
0
    r = add_opcode(reg, OP_FAIL);
1197
0
    if (r) return r;
1198
0
  }
1199
1200
396
  reg->options = node->option;
1201
396
  r = compile_tree(node->target, reg);
1202
396
  reg->options = prev;
1203
1204
396
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1205
0
    if (r) return r;
1206
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1207
0
  }
1208
396
  return r;
1209
396
}
1210
1211
static int
1212
compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1213
2.37k
{
1214
2.37k
  int len;
1215
2.37k
  int tlen;
1216
1217
2.37k
  if (node->type == ENCLOSE_OPTION)
1218
0
    return compile_length_option_node(node, reg);
1219
1220
2.37k
  if (node->target) {
1221
2.37k
    tlen = compile_length_tree(node->target, reg);
1222
2.37k
    if (tlen < 0) return tlen;
1223
2.37k
  }
1224
0
  else
1225
0
    tlen = 0;
1226
1227
2.37k
  switch (node->type) {
1228
0
  case ENCLOSE_MEMORY:
1229
0
#ifdef USE_SUBEXP_CALL
1230
0
    if (IS_ENCLOSE_CALLED(node)) {
1231
0
      len = SIZE_OP_MEMORY_START_PUSH + tlen
1232
0
    + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1233
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1234
0
  len += (IS_ENCLOSE_RECURSION(node)
1235
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1236
0
      else
1237
0
  len += (IS_ENCLOSE_RECURSION(node)
1238
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1239
0
    }
1240
0
    else if (IS_ENCLOSE_RECURSION(node)) {
1241
0
      len = SIZE_OP_MEMORY_START_PUSH;
1242
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1243
0
         ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1244
0
    }
1245
0
    else
1246
0
#endif
1247
0
    {
1248
0
      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1249
0
  len = SIZE_OP_MEMORY_START_PUSH;
1250
0
      else
1251
0
  len = SIZE_OP_MEMORY_START;
1252
1253
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1254
0
         ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1255
0
    }
1256
0
    break;
1257
1258
2.37k
  case ENCLOSE_STOP_BACKTRACK:
1259
2.37k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1260
2.37k
      QtfrNode* qn = NQTFR(node->target);
1261
2.37k
      tlen = compile_length_tree(qn->target, reg);
1262
2.37k
      if (tlen < 0) return tlen;
1263
1264
2.37k
      len = tlen * qn->lower
1265
2.37k
    + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1266
2.37k
    }
1267
0
    else {
1268
0
      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1269
0
    }
1270
2.37k
    break;
1271
1272
2.37k
  case ENCLOSE_CONDITION:
1273
0
    len = SIZE_OP_CONDITION;
1274
0
    if (NTYPE(node->target) == NT_ALT) {
1275
0
      Node* x = node->target;
1276
1277
0
      tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1278
0
      if (tlen < 0) return tlen;
1279
0
      len += tlen + SIZE_OP_JUMP;
1280
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1281
0
      x = NCDR(x);
1282
0
      tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1283
0
      if (tlen < 0) return tlen;
1284
0
      len += tlen;
1285
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1286
0
    }
1287
0
    else {
1288
0
      return ONIGERR_PARSER_BUG;
1289
0
    }
1290
0
    break;
1291
1292
0
  case ENCLOSE_ABSENT:
1293
0
    len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1294
0
    break;
1295
1296
0
  default:
1297
0
    return ONIGERR_TYPE_BUG;
1298
0
    break;
1299
2.37k
  }
1300
1301
2.37k
  return len;
1302
2.37k
}
1303
1304
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1305
1306
static int
1307
compile_enclose_node(EncloseNode* node, regex_t* reg)
1308
7.52k
{
1309
7.52k
  int r, len;
1310
1311
7.52k
  if (node->type == ENCLOSE_OPTION)
1312
396
    return compile_option_node(node, reg);
1313
1314
7.12k
  switch (node->type) {
1315
1.98k
  case ENCLOSE_MEMORY:
1316
1.98k
#ifdef USE_SUBEXP_CALL
1317
1.98k
    if (IS_ENCLOSE_CALLED(node)) {
1318
0
      r = add_opcode(reg, OP_CALL);
1319
0
      if (r) return r;
1320
0
      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1321
0
      node->state |= NST_ADDR_FIXED;
1322
0
      r = add_abs_addr(reg, (int )node->call_addr);
1323
0
      if (r) return r;
1324
0
      len = compile_length_tree(node->target, reg);
1325
0
      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1326
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1327
0
  len += (IS_ENCLOSE_RECURSION(node)
1328
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1329
0
      else
1330
0
  len += (IS_ENCLOSE_RECURSION(node)
1331
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1332
1333
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1334
0
      if (r) return r;
1335
0
    }
1336
1.98k
#endif
1337
1.98k
    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1338
0
      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1339
1.98k
    else
1340
1.98k
      r = add_opcode(reg, OP_MEMORY_START);
1341
1.98k
    if (r) return r;
1342
1.98k
    r = add_mem_num(reg, node->regnum);
1343
1.98k
    if (r) return r;
1344
1.98k
    r = compile_tree(node->target, reg);
1345
1.98k
    if (r) return r;
1346
1.98k
#ifdef USE_SUBEXP_CALL
1347
1.98k
    if (IS_ENCLOSE_CALLED(node)) {
1348
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1349
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350
0
           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1351
0
      else
1352
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1353
0
           ? OP_MEMORY_END_REC : OP_MEMORY_END));
1354
1355
0
      if (r) return r;
1356
0
      r = add_mem_num(reg, node->regnum);
1357
0
      if (r) return r;
1358
0
      r = add_opcode(reg, OP_RETURN);
1359
0
    }
1360
1.98k
    else if (IS_ENCLOSE_RECURSION(node)) {
1361
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1362
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1363
0
      else
1364
0
  r = add_opcode(reg, OP_MEMORY_END_REC);
1365
0
      if (r) return r;
1366
0
      r = add_mem_num(reg, node->regnum);
1367
0
    }
1368
1.98k
    else
1369
1.98k
#endif
1370
1.98k
    {
1371
1.98k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1372
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1373
1.98k
      else
1374
1.98k
  r = add_opcode(reg, OP_MEMORY_END);
1375
1.98k
      if (r) return r;
1376
1.98k
      r = add_mem_num(reg, node->regnum);
1377
1.98k
    }
1378
1.98k
    break;
1379
1380
5.14k
  case ENCLOSE_STOP_BACKTRACK:
1381
5.14k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1382
5.14k
      QtfrNode* qn = NQTFR(node->target);
1383
5.14k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1384
5.14k
      if (r) return r;
1385
1386
5.14k
      len = compile_length_tree(qn->target, reg);
1387
5.14k
      if (len < 0) return len;
1388
1389
5.14k
      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1390
5.14k
      if (r) return r;
1391
5.14k
      r = compile_tree(qn->target, reg);
1392
5.14k
      if (r) return r;
1393
5.14k
      r = add_opcode(reg, OP_POP);
1394
5.14k
      if (r) return r;
1395
5.14k
      r = add_opcode_rel_addr(reg, OP_JUMP,
1396
5.14k
   -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1397
5.14k
    }
1398
0
    else {
1399
0
      r = add_opcode(reg, OP_PUSH_STOP_BT);
1400
0
      if (r) return r;
1401
0
      r = compile_tree(node->target, reg);
1402
0
      if (r) return r;
1403
0
      r = add_opcode(reg, OP_POP_STOP_BT);
1404
0
    }
1405
5.14k
    break;
1406
1407
5.14k
  case ENCLOSE_CONDITION:
1408
0
    r = add_opcode(reg, OP_CONDITION);
1409
0
    if (r) return r;
1410
0
    r = add_mem_num(reg, node->regnum);
1411
0
    if (r) return r;
1412
1413
0
    if (NTYPE(node->target) == NT_ALT) {
1414
0
      Node* x = node->target;
1415
0
      int len2;
1416
1417
0
      len = compile_length_tree(NCAR(x), reg);  /* yes-node */
1418
0
      if (len < 0) return len;
1419
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1420
0
      x = NCDR(x);
1421
0
      len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1422
0
      if (len2 < 0) return len2;
1423
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1424
1425
0
      x = node->target;
1426
0
      r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1427
0
      if (r) return r;
1428
0
      r = compile_tree(NCAR(x), reg);   /* yes-node */
1429
0
      if (r) return r;
1430
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1431
0
      if (r) return r;
1432
0
      x = NCDR(x);
1433
0
      r = compile_tree(NCAR(x), reg);   /* no-node */
1434
0
    }
1435
0
    else {
1436
0
      return ONIGERR_PARSER_BUG;
1437
0
    }
1438
0
    break;
1439
1440
0
  case ENCLOSE_ABSENT:
1441
0
    len = compile_length_tree(node->target, reg);
1442
0
    if (len < 0) return len;
1443
1444
0
    r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1445
0
    if (r) return r;
1446
0
    r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1447
0
    if (r) return r;
1448
0
    r = compile_tree(node->target, reg);
1449
0
    if (r) return r;
1450
0
    r = add_opcode(reg, OP_ABSENT_END);
1451
0
    break;
1452
1453
0
  default:
1454
0
    return ONIGERR_TYPE_BUG;
1455
0
    break;
1456
7.12k
  }
1457
1458
7.12k
  return r;
1459
7.12k
}
1460
1461
static int
1462
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1463
1.58k
{
1464
1.58k
  int len;
1465
1.58k
  int tlen = 0;
1466
1467
1.58k
  if (node->target) {
1468
0
    tlen = compile_length_tree(node->target, reg);
1469
0
    if (tlen < 0) return tlen;
1470
0
  }
1471
1472
1.58k
  switch (node->type) {
1473
0
  case ANCHOR_PREC_READ:
1474
0
    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1475
0
    break;
1476
0
  case ANCHOR_PREC_READ_NOT:
1477
0
    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1478
0
    break;
1479
0
  case ANCHOR_LOOK_BEHIND:
1480
0
    len = SIZE_OP_LOOK_BEHIND + tlen;
1481
0
    break;
1482
0
  case ANCHOR_LOOK_BEHIND_NOT:
1483
0
    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1484
0
    break;
1485
1486
1.58k
  default:
1487
1.58k
    len = SIZE_OPCODE;
1488
1.58k
    break;
1489
1.58k
  }
1490
1491
1.58k
  return len;
1492
1.58k
}
1493
1494
static int
1495
compile_anchor_node(AnchorNode* node, regex_t* reg)
1496
11.4k
{
1497
11.4k
  int r, len;
1498
1499
11.4k
  switch (node->type) {
1500
0
  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1501
0
  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1502
7.92k
  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1503
3.16k
  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1504
0
  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1505
0
  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1506
1507
396
  case ANCHOR_WORD_BOUND:
1508
396
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1509
396
    else                      r = add_opcode(reg, OP_WORD_BOUND);
1510
396
    break;
1511
0
  case ANCHOR_NOT_WORD_BOUND:
1512
0
    if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1513
0
    else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
1514
0
    break;
1515
0
#ifdef USE_WORD_BEGIN_END
1516
0
  case ANCHOR_WORD_BEGIN:
1517
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1518
0
    else                      r = add_opcode(reg, OP_WORD_BEGIN);
1519
0
    break;
1520
0
  case ANCHOR_WORD_END:
1521
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
1522
0
    else                      r = add_opcode(reg, OP_WORD_END);
1523
0
    break;
1524
0
#endif
1525
0
  case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
1526
1527
0
  case ANCHOR_PREC_READ:
1528
0
    r = add_opcode(reg, OP_PUSH_POS);
1529
0
    if (r) return r;
1530
0
    r = compile_tree(node->target, reg);
1531
0
    if (r) return r;
1532
0
    r = add_opcode(reg, OP_POP_POS);
1533
0
    break;
1534
1535
0
  case ANCHOR_PREC_READ_NOT:
1536
0
    len = compile_length_tree(node->target, reg);
1537
0
    if (len < 0) return len;
1538
0
    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1539
0
    if (r) return r;
1540
0
    r = compile_tree(node->target, reg);
1541
0
    if (r) return r;
1542
0
    r = add_opcode(reg, OP_FAIL_POS);
1543
0
    break;
1544
1545
0
  case ANCHOR_LOOK_BEHIND:
1546
0
    {
1547
0
      int n;
1548
0
      r = add_opcode(reg, OP_LOOK_BEHIND);
1549
0
      if (r) return r;
1550
0
      if (node->char_len < 0) {
1551
0
  r = get_char_length_tree(node->target, reg, &n);
1552
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1553
0
      }
1554
0
      else
1555
0
  n = node->char_len;
1556
0
      r = add_length(reg, n);
1557
0
      if (r) return r;
1558
0
      r = compile_tree(node->target, reg);
1559
0
    }
1560
0
    break;
1561
1562
0
  case ANCHOR_LOOK_BEHIND_NOT:
1563
0
    {
1564
0
      int n;
1565
0
      len = compile_length_tree(node->target, reg);
1566
0
      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1567
0
         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1568
0
      if (r) return r;
1569
0
      if (node->char_len < 0) {
1570
0
  r = get_char_length_tree(node->target, reg, &n);
1571
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1572
0
      }
1573
0
      else
1574
0
  n = node->char_len;
1575
0
      r = add_length(reg, n);
1576
0
      if (r) return r;
1577
0
      r = compile_tree(node->target, reg);
1578
0
      if (r) return r;
1579
0
      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1580
0
    }
1581
0
    break;
1582
1583
0
  default:
1584
0
    return ONIGERR_TYPE_BUG;
1585
0
    break;
1586
11.4k
  }
1587
1588
11.4k
  return r;
1589
11.4k
}
1590
1591
static int
1592
compile_length_tree(Node* node, regex_t* reg)
1593
38.8k
{
1594
38.8k
  int len, type, r;
1595
1596
38.8k
  type = NTYPE(node);
1597
38.8k
  switch (type) {
1598
3.16k
  case NT_LIST:
1599
3.16k
    len = 0;
1600
7.92k
    do {
1601
7.92k
      r = compile_length_tree(NCAR(node), reg);
1602
7.92k
      if (r < 0) return r;
1603
7.92k
      len += r;
1604
7.92k
    } while (IS_NOT_NULL(node = NCDR(node)));
1605
3.16k
    r = len;
1606
3.16k
    break;
1607
1608
0
  case NT_ALT:
1609
0
    {
1610
0
      int n = 0;
1611
0
      len = 0;
1612
0
      do {
1613
0
  r = compile_length_tree(NCAR(node), reg);
1614
0
  if (r < 0) return r;
1615
0
  len += r;
1616
0
  n++;
1617
0
      } while (IS_NOT_NULL(node = NCDR(node)));
1618
0
      r = len;
1619
0
      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1620
0
    }
1621
0
    break;
1622
1623
13.0k
  case NT_STR:
1624
13.0k
    if (NSTRING_IS_RAW(node))
1625
0
      r = compile_length_string_raw_node(NSTR(node), reg);
1626
13.0k
    else
1627
13.0k
      r = compile_length_string_node(node, reg);
1628
13.0k
    break;
1629
1630
13.0k
  case NT_CCLASS:
1631
13.0k
    r = compile_length_cclass_node(NCCLASS(node), reg);
1632
13.0k
    break;
1633
1634
0
  case NT_CTYPE:
1635
2.37k
  case NT_CANY:
1636
2.37k
    r = SIZE_OPCODE;
1637
2.37k
    break;
1638
1639
0
  case NT_BREF:
1640
0
    {
1641
0
      BRefNode* br = NBREF(node);
1642
1643
0
#ifdef USE_BACKREF_WITH_LEVEL
1644
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1645
0
  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1646
0
            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1647
0
      }
1648
0
      else
1649
0
#endif
1650
0
      if (br->back_num == 1) {
1651
0
  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1652
0
       ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1653
0
      }
1654
0
      else {
1655
0
  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1656
0
      }
1657
0
    }
1658
0
    break;
1659
1660
0
#ifdef USE_SUBEXP_CALL
1661
0
  case NT_CALL:
1662
0
    r = SIZE_OP_CALL;
1663
0
    break;
1664
0
#endif
1665
1666
3.16k
  case NT_QTFR:
1667
3.16k
    r = compile_length_quantifier_node(NQTFR(node), reg);
1668
3.16k
    break;
1669
1670
2.37k
  case NT_ENCLOSE:
1671
2.37k
    r = compile_length_enclose_node(NENCLOSE(node), reg);
1672
2.37k
    break;
1673
1674
1.58k
  case NT_ANCHOR:
1675
1.58k
    r = compile_length_anchor_node(NANCHOR(node), reg);
1676
1.58k
    break;
1677
1678
0
  default:
1679
0
    return ONIGERR_TYPE_BUG;
1680
0
    break;
1681
38.8k
  }
1682
1683
38.8k
  return r;
1684
38.8k
}
1685
1686
static int
1687
compile_tree(Node* node, regex_t* reg)
1688
73.2k
{
1689
73.2k
  int n, type, len, pos, r = 0;
1690
1691
73.2k
  type = NTYPE(node);
1692
73.2k
  switch (type) {
1693
9.50k
  case NT_LIST:
1694
39.2k
    do {
1695
39.2k
      r = compile_tree(NCAR(node), reg);
1696
39.2k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1697
9.50k
    break;
1698
1699
2.37k
  case NT_ALT:
1700
2.37k
    {
1701
2.37k
      Node* x = node;
1702
2.37k
      len = 0;
1703
5.54k
      do {
1704
5.54k
  len += compile_length_tree(NCAR(x), reg);
1705
5.54k
  if (NCDR(x) != NULL) {
1706
3.16k
    len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1707
3.16k
  }
1708
5.54k
      } while (IS_NOT_NULL(x = NCDR(x)));
1709
2.37k
      pos = reg->used + len;  /* goal position */
1710
1711
5.54k
      do {
1712
5.54k
  len = compile_length_tree(NCAR(node), reg);
1713
5.54k
  if (IS_NOT_NULL(NCDR(node))) {
1714
3.16k
    r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1715
3.16k
    if (r) break;
1716
3.16k
  }
1717
5.54k
  r = compile_tree(NCAR(node), reg);
1718
5.54k
  if (r) break;
1719
5.54k
  if (IS_NOT_NULL(NCDR(node))) {
1720
3.16k
    len = pos - (reg->used + SIZE_OP_JUMP);
1721
3.16k
    r = add_opcode_rel_addr(reg, OP_JUMP, len);
1722
3.16k
    if (r) break;
1723
3.16k
  }
1724
5.54k
      } while (IS_NOT_NULL(node = NCDR(node)));
1725
2.37k
    }
1726
0
    break;
1727
1728
17.0k
  case NT_STR:
1729
17.0k
    if (NSTRING_IS_RAW(node))
1730
0
      r = compile_string_raw_node(NSTR(node), reg);
1731
17.0k
    else
1732
17.0k
      r = compile_string_node(node, reg);
1733
17.0k
    break;
1734
1735
15.8k
  case NT_CCLASS:
1736
15.8k
    r = compile_cclass_node(NCCLASS(node), reg);
1737
15.8k
    break;
1738
1739
0
  case NT_CTYPE:
1740
0
    {
1741
0
      int op;
1742
1743
0
      switch (NCTYPE(node)->ctype) {
1744
0
      case ONIGENC_CTYPE_WORD:
1745
0
  if (NCTYPE(node)->ascii_range != 0) {
1746
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
1747
0
    else                         op = OP_ASCII_WORD;
1748
0
  }
1749
0
  else {
1750
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1751
0
    else                         op = OP_WORD;
1752
0
  }
1753
0
  break;
1754
0
      default:
1755
0
  return ONIGERR_TYPE_BUG;
1756
0
  break;
1757
0
      }
1758
0
      r = add_opcode(reg, op);
1759
0
    }
1760
0
    break;
1761
1762
2.77k
  case NT_CANY:
1763
2.77k
    if (IS_MULTILINE(reg->options))
1764
0
      r = add_opcode(reg, OP_ANYCHAR_ML);
1765
2.77k
    else
1766
2.77k
      r = add_opcode(reg, OP_ANYCHAR);
1767
2.77k
    break;
1768
1769
0
  case NT_BREF:
1770
0
    {
1771
0
      BRefNode* br = NBREF(node);
1772
1773
0
#ifdef USE_BACKREF_WITH_LEVEL
1774
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1775
0
  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1776
0
  if (r) return r;
1777
0
  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1778
0
  if (r) return r;
1779
0
  r = add_length(reg, br->nest_level);
1780
0
  if (r) return r;
1781
1782
0
  goto add_bacref_mems;
1783
0
      }
1784
0
      else
1785
0
#endif
1786
0
      if (br->back_num == 1) {
1787
0
  n = br->back_static[0];
1788
0
  if (IS_IGNORECASE(reg->options)) {
1789
0
    r = add_opcode(reg, OP_BACKREFN_IC);
1790
0
    if (r) return r;
1791
0
    r = add_mem_num(reg, n);
1792
0
  }
1793
0
  else {
1794
0
    switch (n) {
1795
0
    case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1796
0
    case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1797
0
    default:
1798
0
      r = add_opcode(reg, OP_BACKREFN);
1799
0
      if (r) return r;
1800
0
      r = add_mem_num(reg, n);
1801
0
      break;
1802
0
    }
1803
0
  }
1804
0
      }
1805
0
      else {
1806
0
  int i;
1807
0
  int* p;
1808
1809
0
  if (IS_IGNORECASE(reg->options)) {
1810
0
    r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1811
0
  }
1812
0
  else {
1813
0
    r = add_opcode(reg, OP_BACKREF_MULTI);
1814
0
  }
1815
0
  if (r) return r;
1816
1817
0
#ifdef USE_BACKREF_WITH_LEVEL
1818
0
      add_bacref_mems:
1819
0
#endif
1820
0
  r = add_length(reg, br->back_num);
1821
0
  if (r) return r;
1822
0
  p = BACKREFS_P(br);
1823
0
  for (i = br->back_num - 1; i >= 0; i--) {
1824
0
    r = add_mem_num(reg, p[i]);
1825
0
    if (r) return r;
1826
0
  }
1827
0
      }
1828
0
    }
1829
0
    break;
1830
1831
0
#ifdef USE_SUBEXP_CALL
1832
0
  case NT_CALL:
1833
0
    r = compile_call(NCALL(node), reg);
1834
0
    break;
1835
0
#endif
1836
1837
6.73k
  case NT_QTFR:
1838
6.73k
    r = compile_quantifier_node(NQTFR(node), reg);
1839
6.73k
    break;
1840
1841
7.52k
  case NT_ENCLOSE:
1842
7.52k
    r = compile_enclose_node(NENCLOSE(node), reg);
1843
7.52k
    break;
1844
1845
11.4k
  case NT_ANCHOR:
1846
11.4k
    r = compile_anchor_node(NANCHOR(node), reg);
1847
11.4k
    break;
1848
1849
0
  default:
1850
#ifdef ONIG_DEBUG
1851
    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1852
#endif
1853
0
    break;
1854
73.2k
  }
1855
1856
73.2k
  return r;
1857
73.2k
}
1858
1859
#ifdef USE_NAMED_GROUP
1860
1861
static int
1862
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1863
0
{
1864
0
  int r = 0;
1865
0
  Node* node = *plink;
1866
1867
0
  switch (NTYPE(node)) {
1868
0
  case NT_LIST:
1869
0
  case NT_ALT:
1870
0
    do {
1871
0
      r = noname_disable_map(&(NCAR(node)), map, counter);
1872
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873
0
    break;
1874
1875
0
  case NT_QTFR:
1876
0
    {
1877
0
      Node** ptarget = &(NQTFR(node)->target);
1878
0
      Node*  old = *ptarget;
1879
0
      r = noname_disable_map(ptarget, map, counter);
1880
0
      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1881
0
  onig_reduce_nested_quantifier(node, *ptarget);
1882
0
      }
1883
0
    }
1884
0
    break;
1885
1886
0
  case NT_ENCLOSE:
1887
0
    {
1888
0
      EncloseNode* en = NENCLOSE(node);
1889
0
      if (en->type == ENCLOSE_MEMORY) {
1890
0
  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1891
0
    (*counter)++;
1892
0
    map[en->regnum].new_val = *counter;
1893
0
    en->regnum = *counter;
1894
0
  }
1895
0
  else if (en->regnum != 0) {
1896
0
    *plink = en->target;
1897
0
    en->target = NULL_NODE;
1898
0
    onig_node_free(node);
1899
0
    r = noname_disable_map(plink, map, counter);
1900
0
    break;
1901
0
  }
1902
0
      }
1903
0
      r = noname_disable_map(&(en->target), map, counter);
1904
0
    }
1905
0
    break;
1906
1907
0
  case NT_ANCHOR:
1908
0
    if (NANCHOR(node)->target)
1909
0
      r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1910
0
    break;
1911
1912
0
  default:
1913
0
    break;
1914
0
  }
1915
1916
0
  return r;
1917
0
}
1918
1919
static int
1920
renumber_node_backref(Node* node, GroupNumRemap* map)
1921
0
{
1922
0
  int i, pos, n, old_num;
1923
0
  int *backs;
1924
0
  BRefNode* bn = NBREF(node);
1925
1926
0
  if (! IS_BACKREF_NAME_REF(bn))
1927
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1928
1929
0
  old_num = bn->back_num;
1930
0
  if (IS_NULL(bn->back_dynamic))
1931
0
    backs = bn->back_static;
1932
0
  else
1933
0
    backs = bn->back_dynamic;
1934
1935
0
  for (i = 0, pos = 0; i < old_num; i++) {
1936
0
    n = map[backs[i]].new_val;
1937
0
    if (n > 0) {
1938
0
      backs[pos] = n;
1939
0
      pos++;
1940
0
    }
1941
0
  }
1942
1943
0
  bn->back_num = pos;
1944
0
  return 0;
1945
0
}
1946
1947
static int
1948
renumber_by_map(Node* node, GroupNumRemap* map)
1949
0
{
1950
0
  int r = 0;
1951
1952
0
  switch (NTYPE(node)) {
1953
0
  case NT_LIST:
1954
0
  case NT_ALT:
1955
0
    do {
1956
0
      r = renumber_by_map(NCAR(node), map);
1957
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1958
0
    break;
1959
0
  case NT_QTFR:
1960
0
    r = renumber_by_map(NQTFR(node)->target, map);
1961
0
    break;
1962
0
  case NT_ENCLOSE:
1963
0
    {
1964
0
      EncloseNode* en = NENCLOSE(node);
1965
0
      if (en->type == ENCLOSE_CONDITION)
1966
0
  en->regnum = map[en->regnum].new_val;
1967
0
      r = renumber_by_map(en->target, map);
1968
0
    }
1969
0
    break;
1970
1971
0
  case NT_BREF:
1972
0
    r = renumber_node_backref(node, map);
1973
0
    break;
1974
1975
0
  case NT_ANCHOR:
1976
0
    if (NANCHOR(node)->target)
1977
0
      r = renumber_by_map(NANCHOR(node)->target, map);
1978
0
    break;
1979
1980
0
  default:
1981
0
    break;
1982
0
  }
1983
1984
0
  return r;
1985
0
}
1986
1987
static int
1988
numbered_ref_check(Node* node)
1989
7.92k
{
1990
7.92k
  int r = 0;
1991
1992
7.92k
  switch (NTYPE(node)) {
1993
396
  case NT_LIST:
1994
1.18k
  case NT_ALT:
1995
5.14k
    do {
1996
5.14k
      r = numbered_ref_check(NCAR(node));
1997
5.14k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998
1.18k
    break;
1999
792
  case NT_QTFR:
2000
792
    r = numbered_ref_check(NQTFR(node)->target);
2001
792
    break;
2002
1.58k
  case NT_ENCLOSE:
2003
1.58k
    r = numbered_ref_check(NENCLOSE(node)->target);
2004
1.58k
    break;
2005
2006
0
  case NT_BREF:
2007
0
    if (! IS_BACKREF_NAME_REF(NBREF(node)))
2008
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2009
0
    break;
2010
2011
792
  case NT_ANCHOR:
2012
792
    if (NANCHOR(node)->target)
2013
0
      r = numbered_ref_check(NANCHOR(node)->target);
2014
792
    break;
2015
2016
3.56k
  default:
2017
3.56k
    break;
2018
7.92k
  }
2019
2020
7.92k
  return r;
2021
7.92k
}
2022
2023
static int
2024
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025
0
{
2026
0
  int r, i, pos, counter;
2027
0
  BitStatusType loc;
2028
0
  GroupNumRemap* map;
2029
2030
0
  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2031
0
  CHECK_NULL_RETURN_MEMERR(map);
2032
0
  for (i = 1; i <= env->num_mem; i++) {
2033
0
    map[i].new_val = 0;
2034
0
  }
2035
0
  counter = 0;
2036
0
  r = noname_disable_map(root, map, &counter);
2037
0
  if (r != 0) return r;
2038
2039
0
  r = renumber_by_map(*root, map);
2040
0
  if (r != 0) return r;
2041
2042
0
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043
0
    if (map[i].new_val > 0) {
2044
0
      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2045
0
      pos++;
2046
0
    }
2047
0
  }
2048
2049
0
  loc = env->capture_history;
2050
0
  BIT_STATUS_CLEAR(env->capture_history);
2051
0
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052
0
    if (BIT_STATUS_AT(loc, i)) {
2053
0
      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054
0
    }
2055
0
  }
2056
2057
0
  env->num_mem = env->num_named;
2058
0
  reg->num_mem = env->num_named;
2059
2060
0
  return onig_renumber_name_table(reg, map);
2061
0
}
2062
#endif /* USE_NAMED_GROUP */
2063
2064
#ifdef USE_SUBEXP_CALL
2065
static int
2066
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067
0
{
2068
0
  int i, offset;
2069
0
  EncloseNode* en;
2070
0
  AbsAddrType addr;
2071
2072
0
  for (i = 0; i < uslist->num; i++) {
2073
0
    en = NENCLOSE(uslist->us[i].target);
2074
0
    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075
0
    addr = en->call_addr;
2076
0
    offset = uslist->us[i].offset;
2077
2078
0
    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079
0
  }
2080
0
  return 0;
2081
0
}
2082
#endif
2083
2084
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2085
static int
2086
quantifiers_memory_node_info(Node* node)
2087
0
{
2088
0
  int r = 0;
2089
2090
0
  switch (NTYPE(node)) {
2091
0
  case NT_LIST:
2092
0
  case NT_ALT:
2093
0
    {
2094
0
      int v;
2095
0
      do {
2096
0
  v = quantifiers_memory_node_info(NCAR(node));
2097
0
  if (v > r) r = v;
2098
0
      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099
0
    }
2100
0
    break;
2101
2102
0
# ifdef USE_SUBEXP_CALL
2103
0
  case NT_CALL:
2104
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2105
0
      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106
0
    }
2107
0
    else
2108
0
      r = quantifiers_memory_node_info(NCALL(node)->target);
2109
0
    break;
2110
0
# endif
2111
2112
0
  case NT_QTFR:
2113
0
    {
2114
0
      QtfrNode* qn = NQTFR(node);
2115
0
      if (qn->upper != 0) {
2116
0
  r = quantifiers_memory_node_info(qn->target);
2117
0
      }
2118
0
    }
2119
0
    break;
2120
2121
0
  case NT_ENCLOSE:
2122
0
    {
2123
0
      EncloseNode* en = NENCLOSE(node);
2124
0
      switch (en->type) {
2125
0
      case ENCLOSE_MEMORY:
2126
0
  return NQ_TARGET_IS_EMPTY_MEM;
2127
0
  break;
2128
2129
0
      case ENCLOSE_OPTION:
2130
0
      case ENCLOSE_STOP_BACKTRACK:
2131
0
      case ENCLOSE_CONDITION:
2132
0
      case ENCLOSE_ABSENT:
2133
0
  r = quantifiers_memory_node_info(en->target);
2134
0
  break;
2135
0
      default:
2136
0
  break;
2137
0
      }
2138
0
    }
2139
0
    break;
2140
2141
0
  case NT_BREF:
2142
0
  case NT_STR:
2143
0
  case NT_CTYPE:
2144
0
  case NT_CCLASS:
2145
0
  case NT_CANY:
2146
0
  case NT_ANCHOR:
2147
0
  default:
2148
0
    break;
2149
0
  }
2150
2151
0
  return r;
2152
0
}
2153
#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
2155
static int
2156
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157
14.2k
{
2158
14.2k
  OnigDistance tmin;
2159
14.2k
  int r = 0;
2160
2161
14.2k
  *min = 0;
2162
14.2k
  switch (NTYPE(node)) {
2163
0
  case NT_BREF:
2164
0
    {
2165
0
      int i;
2166
0
      int* backs;
2167
0
      Node** nodes = SCANENV_MEM_NODES(env);
2168
0
      BRefNode* br = NBREF(node);
2169
0
      if (br->state & NST_RECURSION) break;
2170
2171
0
      backs = BACKREFS_P(br);
2172
0
      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2173
0
      r = get_min_match_length(nodes[backs[0]], min, env);
2174
0
      if (r != 0) break;
2175
0
      for (i = 1; i < br->back_num; i++) {
2176
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2177
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178
0
  if (r != 0) break;
2179
0
  if (*min > tmin) *min = tmin;
2180
0
      }
2181
0
    }
2182
0
    break;
2183
2184
0
#ifdef USE_SUBEXP_CALL
2185
0
  case NT_CALL:
2186
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2187
0
      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188
0
      if (IS_ENCLOSE_MIN_FIXED(en))
2189
0
  *min = en->min_len;
2190
0
    }
2191
0
    else
2192
0
      r = get_min_match_length(NCALL(node)->target, min, env);
2193
0
    break;
2194
0
#endif
2195
2196
792
  case NT_LIST:
2197
1.58k
    do {
2198
1.58k
      r = get_min_match_length(NCAR(node), &tmin, env);
2199
1.58k
      if (r == 0) *min += tmin;
2200
1.58k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201
792
    break;
2202
2203
0
  case NT_ALT:
2204
0
    {
2205
0
      Node *x, *y;
2206
0
      y = node;
2207
0
      do {
2208
0
  x = NCAR(y);
2209
0
  r = get_min_match_length(x, &tmin, env);
2210
0
  if (r != 0) break;
2211
0
  if (y == node) *min = tmin;
2212
0
  else if (*min > tmin) *min = tmin;
2213
0
      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214
0
    }
2215
0
    break;
2216
2217
1.18k
  case NT_STR:
2218
1.18k
    {
2219
1.18k
      StrNode* sn = NSTR(node);
2220
1.18k
      *min = sn->end - sn->s;
2221
1.18k
    }
2222
1.18k
    break;
2223
2224
0
  case NT_CTYPE:
2225
0
    *min = 1;
2226
0
    break;
2227
2228
9.10k
  case NT_CCLASS:
2229
11.4k
  case NT_CANY:
2230
11.4k
    *min = 1;
2231
11.4k
    break;
2232
2233
792
  case NT_QTFR:
2234
792
    {
2235
792
      QtfrNode* qn = NQTFR(node);
2236
2237
792
      if (qn->lower > 0) {
2238
792
  r = get_min_match_length(qn->target, min, env);
2239
792
  if (r == 0)
2240
792
    *min = distance_multiply(*min, qn->lower);
2241
792
      }
2242
792
    }
2243
792
    break;
2244
2245
0
  case NT_ENCLOSE:
2246
0
    {
2247
0
      EncloseNode* en = NENCLOSE(node);
2248
0
      switch (en->type) {
2249
0
      case ENCLOSE_MEMORY:
2250
0
        if (IS_ENCLOSE_MIN_FIXED(en))
2251
0
          *min = en->min_len;
2252
0
        else {
2253
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254
0
      *min = 0;  /* recursive */
2255
0
    else {
2256
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2257
0
      r = get_min_match_length(en->target, min, env);
2258
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2259
0
      if (r == 0) {
2260
0
        en->min_len = *min;
2261
0
        SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2262
0
      }
2263
0
    }
2264
0
        }
2265
0
        break;
2266
2267
0
      case ENCLOSE_OPTION:
2268
0
      case ENCLOSE_STOP_BACKTRACK:
2269
0
      case ENCLOSE_CONDITION:
2270
0
  r = get_min_match_length(en->target, min, env);
2271
0
  break;
2272
2273
0
      case ENCLOSE_ABSENT:
2274
0
  break;
2275
0
      }
2276
0
    }
2277
0
    break;
2278
2279
0
  case NT_ANCHOR:
2280
0
  default:
2281
0
    break;
2282
14.2k
  }
2283
2284
14.2k
  return r;
2285
14.2k
}
2286
2287
static int
2288
get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289
0
{
2290
0
  OnigDistance tmax;
2291
0
  int r = 0;
2292
2293
0
  *max = 0;
2294
0
  switch (NTYPE(node)) {
2295
0
  case NT_LIST:
2296
0
    do {
2297
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2298
0
      if (r == 0)
2299
0
  *max = distance_add(*max, tmax);
2300
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301
0
    break;
2302
2303
0
  case NT_ALT:
2304
0
    do {
2305
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2306
0
      if (r == 0 && *max < tmax) *max = tmax;
2307
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308
0
    break;
2309
2310
0
  case NT_STR:
2311
0
    {
2312
0
      StrNode* sn = NSTR(node);
2313
0
      *max = sn->end - sn->s;
2314
0
    }
2315
0
    break;
2316
2317
0
  case NT_CTYPE:
2318
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319
0
    break;
2320
2321
0
  case NT_CCLASS:
2322
0
  case NT_CANY:
2323
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324
0
    break;
2325
2326
0
  case NT_BREF:
2327
0
    {
2328
0
      int i;
2329
0
      int* backs;
2330
0
      Node** nodes = SCANENV_MEM_NODES(env);
2331
0
      BRefNode* br = NBREF(node);
2332
0
      if (br->state & NST_RECURSION) {
2333
0
  *max = ONIG_INFINITE_DISTANCE;
2334
0
  break;
2335
0
      }
2336
0
      backs = BACKREFS_P(br);
2337
0
      for (i = 0; i < br->back_num; i++) {
2338
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2339
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340
0
  if (r != 0) break;
2341
0
  if (*max < tmax) *max = tmax;
2342
0
      }
2343
0
    }
2344
0
    break;
2345
2346
0
#ifdef USE_SUBEXP_CALL
2347
0
  case NT_CALL:
2348
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2349
0
      r = get_max_match_length(NCALL(node)->target, max, env);
2350
0
    else
2351
0
      *max = ONIG_INFINITE_DISTANCE;
2352
0
    break;
2353
0
#endif
2354
2355
0
  case NT_QTFR:
2356
0
    {
2357
0
      QtfrNode* qn = NQTFR(node);
2358
2359
0
      if (qn->upper != 0) {
2360
0
  r = get_max_match_length(qn->target, max, env);
2361
0
  if (r == 0 && *max != 0) {
2362
0
    if (! IS_REPEAT_INFINITE(qn->upper))
2363
0
      *max = distance_multiply(*max, qn->upper);
2364
0
    else
2365
0
      *max = ONIG_INFINITE_DISTANCE;
2366
0
  }
2367
0
      }
2368
0
    }
2369
0
    break;
2370
2371
0
  case NT_ENCLOSE:
2372
0
    {
2373
0
      EncloseNode* en = NENCLOSE(node);
2374
0
      switch (en->type) {
2375
0
      case ENCLOSE_MEMORY:
2376
0
  if (IS_ENCLOSE_MAX_FIXED(en))
2377
0
    *max = en->max_len;
2378
0
  else {
2379
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2380
0
      *max = ONIG_INFINITE_DISTANCE;
2381
0
    else {
2382
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2383
0
      r = get_max_match_length(en->target, max, env);
2384
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2385
0
      if (r == 0) {
2386
0
        en->max_len = *max;
2387
0
        SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2388
0
      }
2389
0
    }
2390
0
  }
2391
0
  break;
2392
2393
0
      case ENCLOSE_OPTION:
2394
0
      case ENCLOSE_STOP_BACKTRACK:
2395
0
      case ENCLOSE_CONDITION:
2396
0
  r = get_max_match_length(en->target, max, env);
2397
0
  break;
2398
2399
0
      case ENCLOSE_ABSENT:
2400
0
  break;
2401
0
      }
2402
0
    }
2403
0
    break;
2404
2405
0
  case NT_ANCHOR:
2406
0
  default:
2407
0
    break;
2408
0
  }
2409
2410
0
  return r;
2411
0
}
2412
2413
0
#define GET_CHAR_LEN_VARLEN           -1
2414
0
#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2415
2416
/* fixed size pattern node only */
2417
static int
2418
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419
0
{
2420
0
  int tlen;
2421
0
  int r = 0;
2422
2423
0
  level++;
2424
0
  *len = 0;
2425
0
  switch (NTYPE(node)) {
2426
0
  case NT_LIST:
2427
0
    do {
2428
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429
0
      if (r == 0)
2430
0
  *len = (int )distance_add(*len, tlen);
2431
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432
0
    break;
2433
2434
0
  case NT_ALT:
2435
0
    {
2436
0
      int tlen2;
2437
0
      int varlen = 0;
2438
2439
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440
0
      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441
0
  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442
0
  if (r == 0) {
2443
0
    if (tlen != tlen2)
2444
0
      varlen = 1;
2445
0
  }
2446
0
      }
2447
0
      if (r == 0) {
2448
0
  if (varlen != 0) {
2449
0
    if (level == 1)
2450
0
      r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2451
0
    else
2452
0
      r = GET_CHAR_LEN_VARLEN;
2453
0
  }
2454
0
  else
2455
0
    *len = tlen;
2456
0
      }
2457
0
    }
2458
0
    break;
2459
2460
0
  case NT_STR:
2461
0
    {
2462
0
      StrNode* sn = NSTR(node);
2463
0
      UChar *s = sn->s;
2464
0
      while (s < sn->end) {
2465
0
  s += enclen(reg->enc, s, sn->end);
2466
0
  (*len)++;
2467
0
      }
2468
0
    }
2469
0
    break;
2470
2471
0
  case NT_QTFR:
2472
0
    {
2473
0
      QtfrNode* qn = NQTFR(node);
2474
0
      if (qn->lower == qn->upper) {
2475
0
  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476
0
  if (r == 0)
2477
0
    *len = (int )distance_multiply(tlen, qn->lower);
2478
0
      }
2479
0
      else
2480
0
  r = GET_CHAR_LEN_VARLEN;
2481
0
    }
2482
0
    break;
2483
2484
0
#ifdef USE_SUBEXP_CALL
2485
0
  case NT_CALL:
2486
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2487
0
      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488
0
    else
2489
0
      r = GET_CHAR_LEN_VARLEN;
2490
0
    break;
2491
0
#endif
2492
2493
0
  case NT_CTYPE:
2494
0
    *len = 1;
2495
0
    break;
2496
2497
0
  case NT_CCLASS:
2498
0
  case NT_CANY:
2499
0
    *len = 1;
2500
0
    break;
2501
2502
0
  case NT_ENCLOSE:
2503
0
    {
2504
0
      EncloseNode* en = NENCLOSE(node);
2505
0
      switch (en->type) {
2506
0
      case ENCLOSE_MEMORY:
2507
0
#ifdef USE_SUBEXP_CALL
2508
0
  if (IS_ENCLOSE_CLEN_FIXED(en))
2509
0
    *len = en->char_len;
2510
0
  else {
2511
0
    r = get_char_length_tree1(en->target, reg, len, level);
2512
0
    if (r == 0) {
2513
0
      en->char_len = *len;
2514
0
      SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2515
0
    }
2516
0
  }
2517
0
  break;
2518
0
#endif
2519
0
      case ENCLOSE_OPTION:
2520
0
      case ENCLOSE_STOP_BACKTRACK:
2521
0
      case ENCLOSE_CONDITION:
2522
0
  r = get_char_length_tree1(en->target, reg, len, level);
2523
0
  break;
2524
0
      case ENCLOSE_ABSENT:
2525
0
      default:
2526
0
  break;
2527
0
      }
2528
0
    }
2529
0
    break;
2530
2531
0
  case NT_ANCHOR:
2532
0
    break;
2533
2534
0
  default:
2535
0
    r = GET_CHAR_LEN_VARLEN;
2536
0
    break;
2537
0
  }
2538
2539
0
  return r;
2540
0
}
2541
2542
static int
2543
get_char_length_tree(Node* node, regex_t* reg, int* len)
2544
0
{
2545
0
  return get_char_length_tree1(node, reg, len, 0);
2546
0
}
2547
2548
/* x is not included y ==>  1 : 0 */
2549
static int
2550
is_not_included(Node* x, Node* y, regex_t* reg)
2551
5.14k
{
2552
5.14k
  int i;
2553
5.14k
  OnigDistance len;
2554
5.14k
  OnigCodePoint code;
2555
5.14k
  UChar *p;
2556
5.14k
  int ytype;
2557
2558
10.2k
 retry:
2559
10.2k
  ytype = NTYPE(y);
2560
10.2k
  switch (NTYPE(x)) {
2561
0
  case NT_CTYPE:
2562
0
    {
2563
0
      switch (ytype) {
2564
0
      case NT_CTYPE:
2565
0
  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566
0
      NCTYPE(y)->not   != NCTYPE(x)->not &&
2567
0
      NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568
0
    return 1;
2569
0
  else
2570
0
    return 0;
2571
0
  break;
2572
2573
0
      case NT_CCLASS:
2574
5.14k
      swap:
2575
5.14k
  {
2576
5.14k
    Node* tmp;
2577
5.14k
    tmp = x; x = y; y = tmp;
2578
5.14k
    goto retry;
2579
0
  }
2580
0
  break;
2581
2582
0
      case NT_STR:
2583
0
  goto swap;
2584
0
  break;
2585
2586
0
      default:
2587
0
  break;
2588
0
      }
2589
0
    }
2590
0
    break;
2591
2592
5.14k
  case NT_CCLASS:
2593
5.14k
    {
2594
5.14k
      CClassNode* xc = NCCLASS(x);
2595
5.14k
      switch (ytype) {
2596
0
      case NT_CTYPE:
2597
0
  switch (NCTYPE(y)->ctype) {
2598
0
  case ONIGENC_CTYPE_WORD:
2599
0
    if (NCTYPE(y)->not == 0) {
2600
0
      if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601
0
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602
0
    if (BITSET_AT(xc->bs, i)) {
2603
0
      if (NCTYPE(y)->ascii_range) {
2604
0
        if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605
0
      }
2606
0
      else {
2607
0
        if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608
0
      }
2609
0
    }
2610
0
        }
2611
0
        return 1;
2612
0
      }
2613
0
      return 0;
2614
0
    }
2615
0
    else {
2616
0
      if (IS_NOT_NULL(xc->mbuf)) return 0;
2617
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618
0
        int is_word;
2619
0
        if (NCTYPE(y)->ascii_range)
2620
0
    is_word = IS_CODE_SB_WORD(reg->enc, i);
2621
0
        else
2622
0
    is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623
0
        if (! is_word) {
2624
0
    if (!IS_NCCLASS_NOT(xc)) {
2625
0
      if (BITSET_AT(xc->bs, i))
2626
0
        return 0;
2627
0
    }
2628
0
    else {
2629
0
      if (! BITSET_AT(xc->bs, i))
2630
0
        return 0;
2631
0
    }
2632
0
        }
2633
0
      }
2634
0
      return 1;
2635
0
    }
2636
0
    break;
2637
2638
0
  default:
2639
0
    break;
2640
0
  }
2641
0
  break;
2642
2643
0
      case NT_CCLASS:
2644
0
  {
2645
0
    int v;
2646
0
    CClassNode* yc = NCCLASS(y);
2647
2648
0
    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649
0
      v = BITSET_AT(xc->bs, i);
2650
0
      if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651
0
    (v == 0 && IS_NCCLASS_NOT(xc))) {
2652
0
        v = BITSET_AT(yc->bs, i);
2653
0
        if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654
0
      (v == 0 && IS_NCCLASS_NOT(yc)))
2655
0
    return 0;
2656
0
      }
2657
0
    }
2658
0
    if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659
0
        (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660
0
      return 1;
2661
0
    return 0;
2662
0
  }
2663
0
  break;
2664
2665
5.14k
      case NT_STR:
2666
5.14k
  goto swap;
2667
0
  break;
2668
2669
0
      default:
2670
0
  break;
2671
5.14k
      }
2672
5.14k
    }
2673
0
    break;
2674
2675
5.14k
  case NT_STR:
2676
5.14k
    {
2677
5.14k
      StrNode* xs = NSTR(x);
2678
5.14k
      if (NSTRING_LEN(x) == 0)
2679
0
  break;
2680
2681
5.14k
      switch (ytype) {
2682
0
      case NT_CTYPE:
2683
0
  switch (NCTYPE(y)->ctype) {
2684
0
  case ONIGENC_CTYPE_WORD:
2685
0
    if (NCTYPE(y)->ascii_range) {
2686
0
      if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687
0
        return NCTYPE(y)->not;
2688
0
      else
2689
0
        return !(NCTYPE(y)->not);
2690
0
    }
2691
0
    else {
2692
0
      if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693
0
        return NCTYPE(y)->not;
2694
0
      else
2695
0
        return !(NCTYPE(y)->not);
2696
0
    }
2697
0
    break;
2698
0
  default:
2699
0
    break;
2700
0
  }
2701
0
  break;
2702
2703
5.14k
      case NT_CCLASS:
2704
5.14k
  {
2705
5.14k
    CClassNode* cc = NCCLASS(y);
2706
2707
5.14k
    code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708
5.14k
             xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709
5.14k
    return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710
0
  }
2711
0
  break;
2712
2713
0
      case NT_STR:
2714
0
  {
2715
0
    UChar *q;
2716
0
    StrNode* ys = NSTR(y);
2717
0
    len = NSTRING_LEN(x);
2718
0
    if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719
0
    if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720
      /* tiny version */
2721
0
      return 0;
2722
0
    }
2723
0
    else {
2724
0
      for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725
0
        if (*p != *q) return 1;
2726
0
      }
2727
0
    }
2728
0
  }
2729
0
  break;
2730
2731
0
      default:
2732
0
  break;
2733
5.14k
      }
2734
5.14k
    }
2735
0
    break;
2736
2737
0
  default:
2738
0
    break;
2739
10.2k
  }
2740
2741
0
  return 0;
2742
10.2k
}
2743
2744
static Node*
2745
get_head_value_node(Node* node, int exact, regex_t* reg)
2746
27.7k
{
2747
27.7k
  Node* n = NULL_NODE;
2748
2749
27.7k
  switch (NTYPE(node)) {
2750
0
  case NT_BREF:
2751
792
  case NT_ALT:
2752
2.77k
  case NT_CANY:
2753
2.77k
#ifdef USE_SUBEXP_CALL
2754
2.77k
  case NT_CALL:
2755
2.77k
#endif
2756
2.77k
    break;
2757
2758
0
  case NT_CTYPE:
2759
8.71k
  case NT_CCLASS:
2760
8.71k
    if (exact == 0) {
2761
7.92k
      n = node;
2762
7.92k
    }
2763
8.71k
    break;
2764
2765
0
  case NT_LIST:
2766
0
    n = get_head_value_node(NCAR(node), exact, reg);
2767
0
    break;
2768
2769
11.0k
  case NT_STR:
2770
11.0k
    {
2771
11.0k
      StrNode* sn = NSTR(node);
2772
2773
11.0k
      if (sn->end <= sn->s)
2774
0
  break;
2775
2776
11.0k
      if (exact == 0 ||
2777
11.0k
    NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2778
11.0k
  n = node;
2779
11.0k
      }
2780
11.0k
    }
2781
0
    break;
2782
2783
3.96k
  case NT_QTFR:
2784
3.96k
    {
2785
3.96k
      QtfrNode* qn = NQTFR(node);
2786
3.96k
      if (qn->lower > 0) {
2787
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2788
  if (IS_NOT_NULL(qn->head_exact))
2789
    n = qn->head_exact;
2790
  else
2791
#endif
2792
792
    n = get_head_value_node(qn->target, exact, reg);
2793
792
      }
2794
3.96k
    }
2795
3.96k
    break;
2796
2797
0
  case NT_ENCLOSE:
2798
0
    {
2799
0
      EncloseNode* en = NENCLOSE(node);
2800
0
      switch (en->type) {
2801
0
      case ENCLOSE_OPTION:
2802
0
  {
2803
0
    OnigOptionType options = reg->options;
2804
2805
0
    reg->options = NENCLOSE(node)->option;
2806
0
    n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2807
0
    reg->options = options;
2808
0
  }
2809
0
  break;
2810
2811
0
      case ENCLOSE_MEMORY:
2812
0
      case ENCLOSE_STOP_BACKTRACK:
2813
0
      case ENCLOSE_CONDITION:
2814
0
  n = get_head_value_node(en->target, exact, reg);
2815
0
  break;
2816
2817
0
      case ENCLOSE_ABSENT:
2818
0
  break;
2819
0
      }
2820
0
    }
2821
0
    break;
2822
2823
1.18k
  case NT_ANCHOR:
2824
1.18k
    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2825
0
      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2826
1.18k
    break;
2827
2828
0
  default:
2829
0
    break;
2830
27.7k
  }
2831
2832
27.7k
  return n;
2833
27.7k
}
2834
2835
static int
2836
check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2837
0
{
2838
0
  int type, r = 0;
2839
2840
0
  type = NTYPE(node);
2841
0
  if ((NTYPE2BIT(type) & type_mask) == 0)
2842
0
    return 1;
2843
2844
0
  switch (type) {
2845
0
  case NT_LIST:
2846
0
  case NT_ALT:
2847
0
    do {
2848
0
      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2849
0
        anchor_mask);
2850
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2851
0
    break;
2852
2853
0
  case NT_QTFR:
2854
0
    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2855
0
      anchor_mask);
2856
0
    break;
2857
2858
0
  case NT_ENCLOSE:
2859
0
    {
2860
0
      EncloseNode* en = NENCLOSE(node);
2861
0
      if ((en->type & enclose_mask) == 0)
2862
0
  return 1;
2863
2864
0
      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2865
0
    }
2866
0
    break;
2867
2868
0
  case NT_ANCHOR:
2869
0
    type = NANCHOR(node)->type;
2870
0
    if ((type & anchor_mask) == 0)
2871
0
      return 1;
2872
2873
0
    if (NANCHOR(node)->target)
2874
0
      r = check_type_tree(NANCHOR(node)->target,
2875
0
        type_mask, enclose_mask, anchor_mask);
2876
0
    break;
2877
2878
0
  default:
2879
0
    break;
2880
0
  }
2881
0
  return r;
2882
0
}
2883
2884
#ifdef USE_SUBEXP_CALL
2885
2886
0
# define RECURSION_EXIST       1
2887
0
# define RECURSION_INFINITE    2
2888
2889
static int
2890
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2891
0
{
2892
0
  int type;
2893
0
  int r = 0;
2894
2895
0
  type = NTYPE(node);
2896
0
  switch (type) {
2897
0
  case NT_LIST:
2898
0
    {
2899
0
      Node *x;
2900
0
      OnigDistance min;
2901
0
      int ret;
2902
2903
0
      x = node;
2904
0
      do {
2905
0
  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2906
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2907
0
  r |= ret;
2908
0
  if (head) {
2909
0
    ret = get_min_match_length(NCAR(x), &min, env);
2910
0
    if (ret != 0) return ret;
2911
0
    if (min != 0) head = 0;
2912
0
  }
2913
0
      } while (IS_NOT_NULL(x = NCDR(x)));
2914
0
    }
2915
0
    break;
2916
2917
0
  case NT_ALT:
2918
0
    {
2919
0
      int ret;
2920
0
      r = RECURSION_EXIST;
2921
0
      do {
2922
0
  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2923
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2924
0
  r &= ret;
2925
0
      } while (IS_NOT_NULL(node = NCDR(node)));
2926
0
    }
2927
0
    break;
2928
2929
0
  case NT_QTFR:
2930
0
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2931
0
    if (r == RECURSION_EXIST) {
2932
0
      if (NQTFR(node)->lower == 0) r = 0;
2933
0
    }
2934
0
    break;
2935
2936
0
  case NT_ANCHOR:
2937
0
    {
2938
0
      AnchorNode* an = NANCHOR(node);
2939
0
      switch (an->type) {
2940
0
      case ANCHOR_PREC_READ:
2941
0
      case ANCHOR_PREC_READ_NOT:
2942
0
      case ANCHOR_LOOK_BEHIND:
2943
0
      case ANCHOR_LOOK_BEHIND_NOT:
2944
0
  r = subexp_inf_recursive_check(an->target, env, head);
2945
0
  break;
2946
0
      }
2947
0
    }
2948
0
    break;
2949
2950
0
  case NT_CALL:
2951
0
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2952
0
    break;
2953
2954
0
  case NT_ENCLOSE:
2955
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2956
0
      return 0;
2957
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2958
0
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2959
0
    else {
2960
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
2961
0
      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2963
0
    }
2964
0
    break;
2965
2966
0
  default:
2967
0
    break;
2968
0
  }
2969
2970
0
  return r;
2971
0
}
2972
2973
static int
2974
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2975
0
{
2976
0
  int type;
2977
0
  int r = 0;
2978
2979
0
  type = NTYPE(node);
2980
0
  switch (type) {
2981
0
  case NT_LIST:
2982
0
  case NT_ALT:
2983
0
    do {
2984
0
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2985
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2986
0
    break;
2987
2988
0
  case NT_QTFR:
2989
0
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2990
0
    break;
2991
2992
0
  case NT_ANCHOR:
2993
0
    {
2994
0
      AnchorNode* an = NANCHOR(node);
2995
0
      switch (an->type) {
2996
0
      case ANCHOR_PREC_READ:
2997
0
      case ANCHOR_PREC_READ_NOT:
2998
0
      case ANCHOR_LOOK_BEHIND:
2999
0
      case ANCHOR_LOOK_BEHIND_NOT:
3000
0
  r = subexp_inf_recursive_check_trav(an->target, env);
3001
0
  break;
3002
0
      }
3003
0
    }
3004
0
    break;
3005
3006
0
  case NT_ENCLOSE:
3007
0
    {
3008
0
      EncloseNode* en = NENCLOSE(node);
3009
3010
0
      if (IS_ENCLOSE_RECURSION(en)) {
3011
0
  SET_ENCLOSE_STATUS(node, NST_MARK1);
3012
0
  r = subexp_inf_recursive_check(en->target, env, 1);
3013
0
  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014
0
  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3015
0
      }
3016
0
      r = subexp_inf_recursive_check_trav(en->target, env);
3017
0
    }
3018
3019
0
    break;
3020
3021
0
  default:
3022
0
    break;
3023
0
  }
3024
3025
0
  return r;
3026
0
}
3027
3028
static int
3029
subexp_recursive_check(Node* node)
3030
0
{
3031
0
  int r = 0;
3032
3033
0
  switch (NTYPE(node)) {
3034
0
  case NT_LIST:
3035
0
  case NT_ALT:
3036
0
    do {
3037
0
      r |= subexp_recursive_check(NCAR(node));
3038
0
    } while (IS_NOT_NULL(node = NCDR(node)));
3039
0
    break;
3040
3041
0
  case NT_QTFR:
3042
0
    r = subexp_recursive_check(NQTFR(node)->target);
3043
0
    break;
3044
3045
0
  case NT_ANCHOR:
3046
0
    {
3047
0
      AnchorNode* an = NANCHOR(node);
3048
0
      switch (an->type) {
3049
0
      case ANCHOR_PREC_READ:
3050
0
      case ANCHOR_PREC_READ_NOT:
3051
0
      case ANCHOR_LOOK_BEHIND:
3052
0
      case ANCHOR_LOOK_BEHIND_NOT:
3053
0
  r = subexp_recursive_check(an->target);
3054
0
  break;
3055
0
      }
3056
0
    }
3057
0
    break;
3058
3059
0
  case NT_CALL:
3060
0
    r = subexp_recursive_check(NCALL(node)->target);
3061
0
    if (r != 0) SET_CALL_RECURSION(node);
3062
0
    break;
3063
3064
0
  case NT_ENCLOSE:
3065
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3066
0
      return 0;
3067
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3068
0
      return 1; /* recursion */
3069
0
    else {
3070
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
3071
0
      r = subexp_recursive_check(NENCLOSE(node)->target);
3072
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3073
0
    }
3074
0
    break;
3075
3076
0
  default:
3077
0
    break;
3078
0
  }
3079
3080
0
  return r;
3081
0
}
3082
3083
3084
static int
3085
subexp_recursive_check_trav(Node* node, ScanEnv* env)
3086
0
{
3087
0
# define FOUND_CALLED_NODE    1
3088
3089
0
  int type;
3090
0
  int r = 0;
3091
3092
0
  type = NTYPE(node);
3093
0
  switch (type) {
3094
0
  case NT_LIST:
3095
0
  case NT_ALT:
3096
0
    {
3097
0
      int ret;
3098
0
      do {
3099
0
  ret = subexp_recursive_check_trav(NCAR(node), env);
3100
0
  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3101
0
  else if (ret < 0) return ret;
3102
0
      } while (IS_NOT_NULL(node = NCDR(node)));
3103
0
    }
3104
0
    break;
3105
3106
0
  case NT_QTFR:
3107
0
    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3108
0
    if (NQTFR(node)->upper == 0) {
3109
0
      if (r == FOUND_CALLED_NODE)
3110
0
  NQTFR(node)->is_referred = 1;
3111
0
    }
3112
0
    break;
3113
3114
0
  case NT_ANCHOR:
3115
0
    {
3116
0
      AnchorNode* an = NANCHOR(node);
3117
0
      switch (an->type) {
3118
0
      case ANCHOR_PREC_READ:
3119
0
      case ANCHOR_PREC_READ_NOT:
3120
0
      case ANCHOR_LOOK_BEHIND:
3121
0
      case ANCHOR_LOOK_BEHIND_NOT:
3122
0
  r = subexp_recursive_check_trav(an->target, env);
3123
0
  break;
3124
0
      }
3125
0
    }
3126
0
    break;
3127
3128
0
  case NT_ENCLOSE:
3129
0
    {
3130
0
      EncloseNode* en = NENCLOSE(node);
3131
3132
0
      if (! IS_ENCLOSE_RECURSION(en)) {
3133
0
  if (IS_ENCLOSE_CALLED(en)) {
3134
0
    SET_ENCLOSE_STATUS(node, NST_MARK1);
3135
0
    r = subexp_recursive_check(en->target);
3136
0
    if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137
0
    CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3138
0
  }
3139
0
      }
3140
0
      r = subexp_recursive_check_trav(en->target, env);
3141
0
      if (IS_ENCLOSE_CALLED(en))
3142
0
  r |= FOUND_CALLED_NODE;
3143
0
    }
3144
0
    break;
3145
3146
0
  default:
3147
0
    break;
3148
0
  }
3149
3150
0
  return r;
3151
0
}
3152
3153
static int
3154
setup_subexp_call(Node* node, ScanEnv* env)
3155
0
{
3156
0
  int type;
3157
0
  int r = 0;
3158
3159
0
  type = NTYPE(node);
3160
0
  switch (type) {
3161
0
  case NT_LIST:
3162
0
    do {
3163
0
      r = setup_subexp_call(NCAR(node), env);
3164
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3165
0
    break;
3166
3167
0
  case NT_ALT:
3168
0
    do {
3169
0
      r = setup_subexp_call(NCAR(node), env);
3170
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3171
0
    break;
3172
3173
0
  case NT_QTFR:
3174
0
    r = setup_subexp_call(NQTFR(node)->target, env);
3175
0
    break;
3176
0
  case NT_ENCLOSE:
3177
0
    r = setup_subexp_call(NENCLOSE(node)->target, env);
3178
0
    break;
3179
3180
0
  case NT_CALL:
3181
0
    {
3182
0
      CallNode* cn = NCALL(node);
3183
0
      Node** nodes = SCANENV_MEM_NODES(env);
3184
3185
0
      if (cn->group_num != 0) {
3186
0
  int gnum = cn->group_num;
3187
3188
0
# ifdef USE_NAMED_GROUP
3189
0
  if (env->num_named > 0 &&
3190
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3191
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3192
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3193
0
  }
3194
0
# endif
3195
0
  if (gnum > env->num_mem) {
3196
0
    onig_scan_env_set_error_string(env,
3197
0
     ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3198
0
    return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3199
0
  }
3200
3201
0
# ifdef USE_NAMED_GROUP
3202
0
      set_call_attr:
3203
0
# endif
3204
0
  cn->target = nodes[cn->group_num];
3205
0
  if (IS_NULL(cn->target)) {
3206
0
    onig_scan_env_set_error_string(env,
3207
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3208
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3209
0
  }
3210
0
  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3211
0
  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3212
0
  cn->unset_addr_list = env->unset_addr_list;
3213
0
      }
3214
0
# ifdef USE_NAMED_GROUP
3215
0
#  ifdef USE_PERL_SUBEXP_CALL
3216
0
      else if (cn->name == cn->name_end) {
3217
0
  goto set_call_attr;
3218
0
      }
3219
0
#  endif
3220
0
      else {
3221
0
  int *refs;
3222
3223
0
  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3224
0
             &refs);
3225
0
  if (n <= 0) {
3226
0
    onig_scan_env_set_error_string(env,
3227
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3228
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3229
0
  }
3230
0
  else if (n > 1 &&
3231
0
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3232
0
    onig_scan_env_set_error_string(env,
3233
0
      ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3234
0
    return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3235
0
  }
3236
0
  else {
3237
0
    cn->group_num = refs[0];
3238
0
    goto set_call_attr;
3239
0
  }
3240
0
      }
3241
0
# endif
3242
0
    }
3243
0
    break;
3244
3245
0
  case NT_ANCHOR:
3246
0
    {
3247
0
      AnchorNode* an = NANCHOR(node);
3248
3249
0
      switch (an->type) {
3250
0
      case ANCHOR_PREC_READ:
3251
0
      case ANCHOR_PREC_READ_NOT:
3252
0
      case ANCHOR_LOOK_BEHIND:
3253
0
      case ANCHOR_LOOK_BEHIND_NOT:
3254
0
  r = setup_subexp_call(an->target, env);
3255
0
  break;
3256
0
      }
3257
0
    }
3258
0
    break;
3259
3260
0
  default:
3261
0
    break;
3262
0
  }
3263
3264
0
  return r;
3265
0
}
3266
#endif
3267
3268
7.52k
#define IN_ALT          (1<<0)
3269
1.98k
#define IN_NOT          (1<<1)
3270
23.7k
#define IN_REPEAT       (1<<2)
3271
13.8k
#define IN_VAR_REPEAT   (1<<3)
3272
1.98k
#define IN_CALL         (1<<4)
3273
1.98k
#define IN_RECCALL      (1<<5)
3274
0
#define IN_LOOK_BEHIND  (1<<6)
3275
3276
/* divide different length alternatives in look-behind.
3277
  (?<=A|B) ==> (?<=A)|(?<=B)
3278
  (?<!A|B) ==> (?<!A)(?<!B)
3279
*/
3280
static int
3281
divide_look_behind_alternatives(Node* node)
3282
0
{
3283
0
  Node *head, *np, *insert_node;
3284
0
  AnchorNode* an = NANCHOR(node);
3285
0
  int anc_type = an->type;
3286
3287
0
  head = an->target;
3288
0
  np = NCAR(head);
3289
0
  swap_node(node, head);
3290
0
  NCAR(node) = head;
3291
0
  NANCHOR(head)->target = np;
3292
3293
0
  np = node;
3294
0
  while ((np = NCDR(np)) != NULL_NODE) {
3295
0
    insert_node = onig_node_new_anchor(anc_type);
3296
0
    CHECK_NULL_RETURN_MEMERR(insert_node);
3297
0
    NANCHOR(insert_node)->target = NCAR(np);
3298
0
    NCAR(np) = insert_node;
3299
0
  }
3300
3301
0
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3302
0
    np = node;
3303
0
    do {
3304
0
      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3305
0
    } while ((np = NCDR(np)) != NULL_NODE);
3306
0
  }
3307
0
  return 0;
3308
0
}
3309
3310
static int
3311
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3312
0
{
3313
0
  int r, len;
3314
0
  AnchorNode* an = NANCHOR(node);
3315
3316
0
  r = get_char_length_tree(an->target, reg, &len);
3317
0
  if (r == 0)
3318
0
    an->char_len = len;
3319
0
  else if (r == GET_CHAR_LEN_VARLEN)
3320
0
    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3321
0
  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3322
0
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3323
0
      r = divide_look_behind_alternatives(node);
3324
0
    else
3325
0
      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3326
0
  }
3327
3328
0
  return r;
3329
0
}
3330
3331
static int
3332
next_setup(Node* node, Node* next_node, regex_t* reg)
3333
29.7k
{
3334
29.7k
  int type;
3335
3336
31.6k
 retry:
3337
31.6k
  type = NTYPE(node);
3338
31.6k
  if (type == NT_QTFR) {
3339
10.6k
    QtfrNode* qn = NQTFR(node);
3340
10.6k
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3341
9.90k
#ifdef USE_QTFR_PEEK_NEXT
3342
9.90k
      Node* n = get_head_value_node(next_node, 1, reg);
3343
      /* '\0': for UTF-16BE etc... */
3344
9.90k
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3345
5.94k
  qn->next_head_exact = n;
3346
5.94k
      }
3347
9.90k
#endif
3348
      /* automatic possessification a*b ==> (?>a*)b */
3349
9.90k
      if (qn->lower <= 1) {
3350
9.90k
  int ttype = NTYPE(qn->target);
3351
9.90k
  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3352
9.10k
    Node *x, *y;
3353
9.10k
    x = get_head_value_node(qn->target, 0, reg);
3354
9.10k
    if (IS_NOT_NULL(x)) {
3355
7.92k
      y = get_head_value_node(next_node,  0, reg);
3356
7.92k
      if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3357
5.14k
        Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3358
5.14k
        CHECK_NULL_RETURN_MEMERR(en);
3359
5.14k
        SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3360
5.14k
        swap_node(node, en);
3361
5.14k
        NENCLOSE(node)->target = en;
3362
5.14k
      }
3363
7.92k
    }
3364
9.10k
  }
3365
9.90k
      }
3366
9.90k
    }
3367
10.6k
  }
3368
20.9k
  else if (type == NT_ENCLOSE) {
3369
2.37k
    EncloseNode* en = NENCLOSE(node);
3370
2.37k
    if (en->type == ENCLOSE_MEMORY) {
3371
1.98k
      node = en->target;
3372
1.98k
      goto retry;
3373
1.98k
    }
3374
2.37k
  }
3375
29.7k
  return 0;
3376
31.6k
}
3377
3378
3379
static int
3380
update_string_node_case_fold(regex_t* reg, Node *node)
3381
0
{
3382
0
  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3383
0
  UChar *sbuf, *ebuf, *sp;
3384
0
  int r, i, len;
3385
0
  OnigDistance sbuf_size;
3386
0
  StrNode* sn = NSTR(node);
3387
3388
0
  end = sn->end;
3389
0
  sbuf_size = (end - sn->s) * 2;
3390
0
  sbuf = (UChar* )xmalloc(sbuf_size);
3391
0
  CHECK_NULL_RETURN_MEMERR(sbuf);
3392
0
  ebuf = sbuf + sbuf_size;
3393
3394
0
  sp = sbuf;
3395
0
  p = sn->s;
3396
0
  while (p < end) {
3397
0
    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3398
0
    for (i = 0; i < len; i++) {
3399
0
      if (sp >= ebuf) {
3400
0
  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3401
0
  if (IS_NULL(p)) {
3402
0
    xfree(sbuf);
3403
0
    return ONIGERR_MEMORY;
3404
0
  }
3405
0
  sbuf = p;
3406
0
  sp = sbuf + sbuf_size;
3407
0
  sbuf_size *= 2;
3408
0
  ebuf = sbuf + sbuf_size;
3409
0
      }
3410
3411
0
      *sp++ = buf[i];
3412
0
    }
3413
0
  }
3414
3415
0
  r = onig_node_str_set(node, sbuf, sp);
3416
3417
0
  xfree(sbuf);
3418
0
  return r;
3419
0
}
3420
3421
static int
3422
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3423
         regex_t* reg)
3424
0
{
3425
0
  int r;
3426
0
  Node *node;
3427
3428
0
  node = onig_node_new_str(s, end);
3429
0
  if (IS_NULL(node)) return ONIGERR_MEMORY;
3430
3431
0
  r = update_string_node_case_fold(reg, node);
3432
0
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return r;
3435
0
  }
3436
3437
0
  NSTRING_SET_AMBIG(node);
3438
0
  NSTRING_SET_DONT_GET_OPT_INFO(node);
3439
0
  *rnode = node;
3440
0
  return 0;
3441
0
}
3442
3443
static int
3444
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3445
        int slen)
3446
0
{
3447
0
  int i;
3448
3449
0
  for (i = 0; i < item_num; i++) {
3450
0
    if (items[i].byte_len != slen) {
3451
0
      return 1;
3452
0
    }
3453
0
    if (items[i].code_len != 1) {
3454
0
      return 1;
3455
0
    }
3456
0
  }
3457
0
  return 0;
3458
0
}
3459
3460
static int
3461
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3462
          UChar *p, int slen, UChar *end,
3463
          regex_t* reg, Node **rnode)
3464
0
{
3465
0
  int r, i, j, len, varlen;
3466
0
  Node *anode, *var_anode, *snode, *xnode, *an;
3467
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3468
3469
0
  *rnode = var_anode = NULL_NODE;
3470
3471
0
  varlen = 0;
3472
0
  for (i = 0; i < item_num; i++) {
3473
0
    if (items[i].byte_len != slen) {
3474
0
      varlen = 1;
3475
0
      break;
3476
0
    }
3477
0
  }
3478
3479
0
  if (varlen != 0) {
3480
0
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3481
0
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3482
3483
0
    xnode = onig_node_new_list(NULL, NULL);
3484
0
    if (IS_NULL(xnode)) goto mem_err;
3485
0
    NCAR(var_anode) = xnode;
3486
3487
0
    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3488
0
    if (IS_NULL(anode)) goto mem_err;
3489
0
    NCAR(xnode) = anode;
3490
0
  }
3491
0
  else {
3492
0
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3493
0
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3494
0
  }
3495
3496
0
  snode = onig_node_new_str(p, p + slen);
3497
0
  if (IS_NULL(snode)) goto mem_err;
3498
3499
0
  NCAR(anode) = snode;
3500
3501
0
  for (i = 0; i < item_num; i++) {
3502
0
    snode = onig_node_new_str(NULL, NULL);
3503
0
    if (IS_NULL(snode)) goto mem_err;
3504
3505
0
    for (j = 0; j < items[i].code_len; j++) {
3506
0
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3507
0
      if (len < 0) {
3508
0
  r = len;
3509
0
  goto mem_err2;
3510
0
      }
3511
3512
0
      r = onig_node_str_cat(snode, buf, buf + len);
3513
0
      if (r != 0) goto mem_err2;
3514
0
    }
3515
3516
0
    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517
0
    if (IS_NULL(an)) {
3518
0
      goto mem_err2;
3519
0
    }
3520
3521
0
    if (items[i].byte_len != slen) {
3522
0
      Node *rem;
3523
0
      UChar *q = p + items[i].byte_len;
3524
3525
0
      if (q < end) {
3526
0
  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3527
0
  if (r != 0) {
3528
0
    onig_node_free(an);
3529
0
    goto mem_err2;
3530
0
  }
3531
3532
0
  xnode = onig_node_list_add(NULL_NODE, snode);
3533
0
  if (IS_NULL(xnode)) {
3534
0
    onig_node_free(an);
3535
0
    onig_node_free(rem);
3536
0
    goto mem_err2;
3537
0
  }
3538
0
  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3539
0
    onig_node_free(an);
3540
0
    onig_node_free(xnode);
3541
0
    onig_node_free(rem);
3542
0
    goto mem_err;
3543
0
  }
3544
3545
0
  NCAR(an) = xnode;
3546
0
      }
3547
0
      else {
3548
0
  NCAR(an) = snode;
3549
0
      }
3550
3551
0
      NCDR(var_anode) = an;
3552
0
      var_anode = an;
3553
0
    }
3554
0
    else {
3555
0
      NCAR(an)     = snode;
3556
0
      NCDR(anode) = an;
3557
0
      anode = an;
3558
0
    }
3559
0
  }
3560
3561
0
  return varlen;
3562
3563
0
 mem_err2:
3564
0
  onig_node_free(snode);
3565
3566
0
 mem_err:
3567
0
  onig_node_free(*rnode);
3568
3569
0
  return ONIGERR_MEMORY;
3570
0
}
3571
3572
0
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3573
3574
static int
3575
expand_case_fold_string(Node* node, regex_t* reg, int state)
3576
0
{
3577
0
  int r, n, len, alt_num;
3578
0
  int varlen = 0;
3579
0
  int is_in_look_behind;
3580
0
  UChar *start, *end, *p;
3581
0
  Node *top_root, *root, *snode, *prev_node;
3582
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3583
0
  StrNode* sn;
3584
3585
0
  if (NSTRING_IS_AMBIG(node)) return 0;
3586
3587
0
  sn = NSTR(node);
3588
3589
0
  start = sn->s;
3590
0
  end   = sn->end;
3591
0
  if (start >= end) return 0;
3592
3593
0
  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3594
3595
0
  r = 0;
3596
0
  top_root = root = prev_node = snode = NULL_NODE;
3597
0
  alt_num = 1;
3598
0
  p = start;
3599
0
  while (p < end) {
3600
0
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3601
0
             p, end, items);
3602
0
    if (n < 0) {
3603
0
      r = n;
3604
0
      goto err;
3605
0
    }
3606
3607
0
    len = enclen(reg->enc, p, end);
3608
3609
0
    varlen = is_case_fold_variable_len(n, items, len);
3610
0
    if (n == 0 || varlen == 0 || is_in_look_behind) {
3611
0
      if (IS_NULL(snode)) {
3612
0
  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3613
0
          onig_node_free(top_root);
3614
0
    top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3615
0
    if (IS_NULL(root)) {
3616
0
      onig_node_free(prev_node);
3617
0
      goto mem_err;
3618
0
    }
3619
0
  }
3620
3621
0
  prev_node = snode = onig_node_new_str(NULL, NULL);
3622
0
  if (IS_NULL(snode)) goto mem_err;
3623
0
  if (IS_NOT_NULL(root)) {
3624
0
    if (IS_NULL(onig_node_list_add(root, snode))) {
3625
0
      onig_node_free(snode);
3626
0
      goto mem_err;
3627
0
    }
3628
0
  }
3629
0
      }
3630
3631
0
      r = onig_node_str_cat(snode, p, p + len);
3632
0
      if (r != 0) goto err;
3633
0
    }
3634
0
    else {
3635
0
      alt_num *= (n + 1);
3636
0
      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3637
3638
0
      if (IS_NOT_NULL(snode)) {
3639
0
  r = update_string_node_case_fold(reg, snode);
3640
0
  if (r == 0) {
3641
0
    NSTRING_SET_AMBIG(snode);
3642
0
  }
3643
0
      }
3644
0
      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3645
0
        onig_node_free(top_root);
3646
0
  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3647
0
  if (IS_NULL(root)) {
3648
0
    onig_node_free(prev_node);
3649
0
    goto mem_err;
3650
0
  }
3651
0
      }
3652
3653
0
      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3654
0
      if (r < 0) goto mem_err;
3655
0
      if (r == 1) {
3656
0
  if (IS_NULL(root)) {
3657
0
    top_root = prev_node;
3658
0
  }
3659
0
  else {
3660
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3661
0
      onig_node_free(prev_node);
3662
0
      goto mem_err;
3663
0
    }
3664
0
  }
3665
3666
0
  root = NCAR(prev_node);
3667
0
      }
3668
0
      else { /* r == 0 */
3669
0
  if (IS_NOT_NULL(root)) {
3670
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671
0
      onig_node_free(prev_node);
3672
0
      goto mem_err;
3673
0
    }
3674
0
  }
3675
0
      }
3676
3677
0
      snode = NULL_NODE;
3678
0
    }
3679
3680
0
    p += len;
3681
0
  }
3682
0
  if (IS_NOT_NULL(snode)) {
3683
0
    r = update_string_node_case_fold(reg, snode);
3684
0
    if (r == 0) {
3685
0
      NSTRING_SET_AMBIG(snode);
3686
0
    }
3687
0
  }
3688
3689
0
  if (p < end) {
3690
0
    Node *srem;
3691
3692
0
    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3693
0
    if (r != 0) goto mem_err;
3694
3695
0
    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3696
0
      onig_node_free(top_root);
3697
0
      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3698
0
      if (IS_NULL(root)) {
3699
0
  onig_node_free(srem);
3700
0
  onig_node_free(prev_node);
3701
0
  goto mem_err;
3702
0
      }
3703
0
    }
3704
3705
0
    if (IS_NULL(root)) {
3706
0
      prev_node = srem;
3707
0
    }
3708
0
    else {
3709
0
      if (IS_NULL(onig_node_list_add(root, srem))) {
3710
0
  onig_node_free(srem);
3711
0
  goto mem_err;
3712
0
      }
3713
0
    }
3714
0
  }
3715
3716
  /* ending */
3717
0
  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3718
0
  swap_node(node, top_root);
3719
0
  onig_node_free(top_root);
3720
0
  return 0;
3721
3722
0
 mem_err:
3723
0
  r = ONIGERR_MEMORY;
3724
3725
0
 err:
3726
0
  onig_node_free(top_root);
3727
0
  return r;
3728
0
}
3729
3730
3731
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3732
3733
# define CEC_THRES_NUM_BIG_REPEAT         512
3734
# define CEC_INFINITE_NUM          0x7fffffff
3735
3736
# define CEC_IN_INFINITE_REPEAT    (1<<0)
3737
# define CEC_IN_FINITE_REPEAT      (1<<1)
3738
# define CEC_CONT_BIG_REPEAT       (1<<2)
3739
3740
static int
3741
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3742
{
3743
  int type;
3744
  int r = state;
3745
3746
  type = NTYPE(node);
3747
  switch (type) {
3748
  case NT_LIST:
3749
    {
3750
      Node* prev = NULL_NODE;
3751
      do {
3752
  r = setup_comb_exp_check(NCAR(node), r, env);
3753
  prev = NCAR(node);
3754
      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3755
    }
3756
    break;
3757
3758
  case NT_ALT:
3759
    {
3760
      int ret;
3761
      do {
3762
  ret = setup_comb_exp_check(NCAR(node), state, env);
3763
  r |= ret;
3764
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3765
    }
3766
    break;
3767
3768
  case NT_QTFR:
3769
    {
3770
      int child_state = state;
3771
      int add_state = 0;
3772
      QtfrNode* qn = NQTFR(node);
3773
      Node* target = qn->target;
3774
      int var_num;
3775
3776
      if (! IS_REPEAT_INFINITE(qn->upper)) {
3777
  if (qn->upper > 1) {
3778
    /* {0,1}, {1,1} are allowed */
3779
    child_state |= CEC_IN_FINITE_REPEAT;
3780
3781
    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3782
    if (env->backrefed_mem == 0) {
3783
      if (NTYPE(qn->target) == NT_ENCLOSE) {
3784
        EncloseNode* en = NENCLOSE(qn->target);
3785
        if (en->type == ENCLOSE_MEMORY) {
3786
    if (NTYPE(en->target) == NT_QTFR) {
3787
      QtfrNode* q = NQTFR(en->target);
3788
      if (IS_REPEAT_INFINITE(q->upper)
3789
          && q->greedy == qn->greedy) {
3790
        qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3791
        if (qn->upper == 1)
3792
          child_state = state;
3793
      }
3794
    }
3795
        }
3796
      }
3797
    }
3798
  }
3799
      }
3800
3801
      if (state & CEC_IN_FINITE_REPEAT) {
3802
  qn->comb_exp_check_num = -1;
3803
      }
3804
      else {
3805
  if (IS_REPEAT_INFINITE(qn->upper)) {
3806
    var_num = CEC_INFINITE_NUM;
3807
    child_state |= CEC_IN_INFINITE_REPEAT;
3808
  }
3809
  else {
3810
    var_num = qn->upper - qn->lower;
3811
  }
3812
3813
  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3814
    add_state |= CEC_CONT_BIG_REPEAT;
3815
3816
  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3817
      ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3818
       var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3819
    if (qn->comb_exp_check_num == 0) {
3820
      env->num_comb_exp_check++;
3821
      qn->comb_exp_check_num = env->num_comb_exp_check;
3822
      if (env->curr_max_regnum > env->comb_exp_max_regnum)
3823
        env->comb_exp_max_regnum = env->curr_max_regnum;
3824
    }
3825
  }
3826
      }
3827
3828
      r = setup_comb_exp_check(target, child_state, env);
3829
      r |= add_state;
3830
    }
3831
    break;
3832
3833
  case NT_ENCLOSE:
3834
    {
3835
      EncloseNode* en = NENCLOSE(node);
3836
3837
      switch (en->type) {
3838
      case ENCLOSE_MEMORY:
3839
  {
3840
    if (env->curr_max_regnum < en->regnum)
3841
      env->curr_max_regnum = en->regnum;
3842
3843
    r = setup_comb_exp_check(en->target, state, env);
3844
  }
3845
  break;
3846
3847
      default:
3848
  r = setup_comb_exp_check(en->target, state, env);
3849
  break;
3850
      }
3851
    }
3852
    break;
3853
3854
# ifdef USE_SUBEXP_CALL
3855
  case NT_CALL:
3856
    if (IS_CALL_RECURSION(NCALL(node)))
3857
      env->has_recursion = 1;
3858
    else
3859
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
3860
    break;
3861
# endif
3862
3863
  default:
3864
    break;
3865
  }
3866
3867
  return r;
3868
}
3869
#endif
3870
3871
/* setup_tree does the following work.
3872
 1. check empty loop. (set qn->target_empty_info)
3873
 2. expand ignore-case in char class.
3874
 3. set memory status bit flags. (reg->mem_stats)
3875
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3876
 5. find invalid patterns in look-behind.
3877
 6. expand repeated string.
3878
 */
3879
static int
3880
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3881
68.1k
{
3882
68.1k
  int type;
3883
68.1k
  int r = 0;
3884
3885
68.1k
restart:
3886
68.1k
  type = NTYPE(node);
3887
68.1k
  switch (type) {
3888
9.50k
  case NT_LIST:
3889
9.50k
    {
3890
9.50k
      Node* prev = NULL_NODE;
3891
39.2k
      do {
3892
39.2k
  r = setup_tree(NCAR(node), reg, state, env);
3893
39.2k
  if (IS_NOT_NULL(prev) && r == 0) {
3894
29.7k
    r = next_setup(prev, NCAR(node), reg);
3895
29.7k
  }
3896
39.2k
  prev = NCAR(node);
3897
39.2k
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3898
9.50k
    }
3899
9.50k
    break;
3900
3901
2.37k
  case NT_ALT:
3902
5.54k
    do {
3903
5.54k
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3904
5.54k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3905
2.37k
    break;
3906
3907
9.50k
  case NT_CCLASS:
3908
9.50k
    break;
3909
3910
17.0k
  case NT_STR:
3911
17.0k
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3912
0
      r = expand_case_fold_string(node, reg, state);
3913
0
    }
3914
17.0k
    break;
3915
3916
0
  case NT_CTYPE:
3917
3.96k
  case NT_CANY:
3918
3.96k
    break;
3919
3920
0
#ifdef USE_SUBEXP_CALL
3921
0
  case NT_CALL:
3922
0
    break;
3923
0
#endif
3924
3925
0
  case NT_BREF:
3926
0
    {
3927
0
      int i;
3928
0
      int* p;
3929
0
      Node** nodes = SCANENV_MEM_NODES(env);
3930
0
      BRefNode* br = NBREF(node);
3931
0
      p = BACKREFS_P(br);
3932
0
      for (i = 0; i < br->back_num; i++) {
3933
0
  if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3934
0
  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3935
0
  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3936
0
#ifdef USE_BACKREF_WITH_LEVEL
3937
0
  if (IS_BACKREF_NEST_LEVEL(br)) {
3938
0
    BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3939
0
  }
3940
0
#endif
3941
0
  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3942
0
      }
3943
0
    }
3944
0
    break;
3945
3946
11.8k
  case NT_QTFR:
3947
11.8k
    {
3948
11.8k
      OnigDistance d;
3949
11.8k
      QtfrNode* qn = NQTFR(node);
3950
11.8k
      Node* target = qn->target;
3951
3952
11.8k
      if ((state & IN_REPEAT) != 0) {
3953
792
  qn->state |= NST_IN_REPEAT;
3954
792
      }
3955
3956
11.8k
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3957
11.8k
  r = get_min_match_length(target, &d, env);
3958
11.8k
  if (r) break;
3959
11.8k
  if (d == 0) {
3960
0
    qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3961
0
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3962
0
    r = quantifiers_memory_node_info(target);
3963
0
    if (r < 0) break;
3964
0
    if (r > 0) {
3965
0
      qn->target_empty_info = r;
3966
0
    }
3967
0
#endif
3968
#if 0
3969
    r = get_max_match_length(target, &d, env);
3970
    if (r == 0 && d == 0) {
3971
      /*  ()* ==> ()?, ()+ ==> ()  */
3972
      qn->upper = 1;
3973
      if (qn->lower > 1) qn->lower = 1;
3974
      if (NTYPE(target) == NT_STR) {
3975
        qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3976
      }
3977
    }
3978
#endif
3979
0
  }
3980
11.8k
      }
3981
3982
11.8k
      state |= IN_REPEAT;
3983
11.8k
      if (qn->lower != qn->upper)
3984
11.8k
  state |= IN_VAR_REPEAT;
3985
11.8k
      r = setup_tree(target, reg, state, env);
3986
11.8k
      if (r) break;
3987
3988
      /* expand string */
3989
11.8k
#define EXPAND_STRING_MAX_LENGTH  100
3990
11.8k
      if (NTYPE(target) == NT_STR) {
3991
396
  if (qn->lower > 1) {
3992
0
    int i, n = qn->lower;
3993
0
    OnigDistance len = NSTRING_LEN(target);
3994
0
    StrNode* sn = NSTR(target);
3995
0
    Node* np;
3996
3997
0
    np = onig_node_new_str(sn->s, sn->end);
3998
0
    if (IS_NULL(np)) return ONIGERR_MEMORY;
3999
0
    NSTR(np)->flag = sn->flag;
4000
4001
0
    for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4002
0
      r = onig_node_str_cat(np, sn->s, sn->end);
4003
0
      if (r) {
4004
0
        onig_node_free(np);
4005
0
        return r;
4006
0
      }
4007
0
    }
4008
0
    if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4009
0
      Node *np1, *np2;
4010
4011
0
      qn->lower -= i;
4012
0
      if (! IS_REPEAT_INFINITE(qn->upper))
4013
0
        qn->upper -= i;
4014
4015
0
      np1 = onig_node_new_list(np, NULL);
4016
0
      if (IS_NULL(np1)) {
4017
0
        onig_node_free(np);
4018
0
        return ONIGERR_MEMORY;
4019
0
      }
4020
0
      swap_node(np1, node);
4021
0
      np2 = onig_node_list_add(node, np1);
4022
0
      if (IS_NULL(np2)) {
4023
0
        onig_node_free(np1);
4024
0
        return ONIGERR_MEMORY;
4025
0
      }
4026
0
    }
4027
0
    else {
4028
0
      swap_node(np, node);
4029
0
      onig_node_free(np);
4030
0
    }
4031
0
    break; /* break case NT_QTFR: */
4032
0
  }
4033
396
      }
4034
4035
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4036
      if (qn->greedy && (qn->target_empty_info != 0)) {
4037
  if (NTYPE(target) == NT_QTFR) {
4038
    QtfrNode* tqn = NQTFR(target);
4039
    if (IS_NOT_NULL(tqn->head_exact)) {
4040
      qn->head_exact  = tqn->head_exact;
4041
      tqn->head_exact = NULL;
4042
    }
4043
  }
4044
  else {
4045
    qn->head_exact = get_head_value_node(qn->target, 1, reg);
4046
  }
4047
      }
4048
#endif
4049
11.8k
    }
4050
11.8k
    break;
4051
4052
11.8k
  case NT_ENCLOSE:
4053
2.37k
    {
4054
2.37k
      EncloseNode* en = NENCLOSE(node);
4055
4056
2.37k
      switch (en->type) {
4057
396
      case ENCLOSE_OPTION:
4058
396
  {
4059
396
    OnigOptionType options = reg->options;
4060
396
    reg->options = NENCLOSE(node)->option;
4061
396
    r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4062
396
    reg->options = options;
4063
396
  }
4064
396
  break;
4065
4066
1.98k
      case ENCLOSE_MEMORY:
4067
1.98k
  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4068
0
    BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4069
    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4070
0
  }
4071
1.98k
  if (IS_ENCLOSE_CALLED(en))
4072
0
    state |= IN_CALL;
4073
1.98k
  if (IS_ENCLOSE_RECURSION(en))
4074
0
    state |= IN_RECCALL;
4075
1.98k
  else if ((state & IN_RECCALL) != 0)
4076
0
    SET_CALL_RECURSION(node);
4077
1.98k
  r = setup_tree(en->target, reg, state, env);
4078
1.98k
  break;
4079
4080
0
      case ENCLOSE_STOP_BACKTRACK:
4081
0
  {
4082
0
    Node* target = en->target;
4083
0
    r = setup_tree(target, reg, state, env);
4084
0
    if (NTYPE(target) == NT_QTFR) {
4085
0
      QtfrNode* tqn = NQTFR(target);
4086
0
      if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4087
0
    tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4088
0
        int qtype = NTYPE(tqn->target);
4089
0
        if (IS_NODE_TYPE_SIMPLE(qtype))
4090
0
    SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4091
0
      }
4092
0
    }
4093
0
  }
4094
0
  break;
4095
4096
0
      case ENCLOSE_CONDITION:
4097
0
#ifdef USE_NAMED_GROUP
4098
0
  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4099
0
      env->num_named > 0 &&
4100
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4101
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4102
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4103
0
  }
4104
0
#endif
4105
0
  if (NENCLOSE(node)->regnum > env->num_mem)
4106
0
    return ONIGERR_INVALID_BACKREF;
4107
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4108
0
  break;
4109
4110
0
      case ENCLOSE_ABSENT:
4111
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4112
0
  break;
4113
2.37k
      }
4114
2.37k
    }
4115
2.37k
    break;
4116
4117
11.4k
  case NT_ANCHOR:
4118
11.4k
    {
4119
11.4k
      AnchorNode* an = NANCHOR(node);
4120
4121
11.4k
      switch (an->type) {
4122
0
      case ANCHOR_PREC_READ:
4123
0
  r = setup_tree(an->target, reg, state, env);
4124
0
  break;
4125
0
      case ANCHOR_PREC_READ_NOT:
4126
0
  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4127
0
  break;
4128
4129
/* allowed node types in look-behind */
4130
0
#define ALLOWED_TYPE_IN_LB  \
4131
0
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4132
0
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4133
4134
0
#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4135
0
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
4136
4137
0
#define ALLOWED_ANCHOR_IN_LB \
4138
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4139
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4140
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4141
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4142
0
#define ALLOWED_ANCHOR_IN_LB_NOT \
4143
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4144
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4145
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4146
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4147
4148
0
      case ANCHOR_LOOK_BEHIND:
4149
0
  {
4150
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4151
0
            ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4152
0
    if (r < 0) return r;
4153
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4154
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4155
0
    r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4156
0
    if (r != 0) return r;
4157
0
    r = setup_look_behind(node, reg, env);
4158
0
  }
4159
0
  break;
4160
4161
0
      case ANCHOR_LOOK_BEHIND_NOT:
4162
0
  {
4163
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4164
0
          ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4165
0
    if (r < 0) return r;
4166
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4167
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4168
0
    r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4169
0
       env);
4170
0
    if (r != 0) return r;
4171
0
    r = setup_look_behind(node, reg, env);
4172
0
  }
4173
0
  break;
4174
11.4k
      }
4175
11.4k
    }
4176
11.4k
    break;
4177
4178
11.4k
  default:
4179
0
    break;
4180
68.1k
  }
4181
4182
68.1k
  return r;
4183
68.1k
}
4184
4185
/* set skip map for Sunday's quick search */
4186
static int
4187
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4188
      UChar skip[], int ignore_case)
4189
3.16k
{
4190
3.16k
  OnigDistance i, len;
4191
3.16k
  int clen, flen, n, j, k;
4192
3.16k
  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4193
3.16k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4194
3.16k
  OnigEncoding enc = reg->enc;
4195
4196
3.16k
  len = end - s;
4197
3.16k
  if (len >= ONIG_CHAR_TABLE_SIZE) {
4198
    /* This should not happen. */
4199
0
    return ONIGERR_TYPE_BUG;
4200
0
  }
4201
4202
3.16k
  if (ignore_case) {
4203
0
    for (i = 0; i < len; i += clen) {
4204
0
      p = s + i;
4205
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4206
0
    p, end, items);
4207
0
      clen = enclen(enc, p, end);
4208
0
      if (p + clen > end)
4209
0
  clen = (int )(end - p);
4210
4211
0
      for (j = 0; j < n; j++) {
4212
0
  if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4213
    /* Different length isn't supported. Stop optimization at here. */
4214
0
    end = p;
4215
0
    goto endcheck;
4216
0
  }
4217
0
  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4218
0
  if (flen != clen) {
4219
    /* Different length isn't supported. Stop optimization at here. */
4220
0
    end = p;
4221
0
    goto endcheck;
4222
0
  }
4223
0
      }
4224
0
    }
4225
0
endcheck:
4226
0
    len = end - s;
4227
0
  }
4228
4229
814k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4230
811k
    skip[i] = (UChar )(len + 1);
4231
3.16k
  n = 0;
4232
49.1k
  for (i = 0; i < len; i += clen) {
4233
45.9k
    p = s + i;
4234
45.9k
    if (ignore_case)
4235
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4236
45.9k
               p, end, items);
4237
45.9k
    clen = enclen(enc, p, end);
4238
45.9k
    if (p + clen > end)
4239
0
      clen = (int )(end - p);
4240
4241
91.8k
    for (j = 0; j < clen; j++) {
4242
45.9k
      skip[s[i + j]] = (UChar )(len - i - j);
4243
45.9k
      for (k = 0; k < n; k++) {
4244
0
  ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4245
0
  skip[buf[j]] = (UChar )(len - i - j);
4246
0
      }
4247
45.9k
    }
4248
45.9k
  }
4249
4250
3.16k
  return (int )len;
4251
3.16k
}
4252
4253
typedef struct {
4254
  OnigDistance min;  /* min byte length */
4255
  OnigDistance max;  /* max byte length */
4256
} MinMaxLen;
4257
4258
typedef struct {
4259
  MinMaxLen        mmd;
4260
  OnigEncoding     enc;
4261
  OnigOptionType   options;
4262
  OnigCaseFoldType case_fold_flag;
4263
  ScanEnv*         scan_env;
4264
} OptEnv;
4265
4266
typedef struct {
4267
  int left_anchor;
4268
  int right_anchor;
4269
} OptAncInfo;
4270
4271
typedef struct {
4272
  MinMaxLen  mmd; /* info position */
4273
  OptAncInfo anc;
4274
4275
  int   reach_end;
4276
  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
4277
  int   len;
4278
  UChar s[OPT_EXACT_MAXLEN];
4279
} OptExactInfo;
4280
4281
typedef struct {
4282
  MinMaxLen mmd; /* info position */
4283
  OptAncInfo anc;
4284
4285
  int   value;      /* weighted value */
4286
  UChar map[ONIG_CHAR_TABLE_SIZE];
4287
} OptMapInfo;
4288
4289
typedef struct {
4290
  MinMaxLen    len;
4291
4292
  OptAncInfo   anc;
4293
  OptExactInfo exb;    /* boundary */
4294
  OptExactInfo exm;    /* middle */
4295
  OptExactInfo expr;   /* prec read (?=...) */
4296
4297
  OptMapInfo   map;   /* boundary */
4298
} NodeOptInfo;
4299
4300
4301
static int
4302
map_position_value(OnigEncoding enc, int i)
4303
58.6k
{
4304
58.6k
  static const short int ByteValTable[] = {
4305
58.6k
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4306
58.6k
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4307
58.6k
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4308
58.6k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4309
58.6k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4310
58.6k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4311
58.6k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4312
58.6k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4313
58.6k
  };
4314
4315
58.6k
  if (i < numberof(ByteValTable)) {
4316
58.6k
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4317
0
      return 20;
4318
58.6k
    else
4319
58.6k
      return (int )ByteValTable[i];
4320
58.6k
  }
4321
0
  else
4322
0
    return 4;   /* Take it easy. */
4323
58.6k
}
4324
4325
static int
4326
distance_value(MinMaxLen* mm)
4327
39.6k
{
4328
  /* 1000 / (min-max-dist + 1) */
4329
39.6k
  static const short int dist_vals[] = {
4330
39.6k
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4331
39.6k
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4332
39.6k
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4333
39.6k
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4334
39.6k
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4335
39.6k
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4336
39.6k
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4337
39.6k
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4338
39.6k
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4339
39.6k
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4340
39.6k
  };
4341
4342
39.6k
  OnigDistance d;
4343
4344
39.6k
  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4345
4346
14.6k
  d = mm->max - mm->min;
4347
14.6k
  if (d < numberof(dist_vals))
4348
    /* return dist_vals[d] * 16 / (mm->min + 12); */
4349
14.6k
    return (int )dist_vals[d];
4350
0
  else
4351
0
    return 1;
4352
14.6k
}
4353
4354
static int
4355
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4356
19.8k
{
4357
19.8k
  if (v2 <= 0) return -1;
4358
19.8k
  if (v1 <= 0) return  1;
4359
4360
19.8k
  v1 *= distance_value(d1);
4361
19.8k
  v2 *= distance_value(d2);
4362
4363
19.8k
  if (v2 > v1) return  1;
4364
18.2k
  if (v2 < v1) return -1;
4365
4366
9.50k
  if (d2->min < d1->min) return  1;
4367
9.10k
  if (d2->min > d1->min) return -1;
4368
1.98k
  return 0;
4369
9.10k
}
4370
4371
static int
4372
is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4373
2.77k
{
4374
2.77k
  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4375
2.77k
}
4376
4377
4378
static void
4379
set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4380
42.3k
{
4381
42.3k
  mml->min = min;
4382
42.3k
  mml->max = max;
4383
42.3k
}
4384
4385
static void
4386
clear_mml(MinMaxLen* mml)
4387
309k
{
4388
309k
  mml->min = mml->max = 0;
4389
309k
}
4390
4391
static void
4392
copy_mml(MinMaxLen* to, MinMaxLen* from)
4393
219k
{
4394
219k
  to->min = from->min;
4395
219k
  to->max = from->max;
4396
219k
}
4397
4398
static void
4399
add_mml(MinMaxLen* to, MinMaxLen* from)
4400
78.4k
{
4401
78.4k
  to->min = distance_add(to->min, from->min);
4402
78.4k
  to->max = distance_add(to->max, from->max);
4403
78.4k
}
4404
4405
#if 0
4406
static void
4407
add_len_mml(MinMaxLen* to, OnigDistance len)
4408
{
4409
  to->min = distance_add(to->min, len);
4410
  to->max = distance_add(to->max, len);
4411
}
4412
#endif
4413
4414
static void
4415
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4416
6.33k
{
4417
6.33k
  if (to->min > from->min) to->min = from->min;
4418
6.33k
  if (to->max < from->max) to->max = from->max;
4419
6.33k
}
4420
4421
static void
4422
copy_opt_env(OptEnv* to, OptEnv* from)
4423
9.50k
{
4424
9.50k
  *to = *from;
4425
9.50k
}
4426
4427
static void
4428
clear_opt_anc_info(OptAncInfo* anc)
4429
342k
{
4430
342k
  anc->left_anchor  = 0;
4431
342k
  anc->right_anchor = 0;
4432
342k
}
4433
4434
static void
4435
copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4436
41.9k
{
4437
41.9k
  *to = *from;
4438
41.9k
}
4439
4440
static void
4441
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4442
        OnigDistance left_len, OnigDistance right_len)
4443
41.9k
{
4444
41.9k
  clear_opt_anc_info(to);
4445
4446
41.9k
  to->left_anchor = left->left_anchor;
4447
41.9k
  if (left_len == 0) {
4448
20.1k
    to->left_anchor |= right->left_anchor;
4449
20.1k
  }
4450
4451
41.9k
  to->right_anchor = right->right_anchor;
4452
41.9k
  if (right_len == 0) {
4453
11.8k
    to->right_anchor |= left->right_anchor;
4454
11.8k
  }
4455
30.0k
  else {
4456
30.0k
    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4457
30.0k
  }
4458
41.9k
}
4459
4460
static int
4461
is_left_anchor(int anc)
4462
11.0k
{
4463
11.0k
  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4464
11.0k
      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4465
11.0k
      anc == ANCHOR_PREC_READ_NOT)
4466
3.16k
    return 0;
4467
4468
7.92k
  return 1;
4469
11.0k
}
4470
4471
static int
4472
is_set_opt_anc_info(OptAncInfo* to, int anc)
4473
1.98k
{
4474
1.98k
  if ((to->left_anchor & anc) != 0) return 1;
4475
4476
1.98k
  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4477
1.98k
}
4478
4479
static void
4480
add_opt_anc_info(OptAncInfo* to, int anc)
4481
11.0k
{
4482
11.0k
  if (is_left_anchor(anc))
4483
7.92k
    to->left_anchor |= anc;
4484
3.16k
  else
4485
3.16k
    to->right_anchor |= anc;
4486
11.0k
}
4487
4488
static void
4489
remove_opt_anc_info(OptAncInfo* to, int anc)
4490
0
{
4491
0
  if (is_left_anchor(anc))
4492
0
    to->left_anchor &= ~anc;
4493
0
  else
4494
0
    to->right_anchor &= ~anc;
4495
0
}
4496
4497
static void
4498
alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4499
8.71k
{
4500
8.71k
  to->left_anchor  &= add->left_anchor;
4501
8.71k
  to->right_anchor &= add->right_anchor;
4502
8.71k
}
4503
4504
static int
4505
is_full_opt_exact_info(OptExactInfo* ex)
4506
0
{
4507
0
  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4508
0
}
4509
4510
static void
4511
clear_opt_exact_info(OptExactInfo* ex)
4512
227k
{
4513
227k
  clear_mml(&ex->mmd);
4514
227k
  clear_opt_anc_info(&ex->anc);
4515
227k
  ex->reach_end   = 0;
4516
227k
  ex->ignore_case = -1;   /* unset */
4517
227k
  ex->len         = 0;
4518
227k
  ex->s[0]        = '\0';
4519
227k
}
4520
4521
static void
4522
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4523
13.4k
{
4524
13.4k
  *to = *from;
4525
13.4k
}
4526
4527
static void
4528
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4529
396
{
4530
396
  int i, j, len;
4531
396
  UChar *p, *end;
4532
396
  OptAncInfo tanc;
4533
4534
396
  if (to->ignore_case < 0)
4535
0
    to->ignore_case = add->ignore_case;
4536
396
  else if (to->ignore_case != add->ignore_case)
4537
0
    return ;  /* avoid */
4538
4539
396
  p = add->s;
4540
396
  end = p + add->len;
4541
1.58k
  for (i = to->len; p < end; ) {
4542
1.18k
    len = enclen(enc, p, end);
4543
1.18k
    if (i + len > OPT_EXACT_MAXLEN) break;
4544
2.37k
    for (j = 0; j < len && p < end; j++)
4545
1.18k
      to->s[i++] = *p++;
4546
1.18k
  }
4547
4548
396
  to->len = i;
4549
396
  to->reach_end = (p == end ? add->reach_end : 0);
4550
4551
396
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4552
396
  if (! to->reach_end) tanc.right_anchor = 0;
4553
396
  copy_opt_anc_info(&to->anc, &tanc);
4554
396
}
4555
4556
static void
4557
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4558
        int raw ARG_UNUSED, OnigEncoding enc)
4559
17.0k
{
4560
17.0k
  int i, j, len;
4561
17.0k
  UChar *p;
4562
4563
140k
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4564
123k
    len = enclen(enc, p, end);
4565
123k
    if (i + len > OPT_EXACT_MAXLEN) break;
4566
247k
    for (j = 0; j < len && p < end; j++)
4567
123k
      to->s[i++] = *p++;
4568
123k
  }
4569
4570
17.0k
  to->len = i;
4571
17.0k
}
4572
4573
static void
4574
alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4575
9.50k
{
4576
9.50k
  int i, j, len;
4577
4578
9.50k
  if (add->len == 0 || to->len == 0) {
4579
6.73k
    clear_opt_exact_info(to);
4580
6.73k
    return ;
4581
6.73k
  }
4582
4583
2.77k
  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4584
396
    clear_opt_exact_info(to);
4585
396
    return ;
4586
396
  }
4587
4588
3.96k
  for (i = 0; i < to->len && i < add->len; ) {
4589
3.96k
    if (to->s[i] != add->s[i]) break;
4590
1.58k
    len = enclen(env->enc, to->s + i, to->s + to->len);
4591
4592
1.58k
    for (j = 1; j < len; j++) {
4593
0
      if (to->s[i+j] != add->s[i+j]) break;
4594
0
    }
4595
1.58k
    if (j < len) break;
4596
1.58k
    i += len;
4597
1.58k
  }
4598
4599
2.37k
  if (! add->reach_end || i < add->len || i < to->len) {
4600
2.37k
    to->reach_end = 0;
4601
2.37k
  }
4602
2.37k
  to->len = i;
4603
2.37k
  if (to->ignore_case < 0)
4604
0
    to->ignore_case = add->ignore_case;
4605
2.37k
  else if (add->ignore_case >= 0)
4606
2.37k
    to->ignore_case |= add->ignore_case;
4607
4608
2.37k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4609
2.37k
  if (! to->reach_end) to->anc.right_anchor = 0;
4610
2.37k
}
4611
4612
static void
4613
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4614
84.7k
{
4615
84.7k
  int v1, v2;
4616
4617
84.7k
  v1 = now->len;
4618
84.7k
  v2 = alt->len;
4619
4620
84.7k
  if (v2 == 0) {
4621
67.7k
    return ;
4622
67.7k
  }
4623
17.0k
  else if (v1 == 0) {
4624
13.4k
    copy_opt_exact_info(now, alt);
4625
13.4k
    return ;
4626
13.4k
  }
4627
3.56k
  else if (v1 <= 2 && v2 <= 2) {
4628
    /* ByteValTable[x] is big value --> low price */
4629
396
    v2 = map_position_value(enc, now->s[0]);
4630
396
    v1 = map_position_value(enc, alt->s[0]);
4631
4632
396
    if (now->len > 1) v1 += 5;
4633
396
    if (alt->len > 1) v2 += 5;
4634
396
  }
4635
4636
3.56k
  if (now->ignore_case <= 0) v1 *= 2;
4637
3.56k
  if (alt->ignore_case <= 0) v2 *= 2;
4638
4639
3.56k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4640
0
    copy_opt_exact_info(now, alt);
4641
3.56k
}
4642
4643
static void
4644
clear_opt_map_info(OptMapInfo* map)
4645
73.2k
{
4646
73.2k
  static const OptMapInfo clean_info = {
4647
73.2k
    {0, 0}, {0, 0}, 0,
4648
73.2k
    {
4649
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4650
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4651
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4652
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4653
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4654
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4655
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4656
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4657
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4658
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4659
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4660
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4661
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4662
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4663
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4664
73.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4665
73.2k
    }
4666
73.2k
  };
4667
4668
73.2k
  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4669
73.2k
}
4670
4671
static void
4672
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4673
8.31k
{
4674
8.31k
  *to = *from;
4675
8.31k
}
4676
4677
static void
4678
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4679
51.8k
{
4680
51.8k
  if (map->map[c] == 0) {
4681
51.8k
    map->map[c] = 1;
4682
51.8k
    map->value += map_position_value(enc, c);
4683
51.8k
  }
4684
51.8k
}
4685
4686
static int
4687
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4688
                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4689
0
{
4690
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4691
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4692
0
  int i, n;
4693
4694
0
  add_char_opt_map_info(map, p[0], enc);
4695
4696
0
  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4697
0
  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4698
0
  if (n < 0) return n;
4699
4700
0
  for (i = 0; i < n; i++) {
4701
0
    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4702
0
    add_char_opt_map_info(map, buf[0], enc);
4703
0
  }
4704
4705
0
  return 0;
4706
0
}
4707
4708
static void
4709
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4710
39.2k
{
4711
39.2k
  const int z = 1<<15; /* 32768: something big value */
4712
4713
39.2k
  int v1, v2;
4714
4715
39.2k
  if (alt->value == 0) return ;
4716
18.2k
  if (now->value == 0) {
4717
8.31k
    copy_opt_map_info(now, alt);
4718
8.31k
    return ;
4719
8.31k
  }
4720
4721
9.90k
  v1 = z / now->value;
4722
9.90k
  v2 = z / alt->value;
4723
9.90k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4724
0
    copy_opt_map_info(now, alt);
4725
9.90k
}
4726
4727
static int
4728
comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4729
6.33k
{
4730
12.6k
#define COMP_EM_BASE  20
4731
6.33k
  int ve, vm;
4732
4733
6.33k
  if (m->value <= 0) return -1;
4734
4735
6.33k
  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4736
6.33k
  vm = COMP_EM_BASE * 5 * 2 / m->value;
4737
6.33k
  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4738
6.33k
}
4739
4740
static void
4741
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4742
3.16k
{
4743
3.16k
  int i, val;
4744
4745
  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4746
3.16k
  if (to->value == 0) return ;
4747
3.16k
  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4748
0
    clear_opt_map_info(to);
4749
0
    return ;
4750
0
  }
4751
4752
3.16k
  alt_merge_mml(&to->mmd, &add->mmd);
4753
4754
3.16k
  val = 0;
4755
814k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4756
811k
    if (add->map[i])
4757
3.16k
      to->map[i] = 1;
4758
4759
811k
    if (to->map[i])
4760
5.94k
      val += map_position_value(enc, i);
4761
811k
  }
4762
3.16k
  to->value = val;
4763
4764
3.16k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4765
3.16k
}
4766
4767
static void
4768
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4769
73.2k
{
4770
73.2k
  copy_mml(&(opt->exb.mmd),  mmd);
4771
73.2k
  copy_mml(&(opt->expr.mmd), mmd);
4772
73.2k
  copy_mml(&(opt->map.mmd),  mmd);
4773
73.2k
}
4774
4775
static void
4776
clear_node_opt_info(NodeOptInfo* opt)
4777
73.2k
{
4778
73.2k
  clear_mml(&opt->len);
4779
73.2k
  clear_opt_anc_info(&opt->anc);
4780
73.2k
  clear_opt_exact_info(&opt->exb);
4781
73.2k
  clear_opt_exact_info(&opt->exm);
4782
73.2k
  clear_opt_exact_info(&opt->expr);
4783
73.2k
  clear_opt_map_info(&opt->map);
4784
73.2k
}
4785
4786
static void
4787
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4788
9.50k
{
4789
9.50k
  *to = *from;
4790
9.50k
}
4791
4792
static void
4793
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4794
39.2k
{
4795
39.2k
  int exb_reach, exm_reach;
4796
39.2k
  OptAncInfo tanc;
4797
4798
39.2k
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4799
39.2k
  copy_opt_anc_info(&to->anc, &tanc);
4800
4801
39.2k
  if (add->exb.len > 0 && to->len.max == 0) {
4802
2.37k
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4803
2.37k
      to->len.max, add->len.max);
4804
2.37k
    copy_opt_anc_info(&add->exb.anc, &tanc);
4805
2.37k
  }
4806
4807
39.2k
  if (add->map.value > 0 && to->len.max == 0) {
4808
4.35k
    if (add->map.mmd.max == 0)
4809
4.35k
      add->map.anc.left_anchor |= to->anc.left_anchor;
4810
4.35k
  }
4811
4812
39.2k
  exb_reach = to->exb.reach_end;
4813
39.2k
  exm_reach = to->exm.reach_end;
4814
4815
39.2k
  if (add->len.max != 0)
4816
27.3k
    to->exb.reach_end = to->exm.reach_end = 0;
4817
4818
39.2k
  if (add->exb.len > 0) {
4819
11.4k
    if (exb_reach) {
4820
0
      concat_opt_exact_info(&to->exb, &add->exb, enc);
4821
0
      clear_opt_exact_info(&add->exb);
4822
0
    }
4823
11.4k
    else if (exm_reach) {
4824
396
      concat_opt_exact_info(&to->exm, &add->exb, enc);
4825
396
      clear_opt_exact_info(&add->exb);
4826
396
    }
4827
11.4k
  }
4828
39.2k
  select_opt_exact_info(enc, &to->exm, &add->exb);
4829
39.2k
  select_opt_exact_info(enc, &to->exm, &add->exm);
4830
4831
39.2k
  if (to->expr.len > 0) {
4832
0
    if (add->len.max > 0) {
4833
0
      if (to->expr.len > (int )add->len.max)
4834
0
  to->expr.len = (int )add->len.max;
4835
4836
0
      if (to->expr.mmd.max == 0)
4837
0
  select_opt_exact_info(enc, &to->exb, &to->expr);
4838
0
      else
4839
0
  select_opt_exact_info(enc, &to->exm, &to->expr);
4840
0
    }
4841
0
  }
4842
39.2k
  else if (add->expr.len > 0) {
4843
0
    copy_opt_exact_info(&to->expr, &add->expr);
4844
0
  }
4845
4846
39.2k
  select_opt_map_info(&to->map, &add->map);
4847
4848
39.2k
  add_mml(&to->len, &add->len);
4849
39.2k
}
4850
4851
static void
4852
alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4853
3.16k
{
4854
3.16k
  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4855
3.16k
  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4856
3.16k
  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4857
3.16k
  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4858
3.16k
  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4859
4860
3.16k
  alt_merge_mml(&to->len, &add->len);
4861
3.16k
}
4862
4863
4864
1.98k
#define MAX_NODE_OPT_INFO_REF_COUNT    5
4865
4866
static int
4867
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4868
73.2k
{
4869
73.2k
  int type;
4870
73.2k
  int r = 0;
4871
4872
73.2k
  clear_node_opt_info(opt);
4873
73.2k
  set_bound_node_opt_info(opt, &env->mmd);
4874
4875
73.2k
  type = NTYPE(node);
4876
73.2k
  switch (type) {
4877
9.50k
  case NT_LIST:
4878
9.50k
    {
4879
9.50k
      OptEnv nenv;
4880
9.50k
      NodeOptInfo nopt;
4881
9.50k
      Node* nd = node;
4882
4883
9.50k
      copy_opt_env(&nenv, env);
4884
39.2k
      do {
4885
39.2k
  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4886
39.2k
  if (r == 0) {
4887
39.2k
    add_mml(&nenv.mmd, &nopt.len);
4888
39.2k
    concat_left_node_opt_info(env->enc, opt, &nopt);
4889
39.2k
  }
4890
39.2k
      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4891
9.50k
    }
4892
9.50k
    break;
4893
4894
2.37k
  case NT_ALT:
4895
2.37k
    {
4896
2.37k
      NodeOptInfo nopt;
4897
2.37k
      Node* nd = node;
4898
4899
5.54k
      do {
4900
5.54k
  r = optimize_node_left(NCAR(nd), &nopt, env);
4901
5.54k
  if (r == 0) {
4902
5.54k
    if (nd == node) copy_node_opt_info(opt, &nopt);
4903
3.16k
    else            alt_merge_node_opt_info(opt, &nopt, env);
4904
5.54k
  }
4905
5.54k
      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4906
2.37k
    }
4907
2.37k
    break;
4908
4909
17.0k
  case NT_STR:
4910
17.0k
    {
4911
17.0k
      StrNode* sn = NSTR(node);
4912
17.0k
      OnigDistance slen = sn->end - sn->s;
4913
17.0k
      int is_raw = NSTRING_IS_RAW(node);
4914
4915
17.0k
      if (! NSTRING_IS_AMBIG(node)) {
4916
17.0k
  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4917
17.0k
          is_raw, env->enc);
4918
17.0k
  opt->exb.ignore_case = 0;
4919
17.0k
  if (slen > 0) {
4920
16.6k
    add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4921
16.6k
  }
4922
17.0k
  set_mml(&opt->len, slen, slen);
4923
17.0k
      }
4924
0
      else {
4925
0
  OnigDistance max;
4926
4927
0
  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4928
0
    int n = onigenc_strlen(env->enc, sn->s, sn->end);
4929
0
    max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4930
0
  }
4931
0
  else {
4932
0
    concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4933
0
            is_raw, env->enc);
4934
0
    opt->exb.ignore_case = 1;
4935
4936
0
    if (slen > 0) {
4937
0
      r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4938
0
            env->enc, env->case_fold_flag);
4939
0
      if (r != 0) break;
4940
0
    }
4941
4942
0
    max = slen;
4943
0
  }
4944
4945
0
  set_mml(&opt->len, slen, max);
4946
0
      }
4947
4948
17.0k
      if ((OnigDistance )opt->exb.len == slen)
4949
15.4k
  opt->exb.reach_end = 1;
4950
17.0k
    }
4951
0
    break;
4952
4953
9.50k
  case NT_CCLASS:
4954
9.50k
    {
4955
9.50k
      int i, z;
4956
9.50k
      CClassNode* cc = NCCLASS(node);
4957
4958
      /* no need to check ignore case. (set in setup_tree()) */
4959
4960
9.50k
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4961
2.37k
  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4962
2.37k
  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4963
4964
2.37k
  set_mml(&opt->len, min, max);
4965
2.37k
      }
4966
7.12k
      else {
4967
1.83M
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4968
1.82M
    z = BITSET_AT(cc->bs, i);
4969
1.82M
    if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4970
35.2k
      add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4971
35.2k
    }
4972
1.82M
  }
4973
7.12k
  set_mml(&opt->len, 1, 1);
4974
7.12k
      }
4975
9.50k
    }
4976
9.50k
    break;
4977
4978
0
  case NT_CTYPE:
4979
0
    {
4980
0
      int i, min, max;
4981
0
      int maxcode;
4982
4983
0
      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4984
4985
0
      if (max == 1) {
4986
0
  min = 1;
4987
4988
0
  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4989
0
  switch (NCTYPE(node)->ctype) {
4990
0
  case ONIGENC_CTYPE_WORD:
4991
0
    if (NCTYPE(node)->not != 0) {
4992
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4993
0
        if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
4994
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4995
0
        }
4996
0
      }
4997
0
    }
4998
0
    else {
4999
0
      for (i = 0; i < maxcode; i++) {
5000
0
        if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5001
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5002
0
        }
5003
0
      }
5004
0
    }
5005
0
    break;
5006
0
  }
5007
0
      }
5008
0
      else {
5009
0
  min = ONIGENC_MBC_MINLEN(env->enc);
5010
0
      }
5011
0
      set_mml(&opt->len, min, max);
5012
0
    }
5013
0
    break;
5014
5015
3.96k
  case NT_CANY:
5016
3.96k
    {
5017
3.96k
      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5018
3.96k
      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5019
3.96k
      set_mml(&opt->len, min, max);
5020
3.96k
    }
5021
3.96k
    break;
5022
5023
11.4k
  case NT_ANCHOR:
5024
11.4k
    switch (NANCHOR(node)->type) {
5025
0
    case ANCHOR_BEGIN_BUF:
5026
0
    case ANCHOR_BEGIN_POSITION:
5027
7.92k
    case ANCHOR_BEGIN_LINE:
5028
7.92k
    case ANCHOR_END_BUF:
5029
7.92k
    case ANCHOR_SEMI_END_BUF:
5030
11.0k
    case ANCHOR_END_LINE:
5031
11.0k
    case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5032
11.0k
    case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5033
11.0k
      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5034
11.0k
      break;
5035
5036
0
    case ANCHOR_PREC_READ:
5037
0
      {
5038
0
  NodeOptInfo nopt;
5039
5040
0
  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5041
0
  if (r == 0) {
5042
0
    if (nopt.exb.len > 0)
5043
0
      copy_opt_exact_info(&opt->expr, &nopt.exb);
5044
0
    else if (nopt.exm.len > 0)
5045
0
      copy_opt_exact_info(&opt->expr, &nopt.exm);
5046
5047
0
    opt->expr.reach_end = 0;
5048
5049
0
    if (nopt.map.value > 0)
5050
0
      copy_opt_map_info(&opt->map, &nopt.map);
5051
0
  }
5052
0
      }
5053
0
      break;
5054
5055
0
    case ANCHOR_LOOK_BEHIND_NOT:
5056
0
      break;
5057
11.4k
    }
5058
11.4k
    break;
5059
5060
11.4k
  case NT_BREF:
5061
0
    {
5062
0
      int i;
5063
0
      int* backs;
5064
0
      OnigDistance min, max, tmin, tmax;
5065
0
      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5066
0
      BRefNode* br = NBREF(node);
5067
5068
0
      if (br->state & NST_RECURSION) {
5069
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5070
0
  break;
5071
0
      }
5072
0
      backs = BACKREFS_P(br);
5073
0
      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5074
0
      if (r != 0) break;
5075
0
      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5076
0
      if (r != 0) break;
5077
0
      for (i = 1; i < br->back_num; i++) {
5078
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5079
0
  if (r != 0) break;
5080
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5081
0
  if (r != 0) break;
5082
0
  if (min > tmin) min = tmin;
5083
0
  if (max < tmax) max = tmax;
5084
0
      }
5085
0
      if (r == 0) set_mml(&opt->len, min, max);
5086
0
    }
5087
0
    break;
5088
5089
0
#ifdef USE_SUBEXP_CALL
5090
0
  case NT_CALL:
5091
0
    if (IS_CALL_RECURSION(NCALL(node)))
5092
0
      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5093
0
    else {
5094
0
      OnigOptionType save = env->options;
5095
0
      env->options = NENCLOSE(NCALL(node)->target)->option;
5096
0
      r = optimize_node_left(NCALL(node)->target, opt, env);
5097
0
      env->options = save;
5098
0
    }
5099
0
    break;
5100
0
#endif
5101
5102
11.8k
  case NT_QTFR:
5103
11.8k
    {
5104
11.8k
      int i;
5105
11.8k
      OnigDistance min, max;
5106
11.8k
      NodeOptInfo nopt;
5107
11.8k
      QtfrNode* qn = NQTFR(node);
5108
5109
11.8k
      r = optimize_node_left(qn->target, &nopt, env);
5110
11.8k
      if (r) break;
5111
5112
11.8k
      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5113
4.35k
  if (env->mmd.max == 0 &&
5114
4.35k
      NTYPE(qn->target) == NT_CANY && qn->greedy) {
5115
0
    if (IS_MULTILINE(env->options))
5116
      /* implicit anchor: /.*a/ ==> /\A.*a/ */
5117
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5118
0
    else
5119
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5120
0
  }
5121
4.35k
      }
5122
7.52k
      else {
5123
7.52k
  if (qn->lower > 0) {
5124
7.12k
    copy_node_opt_info(opt, &nopt);
5125
7.12k
    if (nopt.exb.len > 0) {
5126
0
      if (nopt.exb.reach_end) {
5127
0
        for (i = 2; i <= qn->lower &&
5128
0
        ! is_full_opt_exact_info(&opt->exb); i++) {
5129
0
    concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5130
0
        }
5131
0
        if (i < qn->lower) {
5132
0
    opt->exb.reach_end = 0;
5133
0
        }
5134
0
      }
5135
0
    }
5136
5137
7.12k
    if (qn->lower != qn->upper) {
5138
7.12k
      opt->exb.reach_end = 0;
5139
7.12k
      opt->exm.reach_end = 0;
5140
7.12k
    }
5141
7.12k
    if (qn->lower > 1)
5142
0
      opt->exm.reach_end = 0;
5143
7.12k
  }
5144
7.52k
      }
5145
5146
11.8k
      min = distance_multiply(nopt.len.min, qn->lower);
5147
11.8k
      if (IS_REPEAT_INFINITE(qn->upper))
5148
11.4k
  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5149
396
      else
5150
396
  max = distance_multiply(nopt.len.max, qn->upper);
5151
5152
11.8k
      set_mml(&opt->len, min, max);
5153
11.8k
    }
5154
0
    break;
5155
5156
7.52k
  case NT_ENCLOSE:
5157
7.52k
    {
5158
7.52k
      EncloseNode* en = NENCLOSE(node);
5159
5160
7.52k
      switch (en->type) {
5161
396
      case ENCLOSE_OPTION:
5162
396
  {
5163
396
    OnigOptionType save = env->options;
5164
5165
396
    env->options = en->option;
5166
396
    r = optimize_node_left(en->target, opt, env);
5167
396
    env->options = save;
5168
396
  }
5169
396
  break;
5170
5171
1.98k
      case ENCLOSE_MEMORY:
5172
1.98k
#ifdef USE_SUBEXP_CALL
5173
1.98k
  en->opt_count++;
5174
1.98k
  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5175
0
    OnigDistance min, max;
5176
5177
0
    min = 0;
5178
0
    max = ONIG_INFINITE_DISTANCE;
5179
0
    if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5180
0
    if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5181
0
    set_mml(&opt->len, min, max);
5182
0
  }
5183
1.98k
  else
5184
1.98k
#endif
5185
1.98k
  {
5186
1.98k
    r = optimize_node_left(en->target, opt, env);
5187
5188
1.98k
    if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5189
0
      if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5190
0
        remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5191
0
    }
5192
1.98k
  }
5193
1.98k
  break;
5194
5195
5.14k
      case ENCLOSE_STOP_BACKTRACK:
5196
5.14k
      case ENCLOSE_CONDITION:
5197
5.14k
  r = optimize_node_left(en->target, opt, env);
5198
5.14k
  break;
5199
5200
0
      case ENCLOSE_ABSENT:
5201
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5202
0
  break;
5203
7.52k
      }
5204
7.52k
    }
5205
7.52k
    break;
5206
5207
7.52k
  default:
5208
#ifdef ONIG_DEBUG
5209
    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5210
      NTYPE(node));
5211
#endif
5212
0
    r = ONIGERR_TYPE_BUG;
5213
0
    break;
5214
73.2k
  }
5215
5216
73.2k
  return r;
5217
73.2k
}
5218
5219
static int
5220
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5221
4.35k
{
5222
4.35k
  int allow_reverse;
5223
5224
4.35k
  if (e->len == 0) return 0;
5225
5226
4.35k
  reg->exact = (UChar* )xmalloc(e->len);
5227
4.35k
  CHECK_NULL_RETURN_MEMERR(reg->exact);
5228
4.35k
  xmemcpy(reg->exact, e->s, e->len);
5229
4.35k
  reg->exact_end = reg->exact + e->len;
5230
5231
4.35k
  allow_reverse =
5232
4.35k
  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5233
5234
4.35k
  if (e->ignore_case > 0) {
5235
0
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5236
0
      e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5237
0
          reg->map, 1);
5238
0
      reg->exact_end = reg->exact + e->len;
5239
0
      if (e->len >= 3) {
5240
0
  reg->optimize = (allow_reverse != 0
5241
0
       ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5242
0
      }
5243
0
      else if (e->len > 0) {
5244
0
  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5245
0
      }
5246
0
      else
5247
0
  return 0;
5248
0
    }
5249
0
    else {
5250
0
      reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5251
0
    }
5252
0
  }
5253
4.35k
  else {
5254
4.35k
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5255
3.16k
      set_bm_skip(reg->exact, reg->exact_end, reg,
5256
3.16k
      reg->map, 0);
5257
3.16k
      reg->optimize = (allow_reverse != 0
5258
3.16k
         ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5259
3.16k
    }
5260
1.18k
    else {
5261
1.18k
      reg->optimize = ONIG_OPTIMIZE_EXACT;
5262
1.18k
    }
5263
4.35k
  }
5264
5265
4.35k
  reg->dmin = e->mmd.min;
5266
4.35k
  reg->dmax = e->mmd.max;
5267
5268
4.35k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5269
4.35k
    reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5270
4.35k
  }
5271
5272
4.35k
  return 0;
5273
4.35k
}
5274
5275
static void
5276
set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5277
3.16k
{
5278
3.16k
  int i;
5279
5280
814k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5281
811k
    reg->map[i] = m->map[i];
5282
5283
3.16k
  reg->optimize   = ONIG_OPTIMIZE_MAP;
5284
3.16k
  reg->dmin       = m->mmd.min;
5285
3.16k
  reg->dmax       = m->mmd.max;
5286
5287
3.16k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5288
3.16k
    reg->threshold_len = (int )(reg->dmin + 1);
5289
3.16k
  }
5290
3.16k
}
5291
5292
static void
5293
set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5294
7.52k
{
5295
7.52k
  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
5296
7.52k
  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5297
7.52k
}
5298
5299
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5300
static void print_optimize_info(FILE* f, regex_t* reg);
5301
#endif
5302
5303
static int
5304
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5305
9.10k
{
5306
5307
9.10k
  int r;
5308
9.10k
  NodeOptInfo opt;
5309
9.10k
  OptEnv env;
5310
5311
9.10k
  env.enc            = reg->enc;
5312
9.10k
  env.options        = reg->options;
5313
9.10k
  env.case_fold_flag = reg->case_fold_flag;
5314
9.10k
  env.scan_env   = scan_env;
5315
9.10k
  clear_mml(&env.mmd);
5316
5317
9.10k
  r = optimize_node_left(node, &opt, &env);
5318
9.10k
  if (r) return r;
5319
5320
9.10k
  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5321
9.10k
        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5322
9.10k
        ANCHOR_LOOK_BEHIND);
5323
5324
9.10k
  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5325
0
    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5326
5327
9.10k
  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5328
9.10k
  ANCHOR_PREC_READ_NOT);
5329
5330
9.10k
  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5331
0
    reg->anchor_dmin = opt.len.min;
5332
0
    reg->anchor_dmax = opt.len.max;
5333
0
  }
5334
5335
9.10k
  if (opt.exb.len > 0 || opt.exm.len > 0) {
5336
6.33k
    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5337
6.33k
    if (opt.map.value > 0 &&
5338
6.33k
  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5339
1.98k
      goto set_map;
5340
1.98k
    }
5341
4.35k
    else {
5342
4.35k
      r = set_optimize_exact_info(reg, &opt.exb);
5343
4.35k
      set_sub_anchor(reg, &opt.exb.anc);
5344
4.35k
    }
5345
6.33k
  }
5346
2.77k
  else if (opt.map.value > 0) {
5347
3.16k
  set_map:
5348
3.16k
    set_optimize_map_info(reg, &opt.map);
5349
3.16k
    set_sub_anchor(reg, &opt.map.anc);
5350
3.16k
  }
5351
1.58k
  else {
5352
1.58k
    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5353
1.58k
    if (opt.len.max == 0)
5354
792
      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5355
1.58k
  }
5356
5357
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5358
  print_optimize_info(stderr, reg);
5359
#endif
5360
9.10k
  return r;
5361
9.10k
}
5362
5363
static void
5364
clear_optimize_info(regex_t* reg)
5365
9.10k
{
5366
9.10k
  reg->optimize      = ONIG_OPTIMIZE_NONE;
5367
9.10k
  reg->anchor        = 0;
5368
9.10k
  reg->anchor_dmin   = 0;
5369
9.10k
  reg->anchor_dmax   = 0;
5370
9.10k
  reg->sub_anchor    = 0;
5371
9.10k
  reg->exact_end     = (UChar* )NULL;
5372
9.10k
  reg->threshold_len = 0;
5373
9.10k
  if (IS_NOT_NULL(reg->exact)) {
5374
0
    xfree(reg->exact);
5375
0
    reg->exact = (UChar* )NULL;
5376
0
  }
5377
9.10k
}
5378
5379
#ifdef ONIG_DEBUG
5380
5381
static void print_enc_string(FILE* fp, OnigEncoding enc,
5382
           const UChar *s, const UChar *end)
5383
{
5384
  fprintf(fp, "\nPATTERN: /");
5385
5386
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5387
    const UChar *p;
5388
    OnigCodePoint code;
5389
5390
    p = s;
5391
    while (p < end) {
5392
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5393
      if (code >= 0x80) {
5394
  fprintf(fp, " 0x%04x ", (int )code);
5395
      }
5396
      else {
5397
  fputc((int )code, fp);
5398
      }
5399
5400
      p += enclen(enc, p, end);
5401
    }
5402
  }
5403
  else {
5404
    while (s < end) {
5405
      fputc((int )*s, fp);
5406
      s++;
5407
    }
5408
  }
5409
5410
  fprintf(fp, "/ (%s)\n", enc->name);
5411
}
5412
#endif  /* ONIG_DEBUG */
5413
5414
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5415
static void
5416
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5417
{
5418
  if (a == ONIG_INFINITE_DISTANCE)
5419
    fputs("inf", f);
5420
  else
5421
    fprintf(f, "(%"PRIuPTR")", a);
5422
5423
  fputs("-", f);
5424
5425
  if (b == ONIG_INFINITE_DISTANCE)
5426
    fputs("inf", f);
5427
  else
5428
    fprintf(f, "(%"PRIuPTR")", b);
5429
}
5430
5431
static void
5432
print_anchor(FILE* f, int anchor)
5433
{
5434
  int q = 0;
5435
5436
  fprintf(f, "[");
5437
5438
  if (anchor & ANCHOR_BEGIN_BUF) {
5439
    fprintf(f, "begin-buf");
5440
    q = 1;
5441
  }
5442
  if (anchor & ANCHOR_BEGIN_LINE) {
5443
    if (q) fprintf(f, ", ");
5444
    q = 1;
5445
    fprintf(f, "begin-line");
5446
  }
5447
  if (anchor & ANCHOR_BEGIN_POSITION) {
5448
    if (q) fprintf(f, ", ");
5449
    q = 1;
5450
    fprintf(f, "begin-pos");
5451
  }
5452
  if (anchor & ANCHOR_END_BUF) {
5453
    if (q) fprintf(f, ", ");
5454
    q = 1;
5455
    fprintf(f, "end-buf");
5456
  }
5457
  if (anchor & ANCHOR_SEMI_END_BUF) {
5458
    if (q) fprintf(f, ", ");
5459
    q = 1;
5460
    fprintf(f, "semi-end-buf");
5461
  }
5462
  if (anchor & ANCHOR_END_LINE) {
5463
    if (q) fprintf(f, ", ");
5464
    q = 1;
5465
    fprintf(f, "end-line");
5466
  }
5467
  if (anchor & ANCHOR_ANYCHAR_STAR) {
5468
    if (q) fprintf(f, ", ");
5469
    q = 1;
5470
    fprintf(f, "anychar-star");
5471
  }
5472
  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5473
    if (q) fprintf(f, ", ");
5474
    fprintf(f, "anychar-star-ml");
5475
  }
5476
5477
  fprintf(f, "]");
5478
}
5479
5480
static void
5481
print_optimize_info(FILE* f, regex_t* reg)
5482
{
5483
  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5484
                              "EXACT_IC", "MAP",
5485
                              "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5486
5487
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5488
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5489
  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5490
    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5491
  fprintf(f, "\n");
5492
5493
  if (reg->optimize) {
5494
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5495
    fprintf(f, "\n");
5496
  }
5497
  fprintf(f, "\n");
5498
5499
  if (reg->exact) {
5500
    UChar *p;
5501
    fprintf(f, "exact: [");
5502
    for (p = reg->exact; p < reg->exact_end; p++) {
5503
      fputc(*p, f);
5504
    }
5505
    fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5506
  }
5507
  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5508
    int c, i, n = 0;
5509
5510
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5511
      if (reg->map[i]) n++;
5512
5513
    fprintf(f, "map: n=%d\n", n);
5514
    if (n > 0) {
5515
      c = 0;
5516
      fputc('[', f);
5517
      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5518
  if (reg->map[i] != 0) {
5519
    if (c > 0)  fputs(", ", f);
5520
    c++;
5521
    if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5522
        ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5523
      fputc(i, f);
5524
    else
5525
      fprintf(f, "%d", i);
5526
  }
5527
      }
5528
      fprintf(f, "]\n");
5529
    }
5530
  }
5531
}
5532
#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5533
5534
5535
extern void
5536
onig_free_body(regex_t* reg)
5537
9.10k
{
5538
9.10k
  if (IS_NOT_NULL(reg)) {
5539
9.10k
    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5540
9.10k
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5541
9.10k
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5542
9.10k
    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5543
5544
9.10k
#ifdef USE_NAMED_GROUP
5545
9.10k
    onig_names_free(reg);
5546
9.10k
#endif
5547
9.10k
  }
5548
9.10k
}
5549
5550
extern void
5551
onig_free(regex_t* reg)
5552
9.10k
{
5553
9.10k
  if (IS_NOT_NULL(reg)) {
5554
9.10k
    onig_free_body(reg);
5555
9.10k
    xfree(reg);
5556
9.10k
  }
5557
9.10k
}
5558
5559
#ifdef RUBY
5560
size_t
5561
onig_memsize(const regex_t *reg)
5562
{
5563
    size_t size = sizeof(regex_t);
5564
    if (IS_NULL(reg)) return 0;
5565
    if (IS_NOT_NULL(reg->p))                size += reg->alloc;
5566
    if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
5567
    if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5568
    if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
5569
5570
    return size;
5571
}
5572
5573
size_t
5574
onig_region_memsize(const OnigRegion *regs)
5575
{
5576
    size_t size = sizeof(*regs);
5577
    if (IS_NULL(regs)) return 0;
5578
    size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5579
    return size;
5580
}
5581
#endif
5582
5583
#define REGEX_TRANSFER(to,from) do {\
5584
  onig_free_body(to);\
5585
  xmemcpy(to, from, sizeof(regex_t));\
5586
  xfree(from);\
5587
} while (0)
5588
5589
#if 0
5590
extern void
5591
onig_transfer(regex_t* to, regex_t* from)
5592
{
5593
  REGEX_TRANSFER(to, from);
5594
}
5595
#endif
5596
5597
#ifdef ONIG_DEBUG_COMPILE
5598
static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5599
#endif
5600
#ifdef ONIG_DEBUG_PARSE_TREE
5601
static void print_tree(FILE* f, Node* node);
5602
#endif
5603
5604
#ifdef RUBY
5605
extern int
5606
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5607
       OnigErrorInfo* einfo)
5608
{
5609
  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5610
}
5611
#endif
5612
5613
#ifdef RUBY
5614
extern int
5615
onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5616
        OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5617
#else
5618
extern int
5619
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5620
       OnigErrorInfo* einfo)
5621
#endif
5622
9.10k
{
5623
9.10k
#define COMPILE_INIT_SIZE  20
5624
5625
9.10k
  int r;
5626
9.10k
  OnigDistance init_size;
5627
9.10k
  Node*  root;
5628
9.10k
  ScanEnv  scan_env = {0};
5629
9.10k
#ifdef USE_SUBEXP_CALL
5630
9.10k
  UnsetAddrList  uslist;
5631
9.10k
#endif
5632
5633
9.10k
  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5634
5635
#ifdef RUBY
5636
  scan_env.sourcefile = sourcefile;
5637
  scan_env.sourceline = sourceline;
5638
#endif
5639
5640
#ifdef ONIG_DEBUG
5641
  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5642
#endif
5643
5644
9.10k
  if (reg->alloc == 0) {
5645
9.10k
    init_size = (pattern_end - pattern) * 2;
5646
9.10k
    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5647
9.10k
    r = BBUF_INIT(reg, init_size);
5648
9.10k
    if (r != 0) goto end;
5649
9.10k
  }
5650
0
  else
5651
0
    reg->used = 0;
5652
5653
9.10k
  reg->num_mem            = 0;
5654
9.10k
  reg->num_repeat         = 0;
5655
9.10k
  reg->num_null_check     = 0;
5656
9.10k
  reg->repeat_range_alloc = 0;
5657
9.10k
  reg->repeat_range       = (OnigRepeatRange* )NULL;
5658
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5659
  reg->num_comb_exp_check = 0;
5660
#endif
5661
5662
9.10k
  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5663
9.10k
  if (r != 0) goto err;
5664
5665
#ifdef ONIG_DEBUG_PARSE_TREE
5666
# if 0
5667
  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5668
  print_tree(stderr, root);
5669
# endif
5670
#endif
5671
5672
9.10k
#ifdef USE_NAMED_GROUP
5673
  /* mixed use named group and no-named group */
5674
9.10k
  if (scan_env.num_named > 0 &&
5675
9.10k
      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5676
9.10k
      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5677
396
    if (scan_env.num_named != scan_env.num_mem)
5678
0
      r = disable_noname_group_capture(&root, reg, &scan_env);
5679
396
    else
5680
396
      r = numbered_ref_check(root);
5681
5682
396
    if (r != 0) goto err;
5683
396
  }
5684
9.10k
#endif
5685
5686
9.10k
#ifdef USE_SUBEXP_CALL
5687
9.10k
  if (scan_env.num_call > 0) {
5688
0
    r = unset_addr_list_init(&uslist, scan_env.num_call);
5689
0
    if (r != 0) goto err;
5690
0
    scan_env.unset_addr_list = &uslist;
5691
0
    r = setup_subexp_call(root, &scan_env);
5692
0
    if (r != 0) goto err_unset;
5693
0
    r = subexp_recursive_check_trav(root, &scan_env);
5694
0
    if (r  < 0) goto err_unset;
5695
0
    r = subexp_inf_recursive_check_trav(root, &scan_env);
5696
0
    if (r != 0) goto err_unset;
5697
5698
0
    reg->num_call = scan_env.num_call;
5699
0
  }
5700
9.10k
  else
5701
9.10k
    reg->num_call = 0;
5702
9.10k
#endif
5703
5704
9.10k
  r = setup_tree(root, reg, 0, &scan_env);
5705
9.10k
  if (r != 0) goto err_unset;
5706
5707
#ifdef ONIG_DEBUG_PARSE_TREE
5708
  print_tree(stderr, root);
5709
#endif
5710
5711
9.10k
  reg->capture_history  = scan_env.capture_history;
5712
9.10k
  reg->bt_mem_start     = scan_env.bt_mem_start;
5713
9.10k
  reg->bt_mem_start    |= reg->capture_history;
5714
9.10k
  if (IS_FIND_CONDITION(reg->options))
5715
0
    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5716
9.10k
  else {
5717
9.10k
    reg->bt_mem_end  = scan_env.bt_mem_end;
5718
9.10k
    reg->bt_mem_end |= reg->capture_history;
5719
9.10k
  }
5720
5721
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5722
  if (scan_env.backrefed_mem == 0
5723
# ifdef USE_SUBEXP_CALL
5724
      || scan_env.num_call == 0
5725
# endif
5726
      ) {
5727
    setup_comb_exp_check(root, 0, &scan_env);
5728
# ifdef USE_SUBEXP_CALL
5729
    if (scan_env.has_recursion != 0) {
5730
      scan_env.num_comb_exp_check = 0;
5731
    }
5732
    else
5733
# endif
5734
    if (scan_env.comb_exp_max_regnum > 0) {
5735
      int i;
5736
      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5737
  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5738
    scan_env.num_comb_exp_check = 0;
5739
    break;
5740
  }
5741
      }
5742
    }
5743
  }
5744
5745
  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5746
#endif
5747
5748
9.10k
  clear_optimize_info(reg);
5749
9.10k
#ifndef ONIG_DONT_OPTIMIZE
5750
9.10k
  r = set_optimize_info_from_tree(root, reg, &scan_env);
5751
9.10k
  if (r != 0) goto err_unset;
5752
9.10k
#endif
5753
5754
9.10k
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5755
0
    xfree(scan_env.mem_nodes_dynamic);
5756
0
    scan_env.mem_nodes_dynamic = (Node** )NULL;
5757
0
  }
5758
5759
9.10k
  r = compile_tree(root, reg);
5760
9.10k
  if (r == 0) {
5761
9.10k
    r = add_opcode(reg, OP_END);
5762
9.10k
#ifdef USE_SUBEXP_CALL
5763
9.10k
    if (scan_env.num_call > 0) {
5764
0
      r = unset_addr_list_fix(&uslist, reg);
5765
0
      unset_addr_list_end(&uslist);
5766
0
      if (r) goto err;
5767
0
    }
5768
9.10k
#endif
5769
5770
9.10k
    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5771
0
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5772
9.10k
    else {
5773
9.10k
      if (reg->bt_mem_start != 0)
5774
0
  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5775
9.10k
      else
5776
9.10k
  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5777
9.10k
    }
5778
9.10k
  }
5779
0
#ifdef USE_SUBEXP_CALL
5780
0
  else if (scan_env.num_call > 0) {
5781
0
    unset_addr_list_end(&uslist);
5782
0
  }
5783
9.10k
#endif
5784
9.10k
  onig_node_free(root);
5785
5786
#ifdef ONIG_DEBUG_COMPILE
5787
# ifdef USE_NAMED_GROUP
5788
  onig_print_names(stderr, reg);
5789
# endif
5790
  print_compiled_byte_code_list(stderr, reg);
5791
#endif
5792
5793
9.10k
 end:
5794
9.10k
  return r;
5795
5796
0
 err_unset:
5797
0
#ifdef USE_SUBEXP_CALL
5798
0
  if (scan_env.num_call > 0) {
5799
0
    unset_addr_list_end(&uslist);
5800
0
  }
5801
0
#endif
5802
0
 err:
5803
0
  if (IS_NOT_NULL(scan_env.error)) {
5804
0
    if (IS_NOT_NULL(einfo)) {
5805
0
      einfo->enc     = scan_env.enc;
5806
0
      einfo->par     = scan_env.error;
5807
0
      einfo->par_end = scan_env.error_end;
5808
0
    }
5809
0
  }
5810
5811
0
  onig_node_free(root);
5812
0
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5813
0
      xfree(scan_env.mem_nodes_dynamic);
5814
0
  return r;
5815
0
}
5816
5817
static int onig_inited = 0;
5818
5819
extern int
5820
onig_reg_init(regex_t* reg, OnigOptionType option,
5821
        OnigCaseFoldType case_fold_flag,
5822
        OnigEncoding enc, const OnigSyntaxType* syntax)
5823
9.10k
{
5824
9.10k
  if (! onig_inited)
5825
1
    onig_init();
5826
5827
9.10k
  if (IS_NULL(reg))
5828
0
    return ONIGERR_INVALID_ARGUMENT;
5829
5830
9.10k
  (reg)->exact            = (UChar* )NULL;
5831
9.10k
  (reg)->chain            = (regex_t* )NULL;
5832
9.10k
  (reg)->p                = (UChar* )NULL;
5833
9.10k
  (reg)->name_table       = (void* )NULL;
5834
9.10k
  (reg)->repeat_range     = (OnigRepeatRange* )NULL;
5835
5836
9.10k
  if (ONIGENC_IS_UNDEF(enc))
5837
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5838
5839
9.10k
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5840
9.10k
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5841
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5842
0
  }
5843
5844
9.10k
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5845
0
    option |= syntax->options;
5846
0
    option &= ~ONIG_OPTION_SINGLELINE;
5847
0
  }
5848
9.10k
  else
5849
9.10k
    option |= syntax->options;
5850
5851
9.10k
  (reg)->enc              = enc;
5852
9.10k
  (reg)->options          = option;
5853
9.10k
  (reg)->syntax           = syntax;
5854
9.10k
  (reg)->optimize         = 0;
5855
5856
9.10k
  (reg)->alloc            = 0;
5857
9.10k
  (reg)->used             = 0;
5858
5859
9.10k
  (reg)->case_fold_flag   = case_fold_flag;
5860
9.10k
  return 0;
5861
9.10k
}
5862
5863
extern int
5864
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5865
          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5866
          const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5867
0
{
5868
0
  int r;
5869
5870
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5871
0
  if (r) return r;
5872
5873
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
5874
0
  return r;
5875
0
}
5876
5877
extern int
5878
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5879
    OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5880
    OnigErrorInfo* einfo)
5881
9.10k
{
5882
9.10k
  int r;
5883
5884
9.10k
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5885
9.10k
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5886
5887
9.10k
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5888
9.10k
  if (r) goto err;
5889
5890
9.10k
  r = onig_compile(*reg, pattern, pattern_end, einfo);
5891
9.10k
  if (r) {
5892
0
  err:
5893
0
    onig_free(*reg);
5894
0
    *reg = NULL;
5895
0
  }
5896
9.10k
  return r;
5897
9.10k
}
5898
5899
extern int
5900
onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5901
0
{
5902
0
  return onig_init();
5903
0
}
5904
5905
extern int
5906
onig_init(void)
5907
397
{
5908
397
  if (onig_inited != 0)
5909
396
    return 0;
5910
5911
1
  onig_inited = 1;
5912
5913
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5914
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
5915
#endif
5916
5917
1
  onigenc_init();
5918
  /* onigenc_set_default_caseconv_table((UChar* )0); */
5919
5920
#ifdef ONIG_DEBUG_STATISTICS
5921
  onig_statistics_init();
5922
#endif
5923
5924
1
  return 0;
5925
397
}
5926
5927
5928
static OnigEndCallListItemType* EndCallTop;
5929
5930
extern void onig_add_end_call(void (*func)(void))
5931
0
{
5932
0
  OnigEndCallListItemType* item;
5933
5934
0
  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5935
0
  if (item == 0) return ;
5936
5937
0
  item->next = EndCallTop;
5938
0
  item->func = func;
5939
5940
0
  EndCallTop = item;
5941
0
}
5942
5943
static void
5944
exec_end_call_list(void)
5945
0
{
5946
0
  OnigEndCallListItemType* prev;
5947
0
  void (*func)(void);
5948
5949
0
  while (EndCallTop != 0) {
5950
0
    func = EndCallTop->func;
5951
0
    (*func)();
5952
5953
0
    prev = EndCallTop;
5954
0
    EndCallTop = EndCallTop->next;
5955
0
    xfree(prev);
5956
0
  }
5957
0
}
5958
5959
extern int
5960
onig_end(void)
5961
0
{
5962
0
  exec_end_call_list();
5963
5964
#ifdef ONIG_DEBUG_STATISTICS
5965
  onig_print_statistics(stderr);
5966
#endif
5967
5968
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5969
  _CrtDumpMemoryLeaks();
5970
#endif
5971
5972
0
  onig_inited = 0;
5973
5974
0
  return 0;
5975
0
}
5976
5977
extern int
5978
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5979
0
{
5980
0
  OnigCodePoint n, *data;
5981
0
  OnigCodePoint low, high, x;
5982
5983
0
  GET_CODE_POINT(n, p);
5984
0
  data = (OnigCodePoint* )p;
5985
0
  data++;
5986
5987
0
  for (low = 0, high = n; low < high; ) {
5988
0
    x = (low + high) >> 1;
5989
0
    if (code > data[x * 2 + 1])
5990
0
      low = x + 1;
5991
0
    else
5992
0
      high = x;
5993
0
  }
5994
5995
0
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5996
0
}
5997
5998
extern int
5999
onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6000
5.14k
{
6001
5.14k
  int found;
6002
6003
5.14k
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6004
0
    if (IS_NULL(cc->mbuf)) {
6005
0
      found = 0;
6006
0
    }
6007
0
    else {
6008
0
      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6009
0
    }
6010
0
  }
6011
5.14k
  else {
6012
5.14k
    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6013
5.14k
  }
6014
6015
5.14k
  if (IS_NCCLASS_NOT(cc))
6016
1.98k
    return !found;
6017
3.16k
  else
6018
3.16k
    return found;
6019
5.14k
}
6020
6021
extern int
6022
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6023
5.14k
{
6024
5.14k
  int len;
6025
6026
5.14k
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6027
0
    len = 2;
6028
0
  }
6029
5.14k
  else {
6030
5.14k
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6031
5.14k
  }
6032
5.14k
  return onig_is_code_in_cc_len(len, code, cc);
6033
5.14k
}
6034
6035
6036
#ifdef ONIG_DEBUG
6037
6038
/* arguments type */
6039
# define ARG_SPECIAL     -1
6040
# define ARG_NON          0
6041
# define ARG_RELADDR      1
6042
# define ARG_ABSADDR      2
6043
# define ARG_LENGTH       3
6044
# define ARG_MEMNUM       4
6045
# define ARG_OPTION       5
6046
# define ARG_STATE_CHECK  6
6047
6048
OnigOpInfoType OnigOpInfo[] = {
6049
  { OP_FINISH,            "finish",          ARG_NON },
6050
  { OP_END,               "end",             ARG_NON },
6051
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
6052
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
6053
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
6054
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
6055
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
6056
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
6057
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
6058
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
6059
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
6060
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
6061
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
6062
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
6063
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
6064
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
6065
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
6066
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
6067
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
6068
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
6069
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
6070
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
6071
  { OP_ANYCHAR,           "anychar",         ARG_NON },
6072
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
6073
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
6074
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
6075
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6076
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6077
  { OP_WORD,                "word",            ARG_NON },
6078
  { OP_NOT_WORD,            "not-word",        ARG_NON },
6079
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
6080
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
6081
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
6082
  { OP_WORD_END,            "word-end",        ARG_NON },
6083
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
6084
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
6085
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
6086
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6087
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
6088
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
6089
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
6090
  { OP_END_BUF,             "end-buf",         ARG_NON },
6091
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
6092
  { OP_END_LINE,            "end-line",        ARG_NON },
6093
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
6094
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
6095
  { OP_BACKREF1,            "backref1",             ARG_NON },
6096
  { OP_BACKREF2,            "backref2",             ARG_NON },
6097
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
6098
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
6099
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
6100
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
6101
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
6102
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
6103
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
6104
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
6105
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
6106
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
6107
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
6108
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
6109
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
6110
  { OP_KEEP,                "keep",                 ARG_NON },
6111
  { OP_FAIL,                "fail",                 ARG_NON },
6112
  { OP_JUMP,                "jump",                 ARG_RELADDR },
6113
  { OP_PUSH,                "push",                 ARG_RELADDR },
6114
  { OP_POP,                 "pop",                  ARG_NON },
6115
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
6116
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
6117
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
6118
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
6119
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
6120
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
6121
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
6122
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
6123
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
6124
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
6125
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
6126
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
6127
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
6128
  { OP_POP_POS,              "pop-pos",              ARG_NON },
6129
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
6130
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
6131
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
6132
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
6133
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
6134
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6135
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6136
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
6137
  { OP_ABSENT,               "absent",               ARG_RELADDR },
6138
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
6139
  { OP_CALL,                 "call",                 ARG_ABSADDR },
6140
  { OP_RETURN,               "return",               ARG_NON },
6141
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
6142
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
6143
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6144
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
6145
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
6146
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6147
    "state-check-anychar-ml*", ARG_STATE_CHECK },
6148
  { -1, "", ARG_NON }
6149
};
6150
6151
static const char*
6152
op2name(int opcode)
6153
{
6154
  int i;
6155
6156
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6157
    if (opcode == OnigOpInfo[i].opcode)
6158
      return OnigOpInfo[i].name;
6159
  }
6160
  return "";
6161
}
6162
6163
static int
6164
op2arg_type(int opcode)
6165
{
6166
  int i;
6167
6168
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6169
    if (opcode == OnigOpInfo[i].opcode)
6170
      return OnigOpInfo[i].arg_type;
6171
  }
6172
  return ARG_SPECIAL;
6173
}
6174
6175
# ifdef ONIG_DEBUG_PARSE_TREE
6176
static void
6177
Indent(FILE* f, int indent)
6178
{
6179
  int i;
6180
  for (i = 0; i < indent; i++) putc(' ', f);
6181
}
6182
# endif /* ONIG_DEBUG_PARSE_TREE */
6183
6184
static void
6185
p_string(FILE* f, ptrdiff_t len, UChar* s)
6186
{
6187
  fputs(":", f);
6188
  while (len-- > 0) { fputc(*s++, f); }
6189
}
6190
6191
static void
6192
p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6193
{
6194
  int x = len * mb_len;
6195
6196
  fprintf(f, ":%d:", len);
6197
  while (x-- > 0) { fputc(*s++, f); }
6198
}
6199
6200
extern void
6201
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6202
                              OnigEncoding enc)
6203
{
6204
  int i, n, arg_type;
6205
  RelAddrType addr;
6206
  LengthType len;
6207
  MemNumType mem;
6208
  StateCheckNumType scn;
6209
  OnigCodePoint code;
6210
  UChar *q;
6211
6212
  fprintf(f, "[%s", op2name(*bp));
6213
  arg_type = op2arg_type(*bp);
6214
  if (arg_type != ARG_SPECIAL) {
6215
    bp++;
6216
    switch (arg_type) {
6217
    case ARG_NON:
6218
      break;
6219
    case ARG_RELADDR:
6220
      GET_RELADDR_INC(addr, bp);
6221
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6222
      break;
6223
    case ARG_ABSADDR:
6224
      GET_ABSADDR_INC(addr, bp);
6225
      fprintf(f, ":(%d)", addr);
6226
      break;
6227
    case ARG_LENGTH:
6228
      GET_LENGTH_INC(len, bp);
6229
      fprintf(f, ":%d", len);
6230
      break;
6231
    case ARG_MEMNUM:
6232
      mem = *((MemNumType* )bp);
6233
      bp += SIZE_MEMNUM;
6234
      fprintf(f, ":%d", mem);
6235
      break;
6236
    case ARG_OPTION:
6237
      {
6238
  OnigOptionType option = *((OnigOptionType* )bp);
6239
  bp += SIZE_OPTION;
6240
  fprintf(f, ":%d", option);
6241
      }
6242
      break;
6243
6244
    case ARG_STATE_CHECK:
6245
      scn = *((StateCheckNumType* )bp);
6246
      bp += SIZE_STATE_CHECK_NUM;
6247
      fprintf(f, ":%d", scn);
6248
      break;
6249
    }
6250
  }
6251
  else {
6252
    switch (*bp++) {
6253
    case OP_EXACT1:
6254
    case OP_ANYCHAR_STAR_PEEK_NEXT:
6255
    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6256
      p_string(f, 1, bp++); break;
6257
    case OP_EXACT2:
6258
      p_string(f, 2, bp); bp += 2; break;
6259
    case OP_EXACT3:
6260
      p_string(f, 3, bp); bp += 3; break;
6261
    case OP_EXACT4:
6262
      p_string(f, 4, bp); bp += 4; break;
6263
    case OP_EXACT5:
6264
      p_string(f, 5, bp); bp += 5; break;
6265
    case OP_EXACTN:
6266
      GET_LENGTH_INC(len, bp);
6267
      p_len_string(f, len, 1, bp);
6268
      bp += len;
6269
      break;
6270
6271
    case OP_EXACTMB2N1:
6272
      p_string(f, 2, bp); bp += 2; break;
6273
    case OP_EXACTMB2N2:
6274
      p_string(f, 4, bp); bp += 4; break;
6275
    case OP_EXACTMB2N3:
6276
      p_string(f, 6, bp); bp += 6; break;
6277
    case OP_EXACTMB2N:
6278
      GET_LENGTH_INC(len, bp);
6279
      p_len_string(f, len, 2, bp);
6280
      bp += len * 2;
6281
      break;
6282
    case OP_EXACTMB3N:
6283
      GET_LENGTH_INC(len, bp);
6284
      p_len_string(f, len, 3, bp);
6285
      bp += len * 3;
6286
      break;
6287
    case OP_EXACTMBN:
6288
      {
6289
  int mb_len;
6290
6291
  GET_LENGTH_INC(mb_len, bp);
6292
  GET_LENGTH_INC(len, bp);
6293
  fprintf(f, ":%d:%d:", mb_len, len);
6294
  n = len * mb_len;
6295
  while (n-- > 0) { fputc(*bp++, f); }
6296
      }
6297
      break;
6298
6299
    case OP_EXACT1_IC:
6300
      len = enclen(enc, bp, bpend);
6301
      p_string(f, len, bp);
6302
      bp += len;
6303
      break;
6304
    case OP_EXACTN_IC:
6305
      GET_LENGTH_INC(len, bp);
6306
      p_len_string(f, len, 1, bp);
6307
      bp += len;
6308
      break;
6309
6310
    case OP_CCLASS:
6311
      n = bitset_on_num((BitSetRef )bp);
6312
      bp += SIZE_BITSET;
6313
      fprintf(f, ":%d", n);
6314
      break;
6315
6316
    case OP_CCLASS_NOT:
6317
      n = bitset_on_num((BitSetRef )bp);
6318
      bp += SIZE_BITSET;
6319
      fprintf(f, ":%d", n);
6320
      break;
6321
6322
    case OP_CCLASS_MB:
6323
    case OP_CCLASS_MB_NOT:
6324
      GET_LENGTH_INC(len, bp);
6325
      q = bp;
6326
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6327
      ALIGNMENT_RIGHT(q);
6328
# endif
6329
      GET_CODE_POINT(code, q);
6330
      bp += len;
6331
      fprintf(f, ":%d:%d", (int )code, len);
6332
      break;
6333
6334
    case OP_CCLASS_MIX:
6335
    case OP_CCLASS_MIX_NOT:
6336
      n = bitset_on_num((BitSetRef )bp);
6337
      bp += SIZE_BITSET;
6338
      GET_LENGTH_INC(len, bp);
6339
      q = bp;
6340
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6341
      ALIGNMENT_RIGHT(q);
6342
# endif
6343
      GET_CODE_POINT(code, q);
6344
      bp += len;
6345
      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6346
      break;
6347
6348
    case OP_BACKREFN_IC:
6349
      mem = *((MemNumType* )bp);
6350
      bp += SIZE_MEMNUM;
6351
      fprintf(f, ":%d", mem);
6352
      break;
6353
6354
    case OP_BACKREF_MULTI_IC:
6355
    case OP_BACKREF_MULTI:
6356
      fputs(" ", f);
6357
      GET_LENGTH_INC(len, bp);
6358
      for (i = 0; i < len; i++) {
6359
  GET_MEMNUM_INC(mem, bp);
6360
  if (i > 0) fputs(", ", f);
6361
  fprintf(f, "%d", mem);
6362
      }
6363
      break;
6364
6365
    case OP_BACKREF_WITH_LEVEL:
6366
      {
6367
  OnigOptionType option;
6368
  LengthType level;
6369
6370
  GET_OPTION_INC(option, bp);
6371
  fprintf(f, ":%d", option);
6372
  GET_LENGTH_INC(level, bp);
6373
  fprintf(f, ":%d", level);
6374
6375
  fputs(" ", f);
6376
  GET_LENGTH_INC(len, bp);
6377
  for (i = 0; i < len; i++) {
6378
    GET_MEMNUM_INC(mem, bp);
6379
    if (i > 0) fputs(", ", f);
6380
    fprintf(f, "%d", mem);
6381
  }
6382
      }
6383
      break;
6384
6385
    case OP_REPEAT:
6386
    case OP_REPEAT_NG:
6387
      {
6388
  mem = *((MemNumType* )bp);
6389
  bp += SIZE_MEMNUM;
6390
  addr = *((RelAddrType* )bp);
6391
  bp += SIZE_RELADDR;
6392
  fprintf(f, ":%d:%d", mem, addr);
6393
      }
6394
      break;
6395
6396
    case OP_PUSH_OR_JUMP_EXACT1:
6397
    case OP_PUSH_IF_PEEK_NEXT:
6398
      addr = *((RelAddrType* )bp);
6399
      bp += SIZE_RELADDR;
6400
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6401
      p_string(f, 1, bp);
6402
      bp += 1;
6403
      break;
6404
6405
    case OP_LOOK_BEHIND:
6406
      GET_LENGTH_INC(len, bp);
6407
      fprintf(f, ":%d", len);
6408
      break;
6409
6410
    case OP_PUSH_LOOK_BEHIND_NOT:
6411
      GET_RELADDR_INC(addr, bp);
6412
      GET_LENGTH_INC(len, bp);
6413
      fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6414
      break;
6415
6416
    case OP_STATE_CHECK_PUSH:
6417
    case OP_STATE_CHECK_PUSH_OR_JUMP:
6418
      scn = *((StateCheckNumType* )bp);
6419
      bp += SIZE_STATE_CHECK_NUM;
6420
      addr = *((RelAddrType* )bp);
6421
      bp += SIZE_RELADDR;
6422
      fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6423
      break;
6424
6425
    case OP_CONDITION:
6426
      GET_MEMNUM_INC(mem, bp);
6427
      GET_RELADDR_INC(addr, bp);
6428
      fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6429
      break;
6430
6431
    default:
6432
      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6433
        bp[-1]);
6434
    }
6435
  }
6436
  fputs("]", f);
6437
  if (nextp) *nextp = bp;
6438
}
6439
6440
# ifdef ONIG_DEBUG_COMPILE
6441
static void
6442
print_compiled_byte_code_list(FILE* f, regex_t* reg)
6443
{
6444
  int ncode;
6445
  UChar* bp = reg->p;
6446
  UChar* end = reg->p + reg->used;
6447
6448
  fprintf(f, "code length: %d", reg->used);
6449
6450
  ncode = -1;
6451
  while (bp < end) {
6452
    ncode++;
6453
    if (ncode % 5 == 0)
6454
      fprintf(f, "\n%ld:", bp - reg->p);
6455
    else
6456
      fprintf(f, " %ld:", bp - reg->p);
6457
    onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6458
  }
6459
6460
  fprintf(f, "\n");
6461
}
6462
# endif /* ONIG_DEBUG_COMPILE */
6463
6464
# ifdef ONIG_DEBUG_PARSE_TREE
6465
static void
6466
print_indent_tree(FILE* f, Node* node, int indent)
6467
{
6468
  int i, type, container_p = 0;
6469
  int add = 3;
6470
  UChar* p;
6471
6472
  Indent(f, indent);
6473
  if (IS_NULL(node)) {
6474
    fprintf(f, "ERROR: null node!!!\n");
6475
    exit (0);
6476
  }
6477
6478
  type = NTYPE(node);
6479
  switch (type) {
6480
  case NT_LIST:
6481
  case NT_ALT:
6482
    if (NTYPE(node) == NT_LIST)
6483
      fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6484
    else
6485
      fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6486
6487
    print_indent_tree(f, NCAR(node), indent + add);
6488
    while (IS_NOT_NULL(node = NCDR(node))) {
6489
      if (NTYPE(node) != type) {
6490
  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6491
  exit(0);
6492
      }
6493
      print_indent_tree(f, NCAR(node), indent + add);
6494
    }
6495
    break;
6496
6497
  case NT_STR:
6498
    fprintf(f, "<string%s:%"PRIxPTR">",
6499
      (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6500
    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6501
      if (*p >= 0x20 && *p < 0x7f)
6502
  fputc(*p, f);
6503
      else {
6504
  fprintf(f, " 0x%02x", *p);
6505
      }
6506
    }
6507
    break;
6508
6509
  case NT_CCLASS:
6510
    fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6511
    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6512
    if (NCCLASS(node)->mbuf) {
6513
      BBuf* bbuf = NCCLASS(node)->mbuf;
6514
      OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6515
      OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6516
      fprintf(f, "%d", *data++);
6517
      for (; data < end; data+=2) {
6518
  fprintf(f, ",");
6519
  fprintf(f, "%04x-%04x", data[0], data[1]);
6520
      }
6521
    }
6522
    break;
6523
6524
  case NT_CTYPE:
6525
    fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6526
    switch (NCTYPE(node)->ctype) {
6527
    case ONIGENC_CTYPE_WORD:
6528
      if (NCTYPE(node)->not != 0)
6529
  fputs("not word",       f);
6530
      else
6531
  fputs("word",           f);
6532
      break;
6533
6534
    default:
6535
      fprintf(f, "ERROR: undefined ctype.\n");
6536
      exit(0);
6537
    }
6538
    break;
6539
6540
  case NT_CANY:
6541
    fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6542
    break;
6543
6544
  case NT_ANCHOR:
6545
    fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6546
    switch (NANCHOR(node)->type) {
6547
    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6548
    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6549
    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6550
    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6551
    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6552
    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6553
6554
    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6555
    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6556
#  ifdef USE_WORD_BEGIN_END
6557
    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6558
    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6559
#  endif
6560
    case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
6561
    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
6562
    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
6563
    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6564
    case ANCHOR_KEEP:            fputs("keep",f);            break;
6565
6566
    default:
6567
      fprintf(f, "ERROR: undefined anchor type.\n");
6568
      break;
6569
    }
6570
    break;
6571
6572
  case NT_BREF:
6573
    {
6574
      int* p;
6575
      BRefNode* br = NBREF(node);
6576
      p = BACKREFS_P(br);
6577
      fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6578
      for (i = 0; i < br->back_num; i++) {
6579
  if (i > 0) fputs(", ", f);
6580
  fprintf(f, "%d", p[i]);
6581
      }
6582
    }
6583
    break;
6584
6585
#  ifdef USE_SUBEXP_CALL
6586
  case NT_CALL:
6587
    {
6588
      CallNode* cn = NCALL(node);
6589
      fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6590
      p_string(f, cn->name_end - cn->name, cn->name);
6591
    }
6592
    break;
6593
#  endif
6594
6595
  case NT_QTFR:
6596
    fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6597
      NQTFR(node)->lower, NQTFR(node)->upper,
6598
      (NQTFR(node)->greedy ? "" : "?"));
6599
    print_indent_tree(f, NQTFR(node)->target, indent + add);
6600
    break;
6601
6602
  case NT_ENCLOSE:
6603
    fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6604
    switch (NENCLOSE(node)->type) {
6605
    case ENCLOSE_OPTION:
6606
      fprintf(f, "option:%d", NENCLOSE(node)->option);
6607
      break;
6608
    case ENCLOSE_MEMORY:
6609
      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6610
      break;
6611
    case ENCLOSE_STOP_BACKTRACK:
6612
      fprintf(f, "stop-bt");
6613
      break;
6614
    case ENCLOSE_CONDITION:
6615
      fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6616
      break;
6617
    case ENCLOSE_ABSENT:
6618
      fprintf(f, "absent");
6619
      break;
6620
6621
    default:
6622
      break;
6623
    }
6624
    fprintf(f, "\n");
6625
    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6626
    break;
6627
6628
  default:
6629
    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6630
    break;
6631
  }
6632
6633
  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6634
      type != NT_ENCLOSE)
6635
    fprintf(f, "\n");
6636
6637
  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6638
6639
  fflush(f);
6640
}
6641
6642
static void
6643
print_tree(FILE* f, Node* node)
6644
{
6645
  print_indent_tree(f, node, 0);
6646
}
6647
# endif /* ONIG_DEBUG_PARSE_TREE */
6648
#endif /* ONIG_DEBUG */