Coverage Report

Created: 2023-01-10 06:17

/src/fluent-bit/lib/onigmo/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regparse.h"
32
33
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35
extern OnigCaseFoldType
36
onig_get_default_case_fold_flag(void)
37
0
{
38
0
  return OnigDefaultCaseFoldFlag;
39
0
}
40
41
extern int
42
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43
0
{
44
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
45
0
  return 0;
46
0
}
47
48
49
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51
#endif
52
53
#if 0
54
static UChar*
55
str_dup(UChar* s, UChar* end)
56
{
57
  ptrdiff_t len = end - s;
58
59
  if (len > 0) {
60
    UChar* r = (UChar* )xmalloc(len + 1);
61
    CHECK_NULL_RETURN(r);
62
    xmemcpy(r, s, len);
63
    r[len] = (UChar )0;
64
    return r;
65
  }
66
  else return NULL;
67
}
68
#endif
69
70
static void
71
swap_node(Node* a, Node* b)
72
5.00k
{
73
5.00k
  Node c;
74
5.00k
  c = *a; *a = *b; *b = c;
75
76
5.00k
  if (NTYPE(a) == NT_STR) {
77
0
    StrNode* sn = NSTR(a);
78
0
    if (sn->capa == 0) {
79
0
      size_t len = sn->end - sn->s;
80
0
      sn->s   = sn->buf;
81
0
      sn->end = sn->s + len;
82
0
    }
83
0
  }
84
85
5.00k
  if (NTYPE(b) == NT_STR) {
86
0
    StrNode* sn = NSTR(b);
87
0
    if (sn->capa == 0) {
88
0
      size_t len = sn->end - sn->s;
89
0
      sn->s   = sn->buf;
90
0
      sn->end = sn->s + len;
91
0
    }
92
0
  }
93
5.00k
}
94
95
static OnigDistance
96
distance_add(OnigDistance d1, OnigDistance d2)
97
152k
{
98
152k
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99
47.7k
    return ONIG_INFINITE_DISTANCE;
100
104k
  else {
101
104k
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102
0
    else return ONIG_INFINITE_DISTANCE;
103
104k
  }
104
152k
}
105
106
static OnigDistance
107
distance_multiply(OnigDistance d, int m)
108
12.7k
{
109
12.7k
  if (m == 0) return 0;
110
111
8.08k
  if (d < ONIG_INFINITE_DISTANCE / m)
112
8.08k
    return d * m;
113
0
  else
114
0
    return ONIG_INFINITE_DISTANCE;
115
8.08k
}
116
117
static int
118
bitset_is_empty(BitSetRef bs)
119
0
{
120
0
  int i;
121
0
  for (i = 0; i < BITSET_SIZE; i++) {
122
0
    if (bs[i] != 0) return 0;
123
0
  }
124
0
  return 1;
125
0
}
126
127
#ifdef ONIG_DEBUG
128
static int
129
bitset_on_num(BitSetRef bs)
130
{
131
  int i, n;
132
133
  n = 0;
134
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135
    if (BITSET_AT(bs, i)) n++;
136
  }
137
  return n;
138
}
139
#endif
140
141
extern int
142
onig_bbuf_init(BBuf* buf, OnigDistance size)
143
13.8k
{
144
13.8k
  if (size <= 0) {
145
0
    size   = 0;
146
0
    buf->p = NULL;
147
0
  }
148
13.8k
  else {
149
13.8k
    buf->p = (UChar* )xmalloc(size);
150
13.8k
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151
13.8k
  }
152
153
13.8k
  buf->alloc = (unsigned int )size;
154
13.8k
  buf->used  = 0;
155
13.8k
  return 0;
156
13.8k
}
157
158
159
#ifdef USE_SUBEXP_CALL
160
161
static int
162
unset_addr_list_init(UnsetAddrList* uslist, int size)
163
0
{
164
0
  UnsetAddr* p;
165
166
0
  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167
0
  CHECK_NULL_RETURN_MEMERR(p);
168
0
  uslist->num   = 0;
169
0
  uslist->alloc = size;
170
0
  uslist->us    = p;
171
0
  return 0;
172
0
}
173
174
static void
175
unset_addr_list_end(UnsetAddrList* uslist)
176
0
{
177
0
  if (IS_NOT_NULL(uslist->us))
178
0
    xfree(uslist->us);
179
0
}
180
181
static int
182
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183
0
{
184
0
  UnsetAddr* p;
185
0
  int size;
186
187
0
  if (uslist->num >= uslist->alloc) {
188
0
    size = uslist->alloc * 2;
189
0
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190
0
    CHECK_NULL_RETURN_MEMERR(p);
191
0
    uslist->alloc = size;
192
0
    uslist->us    = p;
193
0
  }
194
195
0
  uslist->us[uslist->num].offset = offset;
196
0
  uslist->us[uslist->num].target = node;
197
0
  uslist->num++;
198
0
  return 0;
199
0
}
200
#endif /* USE_SUBEXP_CALL */
201
202
203
static int
204
add_opcode(regex_t* reg, int opcode)
205
90.0k
{
206
90.0k
  BBUF_ADD1(reg, opcode);
207
90.0k
  return 0;
208
90.0k
}
209
210
#ifdef USE_COMBINATION_EXPLOSION_CHECK
211
static int
212
add_state_check_num(regex_t* reg, int num)
213
{
214
  StateCheckNumType n = (StateCheckNumType )num;
215
216
  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217
  return 0;
218
}
219
#endif
220
221
static int
222
add_rel_addr(regex_t* reg, int addr)
223
25.0k
{
224
25.0k
  RelAddrType ra = (RelAddrType )addr;
225
226
25.0k
  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227
25.0k
  return 0;
228
25.0k
}
229
230
static int
231
add_abs_addr(regex_t* reg, int addr)
232
0
{
233
0
  AbsAddrType ra = (AbsAddrType )addr;
234
235
0
  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236
0
  return 0;
237
0
}
238
239
static int
240
add_length(regex_t* reg, OnigDistance len)
241
6.93k
{
242
6.93k
  LengthType l = (LengthType )len;
243
244
6.93k
  BBUF_ADD(reg, &l, SIZE_LENGTH);
245
6.93k
  return 0;
246
6.93k
}
247
248
static int
249
add_mem_num(regex_t* reg, int num)
250
3.85k
{
251
3.85k
  MemNumType n = (MemNumType )num;
252
253
3.85k
  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254
3.85k
  return 0;
255
3.85k
}
256
257
#if 0
258
static int
259
add_pointer(regex_t* reg, void* addr)
260
{
261
  PointerType ptr = (PointerType )addr;
262
263
  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264
  return 0;
265
}
266
#endif
267
268
static int
269
add_option(regex_t* reg, OnigOptionType option)
270
0
{
271
0
  BBUF_ADD(reg, &option, SIZE_OPTION);
272
0
  return 0;
273
0
}
274
275
static int
276
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277
25.0k
{
278
25.0k
  int r;
279
280
25.0k
  r = add_opcode(reg, opcode);
281
25.0k
  if (r) return r;
282
25.0k
  r = add_rel_addr(reg, addr);
283
25.0k
  return r;
284
25.0k
}
285
286
static int
287
add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288
16.9k
{
289
16.9k
  BBUF_ADD(reg, bytes, len);
290
16.9k
  return 0;
291
16.9k
}
292
293
static int
294
add_bitset(regex_t* reg, BitSetRef bs)
295
15.4k
{
296
15.4k
  BBUF_ADD(reg, bs, SIZE_BITSET);
297
15.4k
  return 0;
298
15.4k
}
299
300
static int
301
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302
0
{
303
0
  int r;
304
0
305
0
  r = add_opcode(reg, opcode);
306
0
  if (r) return r;
307
0
  r = add_option(reg, option);
308
0
  return r;
309
0
}
310
311
static int compile_length_tree(Node* node, regex_t* reg);
312
static int compile_tree(Node* node, regex_t* reg);
313
314
315
#define IS_NEED_STR_LEN_OP_EXACT(op) \
316
28.8k
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
317
28.8k
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
318
319
static int
320
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321
28.8k
{
322
28.8k
  int op;
323
28.8k
  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325
28.8k
  if (ignore_case) {
326
0
    switch (str_len) {
327
0
    case 1:  op = OP_EXACT1_IC; break;
328
0
    default: op = OP_EXACTN_IC; break;
329
0
    }
330
0
  }
331
28.8k
  else {
332
28.8k
    switch (mb_len) {
333
28.8k
    case 1:
334
28.8k
      switch (str_len) {
335
9.24k
      case 1:  op = OP_EXACT1; break;
336
770
      case 2:  op = OP_EXACT2; break;
337
1.15k
      case 3:  op = OP_EXACT3; break;
338
1.54k
      case 4:  op = OP_EXACT4; break;
339
2.31k
      case 5:  op = OP_EXACT5; break;
340
13.8k
      default: op = OP_EXACTN; break;
341
28.8k
      }
342
28.8k
      break;
343
344
28.8k
    case 2:
345
0
      switch (str_len) {
346
0
      case 1:  op = OP_EXACTMB2N1; break;
347
0
      case 2:  op = OP_EXACTMB2N2; break;
348
0
      case 3:  op = OP_EXACTMB2N3; break;
349
0
      default: op = OP_EXACTMB2N;  break;
350
0
      }
351
0
      break;
352
353
0
    case 3:
354
0
      op = OP_EXACTMB3N;
355
0
      break;
356
357
0
    default:
358
0
      op = OP_EXACTMBN;
359
0
      break;
360
28.8k
    }
361
28.8k
  }
362
28.8k
  return op;
363
28.8k
}
364
365
static int
366
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367
4.23k
{
368
4.23k
  int r;
369
4.23k
  int saved_num_null_check = reg->num_null_check;
370
371
4.23k
  if (empty_info != 0) {
372
0
    r = add_opcode(reg, OP_NULL_CHECK_START);
373
0
    if (r) return r;
374
0
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375
0
    if (r) return r;
376
0
    reg->num_null_check++;
377
0
  }
378
379
4.23k
  r = compile_tree(node, reg);
380
4.23k
  if (r) return r;
381
382
4.23k
  if (empty_info != 0) {
383
0
    if (empty_info == NQ_TARGET_IS_EMPTY)
384
0
      r = add_opcode(reg, OP_NULL_CHECK_END);
385
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390
0
    if (r) return r;
391
0
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392
0
  }
393
4.23k
  return r;
394
4.23k
}
395
396
#ifdef USE_SUBEXP_CALL
397
static int
398
compile_call(CallNode* node, regex_t* reg)
399
0
{
400
0
  int r;
401
402
0
  r = add_opcode(reg, OP_CALL);
403
0
  if (r) return r;
404
0
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405
0
                          node->target);
406
0
  if (r) return r;
407
0
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408
0
  return r;
409
0
}
410
#endif
411
412
static int
413
compile_tree_n_times(Node* node, int n, regex_t* reg)
414
11.5k
{
415
11.5k
  int i, r;
416
417
18.4k
  for (i = 0; i < n; i++) {
418
6.93k
    r = compile_tree(node, reg);
419
6.93k
    if (r) return r;
420
6.93k
  }
421
11.5k
  return 0;
422
11.5k
}
423
424
static int
425
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426
                          regex_t* reg ARG_UNUSED, int ignore_case)
427
12.7k
{
428
12.7k
  int len;
429
12.7k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431
12.7k
  len = SIZE_OPCODE;
432
433
12.7k
  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
434
12.7k
  if (IS_NEED_STR_LEN_OP_EXACT(op))
435
6.93k
    len += SIZE_LENGTH;
436
437
12.7k
  len += (int )byte_len;
438
12.7k
  return len;
439
12.7k
}
440
441
static int
442
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443
                   regex_t* reg, int ignore_case)
444
16.1k
{
445
16.1k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446
16.1k
  add_opcode(reg, op);
447
448
16.1k
  if (op == OP_EXACTMBN)
449
0
    add_length(reg, mb_len);
450
451
16.1k
  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452
6.93k
    if (op == OP_EXACTN_IC)
453
0
      add_length(reg, byte_len);
454
6.93k
    else
455
6.93k
      add_length(reg, byte_len / mb_len);
456
6.93k
  }
457
458
16.1k
  add_bytes(reg, s, byte_len);
459
16.1k
  return 0;
460
16.1k
}
461
462
463
static int
464
compile_length_string_node(Node* node, regex_t* reg)
465
12.7k
{
466
12.7k
  int rlen, r, len, prev_len, blen, ambig;
467
12.7k
  OnigEncoding enc = reg->enc;
468
12.7k
  UChar *p, *prev;
469
12.7k
  StrNode* sn;
470
471
12.7k
  sn = NSTR(node);
472
12.7k
  if (sn->end <= sn->s)
473
0
    return 0;
474
475
12.7k
  ambig = NSTRING_IS_AMBIG(node);
476
477
12.7k
  p = prev = sn->s;
478
12.7k
  prev_len = enclen(enc, p, sn->end);
479
12.7k
  p += prev_len;
480
12.7k
  blen = prev_len;
481
12.7k
  rlen = 0;
482
483
91.2k
  for (; p < sn->end; ) {
484
78.5k
    len = enclen(enc, p, sn->end);
485
78.5k
    if (len == prev_len || ambig) {
486
78.5k
      blen += len;
487
78.5k
    }
488
0
    else {
489
0
      r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490
0
      rlen += r;
491
0
      prev = p;
492
0
      blen = len;
493
0
      prev_len = len;
494
0
    }
495
78.5k
    p += len;
496
78.5k
  }
497
12.7k
  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498
12.7k
  rlen += r;
499
12.7k
  return rlen;
500
12.7k
}
501
502
static int
503
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504
0
{
505
0
  if (sn->end <= sn->s)
506
0
    return 0;
507
508
0
  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509
0
}
510
511
static int
512
compile_string_node(Node* node, regex_t* reg)
513
16.5k
{
514
16.5k
  int r, len, prev_len, blen, ambig;
515
16.5k
  OnigEncoding enc = reg->enc;
516
16.5k
  UChar *p, *prev, *end;
517
16.5k
  StrNode* sn;
518
519
16.5k
  sn = NSTR(node);
520
16.5k
  if (sn->end <= sn->s)
521
385
    return 0;
522
523
16.1k
  end = sn->end;
524
16.1k
  ambig = NSTRING_IS_AMBIG(node);
525
526
16.1k
  p = prev = sn->s;
527
16.1k
  prev_len = enclen(enc, p, end);
528
16.1k
  p += prev_len;
529
16.1k
  blen = prev_len;
530
531
141k
  for (; p < end; ) {
532
125k
    len = enclen(enc, p, end);
533
125k
    if (len == prev_len || ambig) {
534
125k
      blen += len;
535
125k
    }
536
0
    else {
537
0
      r = add_compile_string(prev, prev_len, blen, reg, ambig);
538
0
      if (p + len > end) {
539
0
        return 0;
540
0
      }
541
0
      if (r) return r;
542
543
0
      prev  = p;
544
0
      blen  = len;
545
0
      prev_len = len;
546
0
    }
547
548
125k
    p += len;
549
125k
  }
550
16.1k
  return add_compile_string(prev, prev_len, blen, reg, ambig);
551
16.1k
}
552
553
static int
554
compile_string_raw_node(StrNode* sn, regex_t* reg)
555
0
{
556
0
  if (sn->end <= sn->s)
557
0
    return 0;
558
559
0
  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560
0
}
561
562
static int
563
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
564
0
{
565
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566
0
  add_length(reg, mbuf->used);
567
0
  return add_bytes(reg, mbuf->p, mbuf->used);
568
#else
569
  int r, pad_size;
570
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
571
572
  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573
  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575
576
  r = add_bytes(reg, mbuf->p, mbuf->used);
577
578
  /* padding for return value from compile_length_cclass_node() to be fix. */
579
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581
  return r;
582
#endif
583
0
}
584
585
static int
586
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
587
12.7k
{
588
12.7k
  int len;
589
590
12.7k
  if (IS_NULL(cc->mbuf)) {
591
12.7k
    len = SIZE_OPCODE + SIZE_BITSET;
592
12.7k
  }
593
0
  else {
594
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595
0
      len = SIZE_OPCODE;
596
0
    }
597
0
    else {
598
0
      len = SIZE_OPCODE + SIZE_BITSET;
599
0
    }
600
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601
0
    len += SIZE_LENGTH + cc->mbuf->used;
602
#else
603
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604
#endif
605
0
  }
606
607
12.7k
  return len;
608
12.7k
}
609
610
static int
611
compile_cclass_node(CClassNode* cc, regex_t* reg)
612
15.4k
{
613
15.4k
  int r;
614
615
15.4k
  if (IS_NULL(cc->mbuf)) {
616
15.4k
    if (IS_NCCLASS_NOT(cc))
617
4.23k
      add_opcode(reg, OP_CCLASS_NOT);
618
11.1k
    else
619
11.1k
      add_opcode(reg, OP_CCLASS);
620
621
15.4k
    r = add_bitset(reg, cc->bs);
622
15.4k
  }
623
0
  else {
624
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
625
0
      if (IS_NCCLASS_NOT(cc))
626
0
  add_opcode(reg, OP_CCLASS_MB_NOT);
627
0
      else
628
0
  add_opcode(reg, OP_CCLASS_MB);
629
630
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
631
0
    }
632
0
    else {
633
0
      if (IS_NCCLASS_NOT(cc))
634
0
  add_opcode(reg, OP_CCLASS_MIX_NOT);
635
0
      else
636
0
  add_opcode(reg, OP_CCLASS_MIX);
637
638
0
      r = add_bitset(reg, cc->bs);
639
0
      if (r) return r;
640
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
641
0
    }
642
0
  }
643
644
15.4k
  return r;
645
15.4k
}
646
647
static int
648
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
649
0
{
650
0
#define REPEAT_RANGE_ALLOC  4
651
652
0
  OnigRepeatRange* p;
653
654
0
  if (reg->repeat_range_alloc == 0) {
655
0
    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
656
0
    CHECK_NULL_RETURN_MEMERR(p);
657
0
    reg->repeat_range = p;
658
0
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
659
0
  }
660
0
  else if (reg->repeat_range_alloc <= id) {
661
0
    int n;
662
0
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
663
0
    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
664
0
                                    sizeof(OnigRepeatRange) * n);
665
0
    CHECK_NULL_RETURN_MEMERR(p);
666
0
    reg->repeat_range = p;
667
0
    reg->repeat_range_alloc = n;
668
0
  }
669
0
  else {
670
0
    p = reg->repeat_range;
671
0
  }
672
673
0
  p[id].lower = lower;
674
0
  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
675
0
  return 0;
676
0
}
677
678
static int
679
compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
680
                          regex_t* reg)
681
0
{
682
0
  int r;
683
0
  int num_repeat = reg->num_repeat;
684
685
0
  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
686
0
  if (r) return r;
687
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
688
0
  reg->num_repeat++;
689
0
  if (r) return r;
690
0
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
691
0
  if (r) return r;
692
693
0
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
694
0
  if (r) return r;
695
696
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
697
0
  if (r) return r;
698
699
0
  if (
700
0
#ifdef USE_SUBEXP_CALL
701
0
      reg->num_call > 0 ||
702
0
#endif
703
0
      IS_QUANTIFIER_IN_REPEAT(qn)) {
704
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
705
0
  }
706
0
  else {
707
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
708
0
  }
709
0
  if (r) return r;
710
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
711
0
  return r;
712
0
}
713
714
static int
715
is_anychar_star_quantifier(QtfrNode* qn)
716
6.54k
{
717
6.54k
  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
718
6.54k
      NTYPE(qn->target) == NT_CANY)
719
1.92k
    return 1;
720
4.62k
  else
721
4.62k
    return 0;
722
6.54k
}
723
724
4.23k
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
725
#define CKN_ON   (ckn > 0)
726
727
#ifdef USE_COMBINATION_EXPLOSION_CHECK
728
729
static int
730
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
731
{
732
  int len, mod_tlen, cklen;
733
  int ckn;
734
  int infinite = IS_REPEAT_INFINITE(qn->upper);
735
  int empty_info = qn->target_empty_info;
736
  int tlen = compile_length_tree(qn->target, reg);
737
738
  if (tlen < 0) return tlen;
739
740
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
741
742
  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
743
744
  /* anychar repeat */
745
  if (NTYPE(qn->target) == NT_CANY) {
746
    if (qn->greedy && infinite) {
747
      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
748
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
749
      else
750
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
751
    }
752
  }
753
754
  if (empty_info != 0)
755
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
756
  else
757
    mod_tlen = tlen;
758
759
  if (infinite && qn->lower <= 1) {
760
    if (qn->greedy) {
761
      if (qn->lower == 1)
762
  len = SIZE_OP_JUMP;
763
      else
764
  len = 0;
765
766
      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
767
    }
768
    else {
769
      if (qn->lower == 0)
770
  len = SIZE_OP_JUMP;
771
      else
772
  len = 0;
773
774
      len += mod_tlen + SIZE_OP_PUSH + cklen;
775
    }
776
  }
777
  else if (qn->upper == 0) {
778
    if (qn->is_referred != 0) /* /(?<n>..){0}/ */
779
      len = SIZE_OP_JUMP + tlen;
780
    else
781
      len = 0;
782
  }
783
  else if (qn->upper == 1 && qn->greedy) {
784
    if (qn->lower == 0) {
785
      if (CKN_ON) {
786
  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
787
      }
788
      else {
789
  len = SIZE_OP_PUSH + tlen;
790
      }
791
    }
792
    else {
793
      len = tlen;
794
    }
795
  }
796
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
797
    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
798
  }
799
  else {
800
    len = SIZE_OP_REPEAT_INC
801
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
802
    if (CKN_ON)
803
      len += SIZE_OP_STATE_CHECK;
804
  }
805
806
  return len;
807
}
808
809
static int
810
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
811
{
812
  int r, mod_tlen;
813
  int ckn;
814
  int infinite = IS_REPEAT_INFINITE(qn->upper);
815
  int empty_info = qn->target_empty_info;
816
  int tlen = compile_length_tree(qn->target, reg);
817
818
  if (tlen < 0) return tlen;
819
820
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
821
822
  if (is_anychar_star_quantifier(qn)) {
823
    r = compile_tree_n_times(qn->target, qn->lower, reg);
824
    if (r) return r;
825
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
826
      if (IS_MULTILINE(reg->options))
827
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
828
      else
829
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
830
      if (r) return r;
831
      if (CKN_ON) {
832
  r = add_state_check_num(reg, ckn);
833
  if (r) return r;
834
      }
835
836
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
837
    }
838
    else {
839
      if (IS_MULTILINE(reg->options)) {
840
  r = add_opcode(reg, (CKN_ON ?
841
             OP_STATE_CHECK_ANYCHAR_ML_STAR
842
           : OP_ANYCHAR_ML_STAR));
843
      }
844
      else {
845
  r = add_opcode(reg, (CKN_ON ?
846
             OP_STATE_CHECK_ANYCHAR_STAR
847
           : OP_ANYCHAR_STAR));
848
      }
849
      if (r) return r;
850
      if (CKN_ON)
851
  r = add_state_check_num(reg, ckn);
852
853
      return r;
854
    }
855
  }
856
857
  if (empty_info != 0)
858
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
859
  else
860
    mod_tlen = tlen;
861
862
  if (infinite && qn->lower <= 1) {
863
    if (qn->greedy) {
864
      if (qn->lower == 1) {
865
  r = add_opcode_rel_addr(reg, OP_JUMP,
866
      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
867
  if (r) return r;
868
      }
869
870
      if (CKN_ON) {
871
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
872
  if (r) return r;
873
  r = add_state_check_num(reg, ckn);
874
  if (r) return r;
875
  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
876
      }
877
      else {
878
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
879
      }
880
      if (r) return r;
881
      r = compile_tree_empty_check(qn->target, reg, empty_info);
882
      if (r) return r;
883
      r = add_opcode_rel_addr(reg, OP_JUMP,
884
        -(mod_tlen + (int )SIZE_OP_JUMP
885
    + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
886
    }
887
    else {
888
      if (qn->lower == 0) {
889
  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
890
  if (r) return r;
891
      }
892
      r = compile_tree_empty_check(qn->target, reg, empty_info);
893
      if (r) return r;
894
      if (CKN_ON) {
895
  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
896
  if (r) return r;
897
  r = add_state_check_num(reg, ckn);
898
  if (r) return r;
899
  r = add_rel_addr(reg,
900
     -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
901
      }
902
      else
903
  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
904
    }
905
  }
906
  else if (qn->upper == 0) {
907
    if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
908
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
909
      if (r) return r;
910
      r = compile_tree(qn->target, reg);
911
    }
912
    else
913
      r = 0;
914
  }
915
  else if (qn->upper == 1 && qn->greedy) {
916
    if (qn->lower == 0) {
917
      if (CKN_ON) {
918
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
919
  if (r) return r;
920
  r = add_state_check_num(reg, ckn);
921
  if (r) return r;
922
  r = add_rel_addr(reg, tlen);
923
      }
924
      else {
925
  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
926
      }
927
      if (r) return r;
928
    }
929
930
    r = compile_tree(qn->target, reg);
931
  }
932
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
933
    if (CKN_ON) {
934
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
935
      if (r) return r;
936
      r = add_state_check_num(reg, ckn);
937
      if (r) return r;
938
      r = add_rel_addr(reg, SIZE_OP_JUMP);
939
    }
940
    else {
941
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
942
    }
943
944
    if (r) return r;
945
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
946
    if (r) return r;
947
    r = compile_tree(qn->target, reg);
948
  }
949
  else {
950
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
951
    if (CKN_ON) {
952
      if (r) return r;
953
      r = add_opcode(reg, OP_STATE_CHECK);
954
      if (r) return r;
955
      r = add_state_check_num(reg, ckn);
956
    }
957
  }
958
  return r;
959
}
960
961
#else /* USE_COMBINATION_EXPLOSION_CHECK */
962
963
static int
964
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
965
3.08k
{
966
3.08k
  int len, mod_tlen;
967
3.08k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
968
3.08k
  int empty_info = qn->target_empty_info;
969
3.08k
  int tlen = compile_length_tree(qn->target, reg);
970
971
3.08k
  if (tlen < 0) return tlen;
972
973
  /* anychar repeat */
974
3.08k
  if (NTYPE(qn->target) == NT_CANY) {
975
0
    if (qn->greedy && infinite) {
976
0
      if (IS_NOT_NULL(qn->next_head_exact))
977
0
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
978
0
      else
979
0
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
980
0
    }
981
0
  }
982
983
3.08k
  if (empty_info != 0)
984
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
985
3.08k
  else
986
3.08k
    mod_tlen = tlen;
987
988
3.08k
  if (infinite &&
989
3.08k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
990
3.08k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
991
0
      len = SIZE_OP_JUMP;
992
0
    }
993
3.08k
    else {
994
3.08k
      len = tlen * qn->lower;
995
3.08k
    }
996
997
3.08k
    if (qn->greedy) {
998
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
999
      if (IS_NOT_NULL(qn->head_exact))
1000
  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1001
      else
1002
#endif
1003
3.08k
      if (IS_NOT_NULL(qn->next_head_exact))
1004
2.31k
  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1005
770
      else
1006
770
  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1007
3.08k
    }
1008
0
    else
1009
0
      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1010
3.08k
  }
1011
0
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1012
0
    len = SIZE_OP_JUMP + tlen;
1013
0
  }
1014
0
  else if (!infinite && qn->greedy &&
1015
0
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1016
0
                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1017
0
    len = tlen * qn->lower;
1018
0
    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1019
0
  }
1020
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1021
0
    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1022
0
  }
1023
0
  else {
1024
0
    len = SIZE_OP_REPEAT_INC
1025
0
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1026
0
  }
1027
1028
3.08k
  return len;
1029
3.08k
}
1030
1031
static int
1032
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1033
6.54k
{
1034
6.54k
  int i, r, mod_tlen;
1035
6.54k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
1036
6.54k
  int empty_info = qn->target_empty_info;
1037
6.54k
  int tlen = compile_length_tree(qn->target, reg);
1038
1039
6.54k
  if (tlen < 0) return tlen;
1040
1041
6.54k
  if (is_anychar_star_quantifier(qn)) {
1042
1.92k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1043
1.92k
    if (r) return r;
1044
1.92k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1045
770
      if (IS_MULTILINE(reg->options))
1046
0
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1047
770
      else
1048
770
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1049
770
      if (r) return r;
1050
770
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1051
770
    }
1052
1.15k
    else {
1053
1.15k
      if (IS_MULTILINE(reg->options))
1054
0
  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1055
1.15k
      else
1056
1.15k
  return add_opcode(reg, OP_ANYCHAR_STAR);
1057
1.15k
    }
1058
1.92k
  }
1059
1060
4.62k
  if (empty_info != 0)
1061
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1062
4.62k
  else
1063
4.62k
    mod_tlen = tlen;
1064
1065
4.62k
  if (infinite &&
1066
4.62k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1067
4.23k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1068
0
      if (qn->greedy) {
1069
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1070
  if (IS_NOT_NULL(qn->head_exact))
1071
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1072
  else
1073
#endif
1074
0
  if (IS_NOT_NULL(qn->next_head_exact))
1075
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1076
0
  else
1077
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1078
0
      }
1079
0
      else {
1080
0
  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1081
0
      }
1082
0
      if (r) return r;
1083
0
    }
1084
4.23k
    else {
1085
4.23k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1086
4.23k
      if (r) return r;
1087
4.23k
    }
1088
1089
4.23k
    if (qn->greedy) {
1090
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1091
      if (IS_NOT_NULL(qn->head_exact)) {
1092
  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093
           mod_tlen + SIZE_OP_JUMP);
1094
  if (r) return r;
1095
  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097
  if (r) return r;
1098
  r = add_opcode_rel_addr(reg, OP_JUMP,
1099
  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100
      }
1101
      else
1102
#endif
1103
3.85k
      if (IS_NOT_NULL(qn->next_head_exact)) {
1104
0
  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1105
0
        mod_tlen + SIZE_OP_JUMP);
1106
0
  if (r) return r;
1107
0
  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1108
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1109
0
  if (r) return r;
1110
0
  r = add_opcode_rel_addr(reg, OP_JUMP,
1111
0
          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1112
0
      }
1113
3.85k
      else {
1114
3.85k
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1115
3.85k
  if (r) return r;
1116
3.85k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1117
3.85k
  if (r) return r;
1118
3.85k
  r = add_opcode_rel_addr(reg, OP_JUMP,
1119
3.85k
         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1120
3.85k
      }
1121
3.85k
    }
1122
385
    else {
1123
385
      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1124
385
      if (r) return r;
1125
385
      r = compile_tree_empty_check(qn->target, reg, empty_info);
1126
385
      if (r) return r;
1127
385
      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1128
385
    }
1129
4.23k
  }
1130
385
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1131
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1132
0
    if (r) return r;
1133
0
    r = compile_tree(qn->target, reg);
1134
0
  }
1135
385
  else if (!infinite && qn->greedy &&
1136
385
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1137
385
                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1138
385
    int n = qn->upper - qn->lower;
1139
1140
385
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1141
385
    if (r) return r;
1142
1143
770
    for (i = 0; i < n; i++) {
1144
385
      r = add_opcode_rel_addr(reg, OP_PUSH,
1145
385
         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1146
385
      if (r) return r;
1147
385
      r = compile_tree(qn->target, reg);
1148
385
      if (r) return r;
1149
385
    }
1150
385
  }
1151
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1152
0
    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1153
0
    if (r) return r;
1154
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1155
0
    if (r) return r;
1156
0
    r = compile_tree(qn->target, reg);
1157
0
  }
1158
0
  else {
1159
0
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1160
0
  }
1161
4.62k
  return r;
1162
4.62k
}
1163
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164
1165
static int
1166
compile_length_option_node(EncloseNode* node, regex_t* reg)
1167
0
{
1168
0
  int tlen;
1169
0
  OnigOptionType prev = reg->options;
1170
1171
0
  reg->options = node->option;
1172
0
  tlen = compile_length_tree(node->target, reg);
1173
0
  reg->options = prev;
1174
1175
0
  if (tlen < 0) return tlen;
1176
1177
0
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178
0
    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1179
0
           + tlen + SIZE_OP_SET_OPTION;
1180
0
  }
1181
0
  else
1182
0
    return tlen;
1183
0
}
1184
1185
static int
1186
compile_option_node(EncloseNode* node, regex_t* reg)
1187
385
{
1188
385
  int r;
1189
385
  OnigOptionType prev = reg->options;
1190
1191
385
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1192
0
    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1193
0
    if (r) return r;
1194
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1195
0
    if (r) return r;
1196
0
    r = add_opcode(reg, OP_FAIL);
1197
0
    if (r) return r;
1198
0
  }
1199
1200
385
  reg->options = node->option;
1201
385
  r = compile_tree(node->target, reg);
1202
385
  reg->options = prev;
1203
1204
385
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1205
0
    if (r) return r;
1206
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1207
0
  }
1208
385
  return r;
1209
385
}
1210
1211
static int
1212
compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1213
2.31k
{
1214
2.31k
  int len;
1215
2.31k
  int tlen;
1216
1217
2.31k
  if (node->type == ENCLOSE_OPTION)
1218
0
    return compile_length_option_node(node, reg);
1219
1220
2.31k
  if (node->target) {
1221
2.31k
    tlen = compile_length_tree(node->target, reg);
1222
2.31k
    if (tlen < 0) return tlen;
1223
2.31k
  }
1224
0
  else
1225
0
    tlen = 0;
1226
1227
2.31k
  switch (node->type) {
1228
0
  case ENCLOSE_MEMORY:
1229
0
#ifdef USE_SUBEXP_CALL
1230
0
    if (IS_ENCLOSE_CALLED(node)) {
1231
0
      len = SIZE_OP_MEMORY_START_PUSH + tlen
1232
0
    + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1233
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1234
0
  len += (IS_ENCLOSE_RECURSION(node)
1235
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1236
0
      else
1237
0
  len += (IS_ENCLOSE_RECURSION(node)
1238
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1239
0
    }
1240
0
    else if (IS_ENCLOSE_RECURSION(node)) {
1241
0
      len = SIZE_OP_MEMORY_START_PUSH;
1242
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1243
0
         ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1244
0
    }
1245
0
    else
1246
0
#endif
1247
0
    {
1248
0
      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1249
0
  len = SIZE_OP_MEMORY_START_PUSH;
1250
0
      else
1251
0
  len = SIZE_OP_MEMORY_START;
1252
1253
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1254
0
         ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1255
0
    }
1256
0
    break;
1257
1258
2.31k
  case ENCLOSE_STOP_BACKTRACK:
1259
2.31k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1260
2.31k
      QtfrNode* qn = NQTFR(node->target);
1261
2.31k
      tlen = compile_length_tree(qn->target, reg);
1262
2.31k
      if (tlen < 0) return tlen;
1263
1264
2.31k
      len = tlen * qn->lower
1265
2.31k
    + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1266
2.31k
    }
1267
0
    else {
1268
0
      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1269
0
    }
1270
2.31k
    break;
1271
1272
2.31k
  case ENCLOSE_CONDITION:
1273
0
    len = SIZE_OP_CONDITION;
1274
0
    if (NTYPE(node->target) == NT_ALT) {
1275
0
      Node* x = node->target;
1276
1277
0
      tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1278
0
      if (tlen < 0) return tlen;
1279
0
      len += tlen + SIZE_OP_JUMP;
1280
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1281
0
      x = NCDR(x);
1282
0
      tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1283
0
      if (tlen < 0) return tlen;
1284
0
      len += tlen;
1285
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1286
0
    }
1287
0
    else {
1288
0
      return ONIGERR_PARSER_BUG;
1289
0
    }
1290
0
    break;
1291
1292
0
  case ENCLOSE_ABSENT:
1293
0
    len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1294
0
    break;
1295
1296
0
  default:
1297
0
    return ONIGERR_TYPE_BUG;
1298
0
    break;
1299
2.31k
  }
1300
1301
2.31k
  return len;
1302
2.31k
}
1303
1304
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1305
1306
static int
1307
compile_enclose_node(EncloseNode* node, regex_t* reg)
1308
7.31k
{
1309
7.31k
  int r, len;
1310
1311
7.31k
  if (node->type == ENCLOSE_OPTION)
1312
385
    return compile_option_node(node, reg);
1313
1314
6.93k
  switch (node->type) {
1315
1.92k
  case ENCLOSE_MEMORY:
1316
1.92k
#ifdef USE_SUBEXP_CALL
1317
1.92k
    if (IS_ENCLOSE_CALLED(node)) {
1318
0
      r = add_opcode(reg, OP_CALL);
1319
0
      if (r) return r;
1320
0
      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1321
0
      node->state |= NST_ADDR_FIXED;
1322
0
      r = add_abs_addr(reg, (int )node->call_addr);
1323
0
      if (r) return r;
1324
0
      len = compile_length_tree(node->target, reg);
1325
0
      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1326
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1327
0
  len += (IS_ENCLOSE_RECURSION(node)
1328
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1329
0
      else
1330
0
  len += (IS_ENCLOSE_RECURSION(node)
1331
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1332
1333
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1334
0
      if (r) return r;
1335
0
    }
1336
1.92k
#endif
1337
1.92k
    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1338
0
      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1339
1.92k
    else
1340
1.92k
      r = add_opcode(reg, OP_MEMORY_START);
1341
1.92k
    if (r) return r;
1342
1.92k
    r = add_mem_num(reg, node->regnum);
1343
1.92k
    if (r) return r;
1344
1.92k
    r = compile_tree(node->target, reg);
1345
1.92k
    if (r) return r;
1346
1.92k
#ifdef USE_SUBEXP_CALL
1347
1.92k
    if (IS_ENCLOSE_CALLED(node)) {
1348
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1349
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350
0
           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1351
0
      else
1352
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1353
0
           ? OP_MEMORY_END_REC : OP_MEMORY_END));
1354
1355
0
      if (r) return r;
1356
0
      r = add_mem_num(reg, node->regnum);
1357
0
      if (r) return r;
1358
0
      r = add_opcode(reg, OP_RETURN);
1359
0
    }
1360
1.92k
    else if (IS_ENCLOSE_RECURSION(node)) {
1361
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1362
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1363
0
      else
1364
0
  r = add_opcode(reg, OP_MEMORY_END_REC);
1365
0
      if (r) return r;
1366
0
      r = add_mem_num(reg, node->regnum);
1367
0
    }
1368
1.92k
    else
1369
1.92k
#endif
1370
1.92k
    {
1371
1.92k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1372
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1373
1.92k
      else
1374
1.92k
  r = add_opcode(reg, OP_MEMORY_END);
1375
1.92k
      if (r) return r;
1376
1.92k
      r = add_mem_num(reg, node->regnum);
1377
1.92k
    }
1378
1.92k
    break;
1379
1380
5.00k
  case ENCLOSE_STOP_BACKTRACK:
1381
5.00k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1382
5.00k
      QtfrNode* qn = NQTFR(node->target);
1383
5.00k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1384
5.00k
      if (r) return r;
1385
1386
5.00k
      len = compile_length_tree(qn->target, reg);
1387
5.00k
      if (len < 0) return len;
1388
1389
5.00k
      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1390
5.00k
      if (r) return r;
1391
5.00k
      r = compile_tree(qn->target, reg);
1392
5.00k
      if (r) return r;
1393
5.00k
      r = add_opcode(reg, OP_POP);
1394
5.00k
      if (r) return r;
1395
5.00k
      r = add_opcode_rel_addr(reg, OP_JUMP,
1396
5.00k
   -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1397
5.00k
    }
1398
0
    else {
1399
0
      r = add_opcode(reg, OP_PUSH_STOP_BT);
1400
0
      if (r) return r;
1401
0
      r = compile_tree(node->target, reg);
1402
0
      if (r) return r;
1403
0
      r = add_opcode(reg, OP_POP_STOP_BT);
1404
0
    }
1405
5.00k
    break;
1406
1407
5.00k
  case ENCLOSE_CONDITION:
1408
0
    r = add_opcode(reg, OP_CONDITION);
1409
0
    if (r) return r;
1410
0
    r = add_mem_num(reg, node->regnum);
1411
0
    if (r) return r;
1412
1413
0
    if (NTYPE(node->target) == NT_ALT) {
1414
0
      Node* x = node->target;
1415
0
      int len2;
1416
1417
0
      len = compile_length_tree(NCAR(x), reg);  /* yes-node */
1418
0
      if (len < 0) return len;
1419
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1420
0
      x = NCDR(x);
1421
0
      len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1422
0
      if (len2 < 0) return len2;
1423
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1424
1425
0
      x = node->target;
1426
0
      r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1427
0
      if (r) return r;
1428
0
      r = compile_tree(NCAR(x), reg);   /* yes-node */
1429
0
      if (r) return r;
1430
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1431
0
      if (r) return r;
1432
0
      x = NCDR(x);
1433
0
      r = compile_tree(NCAR(x), reg);   /* no-node */
1434
0
    }
1435
0
    else {
1436
0
      return ONIGERR_PARSER_BUG;
1437
0
    }
1438
0
    break;
1439
1440
0
  case ENCLOSE_ABSENT:
1441
0
    len = compile_length_tree(node->target, reg);
1442
0
    if (len < 0) return len;
1443
1444
0
    r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1445
0
    if (r) return r;
1446
0
    r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1447
0
    if (r) return r;
1448
0
    r = compile_tree(node->target, reg);
1449
0
    if (r) return r;
1450
0
    r = add_opcode(reg, OP_ABSENT_END);
1451
0
    break;
1452
1453
0
  default:
1454
0
    return ONIGERR_TYPE_BUG;
1455
0
    break;
1456
6.93k
  }
1457
1458
6.93k
  return r;
1459
6.93k
}
1460
1461
static int
1462
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1463
1.54k
{
1464
1.54k
  int len;
1465
1.54k
  int tlen = 0;
1466
1467
1.54k
  if (node->target) {
1468
0
    tlen = compile_length_tree(node->target, reg);
1469
0
    if (tlen < 0) return tlen;
1470
0
  }
1471
1472
1.54k
  switch (node->type) {
1473
0
  case ANCHOR_PREC_READ:
1474
0
    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1475
0
    break;
1476
0
  case ANCHOR_PREC_READ_NOT:
1477
0
    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1478
0
    break;
1479
0
  case ANCHOR_LOOK_BEHIND:
1480
0
    len = SIZE_OP_LOOK_BEHIND + tlen;
1481
0
    break;
1482
0
  case ANCHOR_LOOK_BEHIND_NOT:
1483
0
    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1484
0
    break;
1485
1486
1.54k
  default:
1487
1.54k
    len = SIZE_OPCODE;
1488
1.54k
    break;
1489
1.54k
  }
1490
1491
1.54k
  return len;
1492
1.54k
}
1493
1494
static int
1495
compile_anchor_node(AnchorNode* node, regex_t* reg)
1496
11.1k
{
1497
11.1k
  int r, len;
1498
1499
11.1k
  switch (node->type) {
1500
0
  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1501
0
  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1502
7.70k
  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1503
3.08k
  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1504
0
  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1505
0
  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1506
1507
385
  case ANCHOR_WORD_BOUND:
1508
385
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1509
385
    else                      r = add_opcode(reg, OP_WORD_BOUND);
1510
385
    break;
1511
0
  case ANCHOR_NOT_WORD_BOUND:
1512
0
    if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1513
0
    else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
1514
0
    break;
1515
0
#ifdef USE_WORD_BEGIN_END
1516
0
  case ANCHOR_WORD_BEGIN:
1517
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1518
0
    else                      r = add_opcode(reg, OP_WORD_BEGIN);
1519
0
    break;
1520
0
  case ANCHOR_WORD_END:
1521
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
1522
0
    else                      r = add_opcode(reg, OP_WORD_END);
1523
0
    break;
1524
0
#endif
1525
0
  case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
1526
1527
0
  case ANCHOR_PREC_READ:
1528
0
    r = add_opcode(reg, OP_PUSH_POS);
1529
0
    if (r) return r;
1530
0
    r = compile_tree(node->target, reg);
1531
0
    if (r) return r;
1532
0
    r = add_opcode(reg, OP_POP_POS);
1533
0
    break;
1534
1535
0
  case ANCHOR_PREC_READ_NOT:
1536
0
    len = compile_length_tree(node->target, reg);
1537
0
    if (len < 0) return len;
1538
0
    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1539
0
    if (r) return r;
1540
0
    r = compile_tree(node->target, reg);
1541
0
    if (r) return r;
1542
0
    r = add_opcode(reg, OP_FAIL_POS);
1543
0
    break;
1544
1545
0
  case ANCHOR_LOOK_BEHIND:
1546
0
    {
1547
0
      int n;
1548
0
      r = add_opcode(reg, OP_LOOK_BEHIND);
1549
0
      if (r) return r;
1550
0
      if (node->char_len < 0) {
1551
0
  r = get_char_length_tree(node->target, reg, &n);
1552
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1553
0
      }
1554
0
      else
1555
0
  n = node->char_len;
1556
0
      r = add_length(reg, n);
1557
0
      if (r) return r;
1558
0
      r = compile_tree(node->target, reg);
1559
0
    }
1560
0
    break;
1561
1562
0
  case ANCHOR_LOOK_BEHIND_NOT:
1563
0
    {
1564
0
      int n;
1565
0
      len = compile_length_tree(node->target, reg);
1566
0
      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1567
0
         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1568
0
      if (r) return r;
1569
0
      if (node->char_len < 0) {
1570
0
  r = get_char_length_tree(node->target, reg, &n);
1571
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1572
0
      }
1573
0
      else
1574
0
  n = node->char_len;
1575
0
      r = add_length(reg, n);
1576
0
      if (r) return r;
1577
0
      r = compile_tree(node->target, reg);
1578
0
      if (r) return r;
1579
0
      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1580
0
    }
1581
0
    break;
1582
1583
0
  default:
1584
0
    return ONIGERR_TYPE_BUG;
1585
0
    break;
1586
11.1k
  }
1587
1588
11.1k
  return r;
1589
11.1k
}
1590
1591
static int
1592
compile_length_tree(Node* node, regex_t* reg)
1593
37.7k
{
1594
37.7k
  int len, type, r;
1595
1596
37.7k
  type = NTYPE(node);
1597
37.7k
  switch (type) {
1598
3.08k
  case NT_LIST:
1599
3.08k
    len = 0;
1600
7.70k
    do {
1601
7.70k
      r = compile_length_tree(NCAR(node), reg);
1602
7.70k
      if (r < 0) return r;
1603
7.70k
      len += r;
1604
7.70k
    } while (IS_NOT_NULL(node = NCDR(node)));
1605
3.08k
    r = len;
1606
3.08k
    break;
1607
1608
0
  case NT_ALT:
1609
0
    {
1610
0
      int n = 0;
1611
0
      len = 0;
1612
0
      do {
1613
0
  r = compile_length_tree(NCAR(node), reg);
1614
0
  if (r < 0) return r;
1615
0
  len += r;
1616
0
  n++;
1617
0
      } while (IS_NOT_NULL(node = NCDR(node)));
1618
0
      r = len;
1619
0
      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1620
0
    }
1621
0
    break;
1622
1623
12.7k
  case NT_STR:
1624
12.7k
    if (NSTRING_IS_RAW(node))
1625
0
      r = compile_length_string_raw_node(NSTR(node), reg);
1626
12.7k
    else
1627
12.7k
      r = compile_length_string_node(node, reg);
1628
12.7k
    break;
1629
1630
12.7k
  case NT_CCLASS:
1631
12.7k
    r = compile_length_cclass_node(NCCLASS(node), reg);
1632
12.7k
    break;
1633
1634
0
  case NT_CTYPE:
1635
2.31k
  case NT_CANY:
1636
2.31k
    r = SIZE_OPCODE;
1637
2.31k
    break;
1638
1639
0
  case NT_BREF:
1640
0
    {
1641
0
      BRefNode* br = NBREF(node);
1642
1643
0
#ifdef USE_BACKREF_WITH_LEVEL
1644
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1645
0
  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1646
0
            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1647
0
      }
1648
0
      else
1649
0
#endif
1650
0
      if (br->back_num == 1) {
1651
0
  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1652
0
       ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1653
0
      }
1654
0
      else {
1655
0
  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1656
0
      }
1657
0
    }
1658
0
    break;
1659
1660
0
#ifdef USE_SUBEXP_CALL
1661
0
  case NT_CALL:
1662
0
    r = SIZE_OP_CALL;
1663
0
    break;
1664
0
#endif
1665
1666
3.08k
  case NT_QTFR:
1667
3.08k
    r = compile_length_quantifier_node(NQTFR(node), reg);
1668
3.08k
    break;
1669
1670
2.31k
  case NT_ENCLOSE:
1671
2.31k
    r = compile_length_enclose_node(NENCLOSE(node), reg);
1672
2.31k
    break;
1673
1674
1.54k
  case NT_ANCHOR:
1675
1.54k
    r = compile_length_anchor_node(NANCHOR(node), reg);
1676
1.54k
    break;
1677
1678
0
  default:
1679
0
    return ONIGERR_TYPE_BUG;
1680
0
    break;
1681
37.7k
  }
1682
1683
37.7k
  return r;
1684
37.7k
}
1685
1686
static int
1687
compile_tree(Node* node, regex_t* reg)
1688
71.2k
{
1689
71.2k
  int n, type, len, pos, r = 0;
1690
1691
71.2k
  type = NTYPE(node);
1692
71.2k
  switch (type) {
1693
9.24k
  case NT_LIST:
1694
38.1k
    do {
1695
38.1k
      r = compile_tree(NCAR(node), reg);
1696
38.1k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1697
9.24k
    break;
1698
1699
2.31k
  case NT_ALT:
1700
2.31k
    {
1701
2.31k
      Node* x = node;
1702
2.31k
      len = 0;
1703
5.39k
      do {
1704
5.39k
  len += compile_length_tree(NCAR(x), reg);
1705
5.39k
  if (NCDR(x) != NULL) {
1706
3.08k
    len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1707
3.08k
  }
1708
5.39k
      } while (IS_NOT_NULL(x = NCDR(x)));
1709
2.31k
      pos = reg->used + len;  /* goal position */
1710
1711
5.39k
      do {
1712
5.39k
  len = compile_length_tree(NCAR(node), reg);
1713
5.39k
  if (IS_NOT_NULL(NCDR(node))) {
1714
3.08k
    r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1715
3.08k
    if (r) break;
1716
3.08k
  }
1717
5.39k
  r = compile_tree(NCAR(node), reg);
1718
5.39k
  if (r) break;
1719
5.39k
  if (IS_NOT_NULL(NCDR(node))) {
1720
3.08k
    len = pos - (reg->used + SIZE_OP_JUMP);
1721
3.08k
    r = add_opcode_rel_addr(reg, OP_JUMP, len);
1722
3.08k
    if (r) break;
1723
3.08k
  }
1724
5.39k
      } while (IS_NOT_NULL(node = NCDR(node)));
1725
2.31k
    }
1726
0
    break;
1727
1728
16.5k
  case NT_STR:
1729
16.5k
    if (NSTRING_IS_RAW(node))
1730
0
      r = compile_string_raw_node(NSTR(node), reg);
1731
16.5k
    else
1732
16.5k
      r = compile_string_node(node, reg);
1733
16.5k
    break;
1734
1735
15.4k
  case NT_CCLASS:
1736
15.4k
    r = compile_cclass_node(NCCLASS(node), reg);
1737
15.4k
    break;
1738
1739
0
  case NT_CTYPE:
1740
0
    {
1741
0
      int op;
1742
1743
0
      switch (NCTYPE(node)->ctype) {
1744
0
      case ONIGENC_CTYPE_WORD:
1745
0
  if (NCTYPE(node)->ascii_range != 0) {
1746
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
1747
0
    else                         op = OP_ASCII_WORD;
1748
0
  }
1749
0
  else {
1750
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1751
0
    else                         op = OP_WORD;
1752
0
  }
1753
0
  break;
1754
0
      default:
1755
0
  return ONIGERR_TYPE_BUG;
1756
0
  break;
1757
0
      }
1758
0
      r = add_opcode(reg, op);
1759
0
    }
1760
0
    break;
1761
1762
2.69k
  case NT_CANY:
1763
2.69k
    if (IS_MULTILINE(reg->options))
1764
0
      r = add_opcode(reg, OP_ANYCHAR_ML);
1765
2.69k
    else
1766
2.69k
      r = add_opcode(reg, OP_ANYCHAR);
1767
2.69k
    break;
1768
1769
0
  case NT_BREF:
1770
0
    {
1771
0
      BRefNode* br = NBREF(node);
1772
1773
0
#ifdef USE_BACKREF_WITH_LEVEL
1774
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1775
0
  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1776
0
  if (r) return r;
1777
0
  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1778
0
  if (r) return r;
1779
0
  r = add_length(reg, br->nest_level);
1780
0
  if (r) return r;
1781
1782
0
  goto add_bacref_mems;
1783
0
      }
1784
0
      else
1785
0
#endif
1786
0
      if (br->back_num == 1) {
1787
0
  n = br->back_static[0];
1788
0
  if (IS_IGNORECASE(reg->options)) {
1789
0
    r = add_opcode(reg, OP_BACKREFN_IC);
1790
0
    if (r) return r;
1791
0
    r = add_mem_num(reg, n);
1792
0
  }
1793
0
  else {
1794
0
    switch (n) {
1795
0
    case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1796
0
    case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1797
0
    default:
1798
0
      r = add_opcode(reg, OP_BACKREFN);
1799
0
      if (r) return r;
1800
0
      r = add_mem_num(reg, n);
1801
0
      break;
1802
0
    }
1803
0
  }
1804
0
      }
1805
0
      else {
1806
0
  int i;
1807
0
  int* p;
1808
1809
0
  if (IS_IGNORECASE(reg->options)) {
1810
0
    r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1811
0
  }
1812
0
  else {
1813
0
    r = add_opcode(reg, OP_BACKREF_MULTI);
1814
0
  }
1815
0
  if (r) return r;
1816
1817
0
#ifdef USE_BACKREF_WITH_LEVEL
1818
0
      add_bacref_mems:
1819
0
#endif
1820
0
  r = add_length(reg, br->back_num);
1821
0
  if (r) return r;
1822
0
  p = BACKREFS_P(br);
1823
0
  for (i = br->back_num - 1; i >= 0; i--) {
1824
0
    r = add_mem_num(reg, p[i]);
1825
0
    if (r) return r;
1826
0
  }
1827
0
      }
1828
0
    }
1829
0
    break;
1830
1831
0
#ifdef USE_SUBEXP_CALL
1832
0
  case NT_CALL:
1833
0
    r = compile_call(NCALL(node), reg);
1834
0
    break;
1835
0
#endif
1836
1837
6.54k
  case NT_QTFR:
1838
6.54k
    r = compile_quantifier_node(NQTFR(node), reg);
1839
6.54k
    break;
1840
1841
7.31k
  case NT_ENCLOSE:
1842
7.31k
    r = compile_enclose_node(NENCLOSE(node), reg);
1843
7.31k
    break;
1844
1845
11.1k
  case NT_ANCHOR:
1846
11.1k
    r = compile_anchor_node(NANCHOR(node), reg);
1847
11.1k
    break;
1848
1849
0
  default:
1850
#ifdef ONIG_DEBUG
1851
    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1852
#endif
1853
0
    break;
1854
71.2k
  }
1855
1856
71.2k
  return r;
1857
71.2k
}
1858
1859
#ifdef USE_NAMED_GROUP
1860
1861
static int
1862
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1863
0
{
1864
0
  int r = 0;
1865
0
  Node* node = *plink;
1866
1867
0
  switch (NTYPE(node)) {
1868
0
  case NT_LIST:
1869
0
  case NT_ALT:
1870
0
    do {
1871
0
      r = noname_disable_map(&(NCAR(node)), map, counter);
1872
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873
0
    break;
1874
1875
0
  case NT_QTFR:
1876
0
    {
1877
0
      Node** ptarget = &(NQTFR(node)->target);
1878
0
      Node*  old = *ptarget;
1879
0
      r = noname_disable_map(ptarget, map, counter);
1880
0
      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1881
0
  onig_reduce_nested_quantifier(node, *ptarget);
1882
0
      }
1883
0
    }
1884
0
    break;
1885
1886
0
  case NT_ENCLOSE:
1887
0
    {
1888
0
      EncloseNode* en = NENCLOSE(node);
1889
0
      if (en->type == ENCLOSE_MEMORY) {
1890
0
  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1891
0
    (*counter)++;
1892
0
    map[en->regnum].new_val = *counter;
1893
0
    en->regnum = *counter;
1894
0
  }
1895
0
  else if (en->regnum != 0) {
1896
0
    *plink = en->target;
1897
0
    en->target = NULL_NODE;
1898
0
    onig_node_free(node);
1899
0
    r = noname_disable_map(plink, map, counter);
1900
0
    break;
1901
0
  }
1902
0
      }
1903
0
      r = noname_disable_map(&(en->target), map, counter);
1904
0
    }
1905
0
    break;
1906
1907
0
  case NT_ANCHOR:
1908
0
    if (NANCHOR(node)->target)
1909
0
      r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1910
0
    break;
1911
1912
0
  default:
1913
0
    break;
1914
0
  }
1915
1916
0
  return r;
1917
0
}
1918
1919
static int
1920
renumber_node_backref(Node* node, GroupNumRemap* map)
1921
0
{
1922
0
  int i, pos, n, old_num;
1923
0
  int *backs;
1924
0
  BRefNode* bn = NBREF(node);
1925
1926
0
  if (! IS_BACKREF_NAME_REF(bn))
1927
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1928
1929
0
  old_num = bn->back_num;
1930
0
  if (IS_NULL(bn->back_dynamic))
1931
0
    backs = bn->back_static;
1932
0
  else
1933
0
    backs = bn->back_dynamic;
1934
1935
0
  for (i = 0, pos = 0; i < old_num; i++) {
1936
0
    n = map[backs[i]].new_val;
1937
0
    if (n > 0) {
1938
0
      backs[pos] = n;
1939
0
      pos++;
1940
0
    }
1941
0
  }
1942
1943
0
  bn->back_num = pos;
1944
0
  return 0;
1945
0
}
1946
1947
static int
1948
renumber_by_map(Node* node, GroupNumRemap* map)
1949
0
{
1950
0
  int r = 0;
1951
1952
0
  switch (NTYPE(node)) {
1953
0
  case NT_LIST:
1954
0
  case NT_ALT:
1955
0
    do {
1956
0
      r = renumber_by_map(NCAR(node), map);
1957
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1958
0
    break;
1959
0
  case NT_QTFR:
1960
0
    r = renumber_by_map(NQTFR(node)->target, map);
1961
0
    break;
1962
0
  case NT_ENCLOSE:
1963
0
    {
1964
0
      EncloseNode* en = NENCLOSE(node);
1965
0
      if (en->type == ENCLOSE_CONDITION)
1966
0
  en->regnum = map[en->regnum].new_val;
1967
0
      r = renumber_by_map(en->target, map);
1968
0
    }
1969
0
    break;
1970
1971
0
  case NT_BREF:
1972
0
    r = renumber_node_backref(node, map);
1973
0
    break;
1974
1975
0
  case NT_ANCHOR:
1976
0
    if (NANCHOR(node)->target)
1977
0
      r = renumber_by_map(NANCHOR(node)->target, map);
1978
0
    break;
1979
1980
0
  default:
1981
0
    break;
1982
0
  }
1983
1984
0
  return r;
1985
0
}
1986
1987
static int
1988
numbered_ref_check(Node* node)
1989
7.70k
{
1990
7.70k
  int r = 0;
1991
1992
7.70k
  switch (NTYPE(node)) {
1993
385
  case NT_LIST:
1994
1.15k
  case NT_ALT:
1995
5.00k
    do {
1996
5.00k
      r = numbered_ref_check(NCAR(node));
1997
5.00k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998
1.15k
    break;
1999
770
  case NT_QTFR:
2000
770
    r = numbered_ref_check(NQTFR(node)->target);
2001
770
    break;
2002
1.54k
  case NT_ENCLOSE:
2003
1.54k
    r = numbered_ref_check(NENCLOSE(node)->target);
2004
1.54k
    break;
2005
2006
0
  case NT_BREF:
2007
0
    if (! IS_BACKREF_NAME_REF(NBREF(node)))
2008
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2009
0
    break;
2010
2011
770
  case NT_ANCHOR:
2012
770
    if (NANCHOR(node)->target)
2013
0
      r = numbered_ref_check(NANCHOR(node)->target);
2014
770
    break;
2015
2016
3.46k
  default:
2017
3.46k
    break;
2018
7.70k
  }
2019
2020
7.70k
  return r;
2021
7.70k
}
2022
2023
static int
2024
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025
0
{
2026
0
  int r, i, pos, counter;
2027
0
  BitStatusType loc;
2028
0
  GroupNumRemap* map;
2029
2030
0
  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2031
0
  CHECK_NULL_RETURN_MEMERR(map);
2032
0
  for (i = 1; i <= env->num_mem; i++) {
2033
0
    map[i].new_val = 0;
2034
0
  }
2035
0
  counter = 0;
2036
0
  r = noname_disable_map(root, map, &counter);
2037
0
  if (r != 0) return r;
2038
2039
0
  r = renumber_by_map(*root, map);
2040
0
  if (r != 0) return r;
2041
2042
0
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043
0
    if (map[i].new_val > 0) {
2044
0
      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2045
0
      pos++;
2046
0
    }
2047
0
  }
2048
2049
0
  loc = env->capture_history;
2050
0
  BIT_STATUS_CLEAR(env->capture_history);
2051
0
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052
0
    if (BIT_STATUS_AT(loc, i)) {
2053
0
      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054
0
    }
2055
0
  }
2056
2057
0
  env->num_mem = env->num_named;
2058
0
  reg->num_mem = env->num_named;
2059
2060
0
  return onig_renumber_name_table(reg, map);
2061
0
}
2062
#endif /* USE_NAMED_GROUP */
2063
2064
#ifdef USE_SUBEXP_CALL
2065
static int
2066
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067
0
{
2068
0
  int i, offset;
2069
0
  EncloseNode* en;
2070
0
  AbsAddrType addr;
2071
2072
0
  for (i = 0; i < uslist->num; i++) {
2073
0
    en = NENCLOSE(uslist->us[i].target);
2074
0
    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075
0
    addr = en->call_addr;
2076
0
    offset = uslist->us[i].offset;
2077
2078
0
    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079
0
  }
2080
0
  return 0;
2081
0
}
2082
#endif
2083
2084
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2085
static int
2086
quantifiers_memory_node_info(Node* node)
2087
0
{
2088
0
  int r = 0;
2089
2090
0
  switch (NTYPE(node)) {
2091
0
  case NT_LIST:
2092
0
  case NT_ALT:
2093
0
    {
2094
0
      int v;
2095
0
      do {
2096
0
  v = quantifiers_memory_node_info(NCAR(node));
2097
0
  if (v > r) r = v;
2098
0
      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099
0
    }
2100
0
    break;
2101
2102
0
# ifdef USE_SUBEXP_CALL
2103
0
  case NT_CALL:
2104
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2105
0
      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106
0
    }
2107
0
    else
2108
0
      r = quantifiers_memory_node_info(NCALL(node)->target);
2109
0
    break;
2110
0
# endif
2111
2112
0
  case NT_QTFR:
2113
0
    {
2114
0
      QtfrNode* qn = NQTFR(node);
2115
0
      if (qn->upper != 0) {
2116
0
  r = quantifiers_memory_node_info(qn->target);
2117
0
      }
2118
0
    }
2119
0
    break;
2120
2121
0
  case NT_ENCLOSE:
2122
0
    {
2123
0
      EncloseNode* en = NENCLOSE(node);
2124
0
      switch (en->type) {
2125
0
      case ENCLOSE_MEMORY:
2126
0
  return NQ_TARGET_IS_EMPTY_MEM;
2127
0
  break;
2128
2129
0
      case ENCLOSE_OPTION:
2130
0
      case ENCLOSE_STOP_BACKTRACK:
2131
0
      case ENCLOSE_CONDITION:
2132
0
      case ENCLOSE_ABSENT:
2133
0
  r = quantifiers_memory_node_info(en->target);
2134
0
  break;
2135
0
      default:
2136
0
  break;
2137
0
      }
2138
0
    }
2139
0
    break;
2140
2141
0
  case NT_BREF:
2142
0
  case NT_STR:
2143
0
  case NT_CTYPE:
2144
0
  case NT_CCLASS:
2145
0
  case NT_CANY:
2146
0
  case NT_ANCHOR:
2147
0
  default:
2148
0
    break;
2149
0
  }
2150
2151
0
  return r;
2152
0
}
2153
#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
2155
static int
2156
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157
13.8k
{
2158
13.8k
  OnigDistance tmin;
2159
13.8k
  int r = 0;
2160
2161
13.8k
  *min = 0;
2162
13.8k
  switch (NTYPE(node)) {
2163
0
  case NT_BREF:
2164
0
    {
2165
0
      int i;
2166
0
      int* backs;
2167
0
      Node** nodes = SCANENV_MEM_NODES(env);
2168
0
      BRefNode* br = NBREF(node);
2169
0
      if (br->state & NST_RECURSION) break;
2170
2171
0
      backs = BACKREFS_P(br);
2172
0
      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2173
0
      r = get_min_match_length(nodes[backs[0]], min, env);
2174
0
      if (r != 0) break;
2175
0
      for (i = 1; i < br->back_num; i++) {
2176
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2177
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178
0
  if (r != 0) break;
2179
0
  if (*min > tmin) *min = tmin;
2180
0
      }
2181
0
    }
2182
0
    break;
2183
2184
0
#ifdef USE_SUBEXP_CALL
2185
0
  case NT_CALL:
2186
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2187
0
      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188
0
      if (IS_ENCLOSE_MIN_FIXED(en))
2189
0
  *min = en->min_len;
2190
0
    }
2191
0
    else
2192
0
      r = get_min_match_length(NCALL(node)->target, min, env);
2193
0
    break;
2194
0
#endif
2195
2196
770
  case NT_LIST:
2197
1.54k
    do {
2198
1.54k
      r = get_min_match_length(NCAR(node), &tmin, env);
2199
1.54k
      if (r == 0) *min += tmin;
2200
1.54k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201
770
    break;
2202
2203
0
  case NT_ALT:
2204
0
    {
2205
0
      Node *x, *y;
2206
0
      y = node;
2207
0
      do {
2208
0
  x = NCAR(y);
2209
0
  r = get_min_match_length(x, &tmin, env);
2210
0
  if (r != 0) break;
2211
0
  if (y == node) *min = tmin;
2212
0
  else if (*min > tmin) *min = tmin;
2213
0
      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214
0
    }
2215
0
    break;
2216
2217
1.15k
  case NT_STR:
2218
1.15k
    {
2219
1.15k
      StrNode* sn = NSTR(node);
2220
1.15k
      *min = sn->end - sn->s;
2221
1.15k
    }
2222
1.15k
    break;
2223
2224
0
  case NT_CTYPE:
2225
0
    *min = 1;
2226
0
    break;
2227
2228
8.85k
  case NT_CCLASS:
2229
11.1k
  case NT_CANY:
2230
11.1k
    *min = 1;
2231
11.1k
    break;
2232
2233
770
  case NT_QTFR:
2234
770
    {
2235
770
      QtfrNode* qn = NQTFR(node);
2236
2237
770
      if (qn->lower > 0) {
2238
770
  r = get_min_match_length(qn->target, min, env);
2239
770
  if (r == 0)
2240
770
    *min = distance_multiply(*min, qn->lower);
2241
770
      }
2242
770
    }
2243
770
    break;
2244
2245
0
  case NT_ENCLOSE:
2246
0
    {
2247
0
      EncloseNode* en = NENCLOSE(node);
2248
0
      switch (en->type) {
2249
0
      case ENCLOSE_MEMORY:
2250
0
        if (IS_ENCLOSE_MIN_FIXED(en))
2251
0
          *min = en->min_len;
2252
0
        else {
2253
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254
0
      *min = 0;  /* recursive */
2255
0
    else {
2256
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2257
0
      r = get_min_match_length(en->target, min, env);
2258
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2259
0
      if (r == 0) {
2260
0
        en->min_len = *min;
2261
0
        SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2262
0
      }
2263
0
    }
2264
0
        }
2265
0
        break;
2266
2267
0
      case ENCLOSE_OPTION:
2268
0
      case ENCLOSE_STOP_BACKTRACK:
2269
0
      case ENCLOSE_CONDITION:
2270
0
  r = get_min_match_length(en->target, min, env);
2271
0
  break;
2272
2273
0
      case ENCLOSE_ABSENT:
2274
0
  break;
2275
0
      }
2276
0
    }
2277
0
    break;
2278
2279
0
  case NT_ANCHOR:
2280
0
  default:
2281
0
    break;
2282
13.8k
  }
2283
2284
13.8k
  return r;
2285
13.8k
}
2286
2287
static int
2288
get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289
0
{
2290
0
  OnigDistance tmax;
2291
0
  int r = 0;
2292
2293
0
  *max = 0;
2294
0
  switch (NTYPE(node)) {
2295
0
  case NT_LIST:
2296
0
    do {
2297
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2298
0
      if (r == 0)
2299
0
  *max = distance_add(*max, tmax);
2300
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301
0
    break;
2302
2303
0
  case NT_ALT:
2304
0
    do {
2305
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2306
0
      if (r == 0 && *max < tmax) *max = tmax;
2307
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308
0
    break;
2309
2310
0
  case NT_STR:
2311
0
    {
2312
0
      StrNode* sn = NSTR(node);
2313
0
      *max = sn->end - sn->s;
2314
0
    }
2315
0
    break;
2316
2317
0
  case NT_CTYPE:
2318
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319
0
    break;
2320
2321
0
  case NT_CCLASS:
2322
0
  case NT_CANY:
2323
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324
0
    break;
2325
2326
0
  case NT_BREF:
2327
0
    {
2328
0
      int i;
2329
0
      int* backs;
2330
0
      Node** nodes = SCANENV_MEM_NODES(env);
2331
0
      BRefNode* br = NBREF(node);
2332
0
      if (br->state & NST_RECURSION) {
2333
0
  *max = ONIG_INFINITE_DISTANCE;
2334
0
  break;
2335
0
      }
2336
0
      backs = BACKREFS_P(br);
2337
0
      for (i = 0; i < br->back_num; i++) {
2338
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2339
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340
0
  if (r != 0) break;
2341
0
  if (*max < tmax) *max = tmax;
2342
0
      }
2343
0
    }
2344
0
    break;
2345
2346
0
#ifdef USE_SUBEXP_CALL
2347
0
  case NT_CALL:
2348
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2349
0
      r = get_max_match_length(NCALL(node)->target, max, env);
2350
0
    else
2351
0
      *max = ONIG_INFINITE_DISTANCE;
2352
0
    break;
2353
0
#endif
2354
2355
0
  case NT_QTFR:
2356
0
    {
2357
0
      QtfrNode* qn = NQTFR(node);
2358
2359
0
      if (qn->upper != 0) {
2360
0
  r = get_max_match_length(qn->target, max, env);
2361
0
  if (r == 0 && *max != 0) {
2362
0
    if (! IS_REPEAT_INFINITE(qn->upper))
2363
0
      *max = distance_multiply(*max, qn->upper);
2364
0
    else
2365
0
      *max = ONIG_INFINITE_DISTANCE;
2366
0
  }
2367
0
      }
2368
0
    }
2369
0
    break;
2370
2371
0
  case NT_ENCLOSE:
2372
0
    {
2373
0
      EncloseNode* en = NENCLOSE(node);
2374
0
      switch (en->type) {
2375
0
      case ENCLOSE_MEMORY:
2376
0
  if (IS_ENCLOSE_MAX_FIXED(en))
2377
0
    *max = en->max_len;
2378
0
  else {
2379
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2380
0
      *max = ONIG_INFINITE_DISTANCE;
2381
0
    else {
2382
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2383
0
      r = get_max_match_length(en->target, max, env);
2384
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2385
0
      if (r == 0) {
2386
0
        en->max_len = *max;
2387
0
        SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2388
0
      }
2389
0
    }
2390
0
  }
2391
0
  break;
2392
2393
0
      case ENCLOSE_OPTION:
2394
0
      case ENCLOSE_STOP_BACKTRACK:
2395
0
      case ENCLOSE_CONDITION:
2396
0
  r = get_max_match_length(en->target, max, env);
2397
0
  break;
2398
2399
0
      case ENCLOSE_ABSENT:
2400
0
  break;
2401
0
      }
2402
0
    }
2403
0
    break;
2404
2405
0
  case NT_ANCHOR:
2406
0
  default:
2407
0
    break;
2408
0
  }
2409
2410
0
  return r;
2411
0
}
2412
2413
0
#define GET_CHAR_LEN_VARLEN           -1
2414
0
#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2415
2416
/* fixed size pattern node only */
2417
static int
2418
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419
0
{
2420
0
  int tlen;
2421
0
  int r = 0;
2422
2423
0
  level++;
2424
0
  *len = 0;
2425
0
  switch (NTYPE(node)) {
2426
0
  case NT_LIST:
2427
0
    do {
2428
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429
0
      if (r == 0)
2430
0
  *len = (int )distance_add(*len, tlen);
2431
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432
0
    break;
2433
2434
0
  case NT_ALT:
2435
0
    {
2436
0
      int tlen2;
2437
0
      int varlen = 0;
2438
2439
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440
0
      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441
0
  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442
0
  if (r == 0) {
2443
0
    if (tlen != tlen2)
2444
0
      varlen = 1;
2445
0
  }
2446
0
      }
2447
0
      if (r == 0) {
2448
0
  if (varlen != 0) {
2449
0
    if (level == 1)
2450
0
      r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2451
0
    else
2452
0
      r = GET_CHAR_LEN_VARLEN;
2453
0
  }
2454
0
  else
2455
0
    *len = tlen;
2456
0
      }
2457
0
    }
2458
0
    break;
2459
2460
0
  case NT_STR:
2461
0
    {
2462
0
      StrNode* sn = NSTR(node);
2463
0
      UChar *s = sn->s;
2464
0
      while (s < sn->end) {
2465
0
  s += enclen(reg->enc, s, sn->end);
2466
0
  (*len)++;
2467
0
      }
2468
0
    }
2469
0
    break;
2470
2471
0
  case NT_QTFR:
2472
0
    {
2473
0
      QtfrNode* qn = NQTFR(node);
2474
0
      if (qn->lower == qn->upper) {
2475
0
  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476
0
  if (r == 0)
2477
0
    *len = (int )distance_multiply(tlen, qn->lower);
2478
0
      }
2479
0
      else
2480
0
  r = GET_CHAR_LEN_VARLEN;
2481
0
    }
2482
0
    break;
2483
2484
0
#ifdef USE_SUBEXP_CALL
2485
0
  case NT_CALL:
2486
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2487
0
      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488
0
    else
2489
0
      r = GET_CHAR_LEN_VARLEN;
2490
0
    break;
2491
0
#endif
2492
2493
0
  case NT_CTYPE:
2494
0
    *len = 1;
2495
0
    break;
2496
2497
0
  case NT_CCLASS:
2498
0
  case NT_CANY:
2499
0
    *len = 1;
2500
0
    break;
2501
2502
0
  case NT_ENCLOSE:
2503
0
    {
2504
0
      EncloseNode* en = NENCLOSE(node);
2505
0
      switch (en->type) {
2506
0
      case ENCLOSE_MEMORY:
2507
0
#ifdef USE_SUBEXP_CALL
2508
0
  if (IS_ENCLOSE_CLEN_FIXED(en))
2509
0
    *len = en->char_len;
2510
0
  else {
2511
0
    r = get_char_length_tree1(en->target, reg, len, level);
2512
0
    if (r == 0) {
2513
0
      en->char_len = *len;
2514
0
      SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2515
0
    }
2516
0
  }
2517
0
  break;
2518
0
#endif
2519
0
      case ENCLOSE_OPTION:
2520
0
      case ENCLOSE_STOP_BACKTRACK:
2521
0
      case ENCLOSE_CONDITION:
2522
0
  r = get_char_length_tree1(en->target, reg, len, level);
2523
0
  break;
2524
0
      case ENCLOSE_ABSENT:
2525
0
      default:
2526
0
  break;
2527
0
      }
2528
0
    }
2529
0
    break;
2530
2531
0
  case NT_ANCHOR:
2532
0
    break;
2533
2534
0
  default:
2535
0
    r = GET_CHAR_LEN_VARLEN;
2536
0
    break;
2537
0
  }
2538
2539
0
  return r;
2540
0
}
2541
2542
static int
2543
get_char_length_tree(Node* node, regex_t* reg, int* len)
2544
0
{
2545
0
  return get_char_length_tree1(node, reg, len, 0);
2546
0
}
2547
2548
/* x is not included y ==>  1 : 0 */
2549
static int
2550
is_not_included(Node* x, Node* y, regex_t* reg)
2551
5.00k
{
2552
5.00k
  int i;
2553
5.00k
  OnigDistance len;
2554
5.00k
  OnigCodePoint code;
2555
5.00k
  UChar *p;
2556
5.00k
  int ytype;
2557
2558
10.0k
 retry:
2559
10.0k
  ytype = NTYPE(y);
2560
10.0k
  switch (NTYPE(x)) {
2561
0
  case NT_CTYPE:
2562
0
    {
2563
0
      switch (ytype) {
2564
0
      case NT_CTYPE:
2565
0
  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566
0
      NCTYPE(y)->not   != NCTYPE(x)->not &&
2567
0
      NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568
0
    return 1;
2569
0
  else
2570
0
    return 0;
2571
0
  break;
2572
2573
0
      case NT_CCLASS:
2574
5.00k
      swap:
2575
5.00k
  {
2576
5.00k
    Node* tmp;
2577
5.00k
    tmp = x; x = y; y = tmp;
2578
5.00k
    goto retry;
2579
0
  }
2580
0
  break;
2581
2582
0
      case NT_STR:
2583
0
  goto swap;
2584
0
  break;
2585
2586
0
      default:
2587
0
  break;
2588
0
      }
2589
0
    }
2590
0
    break;
2591
2592
5.00k
  case NT_CCLASS:
2593
5.00k
    {
2594
5.00k
      CClassNode* xc = NCCLASS(x);
2595
5.00k
      switch (ytype) {
2596
0
      case NT_CTYPE:
2597
0
  switch (NCTYPE(y)->ctype) {
2598
0
  case ONIGENC_CTYPE_WORD:
2599
0
    if (NCTYPE(y)->not == 0) {
2600
0
      if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601
0
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602
0
    if (BITSET_AT(xc->bs, i)) {
2603
0
      if (NCTYPE(y)->ascii_range) {
2604
0
        if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605
0
      }
2606
0
      else {
2607
0
        if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608
0
      }
2609
0
    }
2610
0
        }
2611
0
        return 1;
2612
0
      }
2613
0
      return 0;
2614
0
    }
2615
0
    else {
2616
0
      if (IS_NOT_NULL(xc->mbuf)) return 0;
2617
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618
0
        int is_word;
2619
0
        if (NCTYPE(y)->ascii_range)
2620
0
    is_word = IS_CODE_SB_WORD(reg->enc, i);
2621
0
        else
2622
0
    is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623
0
        if (! is_word) {
2624
0
    if (!IS_NCCLASS_NOT(xc)) {
2625
0
      if (BITSET_AT(xc->bs, i))
2626
0
        return 0;
2627
0
    }
2628
0
    else {
2629
0
      if (! BITSET_AT(xc->bs, i))
2630
0
        return 0;
2631
0
    }
2632
0
        }
2633
0
      }
2634
0
      return 1;
2635
0
    }
2636
0
    break;
2637
2638
0
  default:
2639
0
    break;
2640
0
  }
2641
0
  break;
2642
2643
0
      case NT_CCLASS:
2644
0
  {
2645
0
    int v;
2646
0
    CClassNode* yc = NCCLASS(y);
2647
2648
0
    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649
0
      v = BITSET_AT(xc->bs, i);
2650
0
      if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651
0
    (v == 0 && IS_NCCLASS_NOT(xc))) {
2652
0
        v = BITSET_AT(yc->bs, i);
2653
0
        if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654
0
      (v == 0 && IS_NCCLASS_NOT(yc)))
2655
0
    return 0;
2656
0
      }
2657
0
    }
2658
0
    if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659
0
        (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660
0
      return 1;
2661
0
    return 0;
2662
0
  }
2663
0
  break;
2664
2665
5.00k
      case NT_STR:
2666
5.00k
  goto swap;
2667
0
  break;
2668
2669
0
      default:
2670
0
  break;
2671
5.00k
      }
2672
5.00k
    }
2673
0
    break;
2674
2675
5.00k
  case NT_STR:
2676
5.00k
    {
2677
5.00k
      StrNode* xs = NSTR(x);
2678
5.00k
      if (NSTRING_LEN(x) == 0)
2679
0
  break;
2680
2681
5.00k
      switch (ytype) {
2682
0
      case NT_CTYPE:
2683
0
  switch (NCTYPE(y)->ctype) {
2684
0
  case ONIGENC_CTYPE_WORD:
2685
0
    if (NCTYPE(y)->ascii_range) {
2686
0
      if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687
0
        return NCTYPE(y)->not;
2688
0
      else
2689
0
        return !(NCTYPE(y)->not);
2690
0
    }
2691
0
    else {
2692
0
      if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693
0
        return NCTYPE(y)->not;
2694
0
      else
2695
0
        return !(NCTYPE(y)->not);
2696
0
    }
2697
0
    break;
2698
0
  default:
2699
0
    break;
2700
0
  }
2701
0
  break;
2702
2703
5.00k
      case NT_CCLASS:
2704
5.00k
  {
2705
5.00k
    CClassNode* cc = NCCLASS(y);
2706
2707
5.00k
    code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708
5.00k
             xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709
5.00k
    return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710
0
  }
2711
0
  break;
2712
2713
0
      case NT_STR:
2714
0
  {
2715
0
    UChar *q;
2716
0
    StrNode* ys = NSTR(y);
2717
0
    len = NSTRING_LEN(x);
2718
0
    if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719
0
    if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720
      /* tiny version */
2721
0
      return 0;
2722
0
    }
2723
0
    else {
2724
0
      for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725
0
        if (*p != *q) return 1;
2726
0
      }
2727
0
    }
2728
0
  }
2729
0
  break;
2730
2731
0
      default:
2732
0
  break;
2733
5.00k
      }
2734
5.00k
    }
2735
0
    break;
2736
2737
0
  default:
2738
0
    break;
2739
10.0k
  }
2740
2741
0
  return 0;
2742
10.0k
}
2743
2744
static Node*
2745
get_head_value_node(Node* node, int exact, regex_t* reg)
2746
26.9k
{
2747
26.9k
  Node* n = NULL_NODE;
2748
2749
26.9k
  switch (NTYPE(node)) {
2750
0
  case NT_BREF:
2751
770
  case NT_ALT:
2752
2.69k
  case NT_CANY:
2753
2.69k
#ifdef USE_SUBEXP_CALL
2754
2.69k
  case NT_CALL:
2755
2.69k
#endif
2756
2.69k
    break;
2757
2758
0
  case NT_CTYPE:
2759
8.47k
  case NT_CCLASS:
2760
8.47k
    if (exact == 0) {
2761
7.70k
      n = node;
2762
7.70k
    }
2763
8.47k
    break;
2764
2765
0
  case NT_LIST:
2766
0
    n = get_head_value_node(NCAR(node), exact, reg);
2767
0
    break;
2768
2769
10.7k
  case NT_STR:
2770
10.7k
    {
2771
10.7k
      StrNode* sn = NSTR(node);
2772
2773
10.7k
      if (sn->end <= sn->s)
2774
0
  break;
2775
2776
10.7k
      if (exact == 0 ||
2777
10.7k
    NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2778
10.7k
  n = node;
2779
10.7k
      }
2780
10.7k
    }
2781
0
    break;
2782
2783
3.85k
  case NT_QTFR:
2784
3.85k
    {
2785
3.85k
      QtfrNode* qn = NQTFR(node);
2786
3.85k
      if (qn->lower > 0) {
2787
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2788
  if (IS_NOT_NULL(qn->head_exact))
2789
    n = qn->head_exact;
2790
  else
2791
#endif
2792
770
    n = get_head_value_node(qn->target, exact, reg);
2793
770
      }
2794
3.85k
    }
2795
3.85k
    break;
2796
2797
0
  case NT_ENCLOSE:
2798
0
    {
2799
0
      EncloseNode* en = NENCLOSE(node);
2800
0
      switch (en->type) {
2801
0
      case ENCLOSE_OPTION:
2802
0
  {
2803
0
    OnigOptionType options = reg->options;
2804
2805
0
    reg->options = NENCLOSE(node)->option;
2806
0
    n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2807
0
    reg->options = options;
2808
0
  }
2809
0
  break;
2810
2811
0
      case ENCLOSE_MEMORY:
2812
0
      case ENCLOSE_STOP_BACKTRACK:
2813
0
      case ENCLOSE_CONDITION:
2814
0
  n = get_head_value_node(en->target, exact, reg);
2815
0
  break;
2816
2817
0
      case ENCLOSE_ABSENT:
2818
0
  break;
2819
0
      }
2820
0
    }
2821
0
    break;
2822
2823
1.15k
  case NT_ANCHOR:
2824
1.15k
    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2825
0
      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2826
1.15k
    break;
2827
2828
0
  default:
2829
0
    break;
2830
26.9k
  }
2831
2832
26.9k
  return n;
2833
26.9k
}
2834
2835
static int
2836
check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2837
0
{
2838
0
  int type, r = 0;
2839
2840
0
  type = NTYPE(node);
2841
0
  if ((NTYPE2BIT(type) & type_mask) == 0)
2842
0
    return 1;
2843
2844
0
  switch (type) {
2845
0
  case NT_LIST:
2846
0
  case NT_ALT:
2847
0
    do {
2848
0
      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2849
0
        anchor_mask);
2850
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2851
0
    break;
2852
2853
0
  case NT_QTFR:
2854
0
    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2855
0
      anchor_mask);
2856
0
    break;
2857
2858
0
  case NT_ENCLOSE:
2859
0
    {
2860
0
      EncloseNode* en = NENCLOSE(node);
2861
0
      if ((en->type & enclose_mask) == 0)
2862
0
  return 1;
2863
2864
0
      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2865
0
    }
2866
0
    break;
2867
2868
0
  case NT_ANCHOR:
2869
0
    type = NANCHOR(node)->type;
2870
0
    if ((type & anchor_mask) == 0)
2871
0
      return 1;
2872
2873
0
    if (NANCHOR(node)->target)
2874
0
      r = check_type_tree(NANCHOR(node)->target,
2875
0
        type_mask, enclose_mask, anchor_mask);
2876
0
    break;
2877
2878
0
  default:
2879
0
    break;
2880
0
  }
2881
0
  return r;
2882
0
}
2883
2884
#ifdef USE_SUBEXP_CALL
2885
2886
0
# define RECURSION_EXIST       1
2887
0
# define RECURSION_INFINITE    2
2888
2889
static int
2890
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2891
0
{
2892
0
  int type;
2893
0
  int r = 0;
2894
2895
0
  type = NTYPE(node);
2896
0
  switch (type) {
2897
0
  case NT_LIST:
2898
0
    {
2899
0
      Node *x;
2900
0
      OnigDistance min;
2901
0
      int ret;
2902
2903
0
      x = node;
2904
0
      do {
2905
0
  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2906
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2907
0
  r |= ret;
2908
0
  if (head) {
2909
0
    ret = get_min_match_length(NCAR(x), &min, env);
2910
0
    if (ret != 0) return ret;
2911
0
    if (min != 0) head = 0;
2912
0
  }
2913
0
      } while (IS_NOT_NULL(x = NCDR(x)));
2914
0
    }
2915
0
    break;
2916
2917
0
  case NT_ALT:
2918
0
    {
2919
0
      int ret;
2920
0
      r = RECURSION_EXIST;
2921
0
      do {
2922
0
  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2923
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2924
0
  r &= ret;
2925
0
      } while (IS_NOT_NULL(node = NCDR(node)));
2926
0
    }
2927
0
    break;
2928
2929
0
  case NT_QTFR:
2930
0
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2931
0
    if (r == RECURSION_EXIST) {
2932
0
      if (NQTFR(node)->lower == 0) r = 0;
2933
0
    }
2934
0
    break;
2935
2936
0
  case NT_ANCHOR:
2937
0
    {
2938
0
      AnchorNode* an = NANCHOR(node);
2939
0
      switch (an->type) {
2940
0
      case ANCHOR_PREC_READ:
2941
0
      case ANCHOR_PREC_READ_NOT:
2942
0
      case ANCHOR_LOOK_BEHIND:
2943
0
      case ANCHOR_LOOK_BEHIND_NOT:
2944
0
  r = subexp_inf_recursive_check(an->target, env, head);
2945
0
  break;
2946
0
      }
2947
0
    }
2948
0
    break;
2949
2950
0
  case NT_CALL:
2951
0
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2952
0
    break;
2953
2954
0
  case NT_ENCLOSE:
2955
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2956
0
      return 0;
2957
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2958
0
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2959
0
    else {
2960
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
2961
0
      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2963
0
    }
2964
0
    break;
2965
2966
0
  default:
2967
0
    break;
2968
0
  }
2969
2970
0
  return r;
2971
0
}
2972
2973
static int
2974
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2975
0
{
2976
0
  int type;
2977
0
  int r = 0;
2978
2979
0
  type = NTYPE(node);
2980
0
  switch (type) {
2981
0
  case NT_LIST:
2982
0
  case NT_ALT:
2983
0
    do {
2984
0
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2985
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2986
0
    break;
2987
2988
0
  case NT_QTFR:
2989
0
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2990
0
    break;
2991
2992
0
  case NT_ANCHOR:
2993
0
    {
2994
0
      AnchorNode* an = NANCHOR(node);
2995
0
      switch (an->type) {
2996
0
      case ANCHOR_PREC_READ:
2997
0
      case ANCHOR_PREC_READ_NOT:
2998
0
      case ANCHOR_LOOK_BEHIND:
2999
0
      case ANCHOR_LOOK_BEHIND_NOT:
3000
0
  r = subexp_inf_recursive_check_trav(an->target, env);
3001
0
  break;
3002
0
      }
3003
0
    }
3004
0
    break;
3005
3006
0
  case NT_ENCLOSE:
3007
0
    {
3008
0
      EncloseNode* en = NENCLOSE(node);
3009
3010
0
      if (IS_ENCLOSE_RECURSION(en)) {
3011
0
  SET_ENCLOSE_STATUS(node, NST_MARK1);
3012
0
  r = subexp_inf_recursive_check(en->target, env, 1);
3013
0
  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014
0
  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3015
0
      }
3016
0
      r = subexp_inf_recursive_check_trav(en->target, env);
3017
0
    }
3018
3019
0
    break;
3020
3021
0
  default:
3022
0
    break;
3023
0
  }
3024
3025
0
  return r;
3026
0
}
3027
3028
static int
3029
subexp_recursive_check(Node* node)
3030
0
{
3031
0
  int r = 0;
3032
3033
0
  switch (NTYPE(node)) {
3034
0
  case NT_LIST:
3035
0
  case NT_ALT:
3036
0
    do {
3037
0
      r |= subexp_recursive_check(NCAR(node));
3038
0
    } while (IS_NOT_NULL(node = NCDR(node)));
3039
0
    break;
3040
3041
0
  case NT_QTFR:
3042
0
    r = subexp_recursive_check(NQTFR(node)->target);
3043
0
    break;
3044
3045
0
  case NT_ANCHOR:
3046
0
    {
3047
0
      AnchorNode* an = NANCHOR(node);
3048
0
      switch (an->type) {
3049
0
      case ANCHOR_PREC_READ:
3050
0
      case ANCHOR_PREC_READ_NOT:
3051
0
      case ANCHOR_LOOK_BEHIND:
3052
0
      case ANCHOR_LOOK_BEHIND_NOT:
3053
0
  r = subexp_recursive_check(an->target);
3054
0
  break;
3055
0
      }
3056
0
    }
3057
0
    break;
3058
3059
0
  case NT_CALL:
3060
0
    r = subexp_recursive_check(NCALL(node)->target);
3061
0
    if (r != 0) SET_CALL_RECURSION(node);
3062
0
    break;
3063
3064
0
  case NT_ENCLOSE:
3065
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3066
0
      return 0;
3067
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3068
0
      return 1; /* recursion */
3069
0
    else {
3070
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
3071
0
      r = subexp_recursive_check(NENCLOSE(node)->target);
3072
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3073
0
    }
3074
0
    break;
3075
3076
0
  default:
3077
0
    break;
3078
0
  }
3079
3080
0
  return r;
3081
0
}
3082
3083
3084
static int
3085
subexp_recursive_check_trav(Node* node, ScanEnv* env)
3086
0
{
3087
0
# define FOUND_CALLED_NODE    1
3088
3089
0
  int type;
3090
0
  int r = 0;
3091
3092
0
  type = NTYPE(node);
3093
0
  switch (type) {
3094
0
  case NT_LIST:
3095
0
  case NT_ALT:
3096
0
    {
3097
0
      int ret;
3098
0
      do {
3099
0
  ret = subexp_recursive_check_trav(NCAR(node), env);
3100
0
  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3101
0
  else if (ret < 0) return ret;
3102
0
      } while (IS_NOT_NULL(node = NCDR(node)));
3103
0
    }
3104
0
    break;
3105
3106
0
  case NT_QTFR:
3107
0
    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3108
0
    if (NQTFR(node)->upper == 0) {
3109
0
      if (r == FOUND_CALLED_NODE)
3110
0
  NQTFR(node)->is_referred = 1;
3111
0
    }
3112
0
    break;
3113
3114
0
  case NT_ANCHOR:
3115
0
    {
3116
0
      AnchorNode* an = NANCHOR(node);
3117
0
      switch (an->type) {
3118
0
      case ANCHOR_PREC_READ:
3119
0
      case ANCHOR_PREC_READ_NOT:
3120
0
      case ANCHOR_LOOK_BEHIND:
3121
0
      case ANCHOR_LOOK_BEHIND_NOT:
3122
0
  r = subexp_recursive_check_trav(an->target, env);
3123
0
  break;
3124
0
      }
3125
0
    }
3126
0
    break;
3127
3128
0
  case NT_ENCLOSE:
3129
0
    {
3130
0
      EncloseNode* en = NENCLOSE(node);
3131
3132
0
      if (! IS_ENCLOSE_RECURSION(en)) {
3133
0
  if (IS_ENCLOSE_CALLED(en)) {
3134
0
    SET_ENCLOSE_STATUS(node, NST_MARK1);
3135
0
    r = subexp_recursive_check(en->target);
3136
0
    if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137
0
    CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3138
0
  }
3139
0
      }
3140
0
      r = subexp_recursive_check_trav(en->target, env);
3141
0
      if (IS_ENCLOSE_CALLED(en))
3142
0
  r |= FOUND_CALLED_NODE;
3143
0
    }
3144
0
    break;
3145
3146
0
  default:
3147
0
    break;
3148
0
  }
3149
3150
0
  return r;
3151
0
}
3152
3153
static int
3154
setup_subexp_call(Node* node, ScanEnv* env)
3155
0
{
3156
0
  int type;
3157
0
  int r = 0;
3158
3159
0
  type = NTYPE(node);
3160
0
  switch (type) {
3161
0
  case NT_LIST:
3162
0
    do {
3163
0
      r = setup_subexp_call(NCAR(node), env);
3164
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3165
0
    break;
3166
3167
0
  case NT_ALT:
3168
0
    do {
3169
0
      r = setup_subexp_call(NCAR(node), env);
3170
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3171
0
    break;
3172
3173
0
  case NT_QTFR:
3174
0
    r = setup_subexp_call(NQTFR(node)->target, env);
3175
0
    break;
3176
0
  case NT_ENCLOSE:
3177
0
    r = setup_subexp_call(NENCLOSE(node)->target, env);
3178
0
    break;
3179
3180
0
  case NT_CALL:
3181
0
    {
3182
0
      CallNode* cn = NCALL(node);
3183
0
      Node** nodes = SCANENV_MEM_NODES(env);
3184
3185
0
      if (cn->group_num != 0) {
3186
0
  int gnum = cn->group_num;
3187
3188
0
# ifdef USE_NAMED_GROUP
3189
0
  if (env->num_named > 0 &&
3190
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3191
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3192
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3193
0
  }
3194
0
# endif
3195
0
  if (gnum > env->num_mem) {
3196
0
    onig_scan_env_set_error_string(env,
3197
0
     ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3198
0
    return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3199
0
  }
3200
3201
0
# ifdef USE_NAMED_GROUP
3202
0
      set_call_attr:
3203
0
# endif
3204
0
  cn->target = nodes[cn->group_num];
3205
0
  if (IS_NULL(cn->target)) {
3206
0
    onig_scan_env_set_error_string(env,
3207
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3208
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3209
0
  }
3210
0
  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3211
0
  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3212
0
  cn->unset_addr_list = env->unset_addr_list;
3213
0
      }
3214
0
# ifdef USE_NAMED_GROUP
3215
0
#  ifdef USE_PERL_SUBEXP_CALL
3216
0
      else if (cn->name == cn->name_end) {
3217
0
  goto set_call_attr;
3218
0
      }
3219
0
#  endif
3220
0
      else {
3221
0
  int *refs;
3222
3223
0
  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3224
0
             &refs);
3225
0
  if (n <= 0) {
3226
0
    onig_scan_env_set_error_string(env,
3227
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3228
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3229
0
  }
3230
0
  else if (n > 1 &&
3231
0
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3232
0
    onig_scan_env_set_error_string(env,
3233
0
      ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3234
0
    return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3235
0
  }
3236
0
  else {
3237
0
    cn->group_num = refs[0];
3238
0
    goto set_call_attr;
3239
0
  }
3240
0
      }
3241
0
# endif
3242
0
    }
3243
0
    break;
3244
3245
0
  case NT_ANCHOR:
3246
0
    {
3247
0
      AnchorNode* an = NANCHOR(node);
3248
3249
0
      switch (an->type) {
3250
0
      case ANCHOR_PREC_READ:
3251
0
      case ANCHOR_PREC_READ_NOT:
3252
0
      case ANCHOR_LOOK_BEHIND:
3253
0
      case ANCHOR_LOOK_BEHIND_NOT:
3254
0
  r = setup_subexp_call(an->target, env);
3255
0
  break;
3256
0
      }
3257
0
    }
3258
0
    break;
3259
3260
0
  default:
3261
0
    break;
3262
0
  }
3263
3264
0
  return r;
3265
0
}
3266
#endif
3267
3268
7.31k
#define IN_ALT          (1<<0)
3269
1.92k
#define IN_NOT          (1<<1)
3270
23.1k
#define IN_REPEAT       (1<<2)
3271
13.4k
#define IN_VAR_REPEAT   (1<<3)
3272
1.92k
#define IN_CALL         (1<<4)
3273
1.92k
#define IN_RECCALL      (1<<5)
3274
0
#define IN_LOOK_BEHIND  (1<<6)
3275
3276
/* divide different length alternatives in look-behind.
3277
  (?<=A|B) ==> (?<=A)|(?<=B)
3278
  (?<!A|B) ==> (?<!A)(?<!B)
3279
*/
3280
static int
3281
divide_look_behind_alternatives(Node* node)
3282
0
{
3283
0
  Node *head, *np, *insert_node;
3284
0
  AnchorNode* an = NANCHOR(node);
3285
0
  int anc_type = an->type;
3286
3287
0
  head = an->target;
3288
0
  np = NCAR(head);
3289
0
  swap_node(node, head);
3290
0
  NCAR(node) = head;
3291
0
  NANCHOR(head)->target = np;
3292
3293
0
  np = node;
3294
0
  while ((np = NCDR(np)) != NULL_NODE) {
3295
0
    insert_node = onig_node_new_anchor(anc_type);
3296
0
    CHECK_NULL_RETURN_MEMERR(insert_node);
3297
0
    NANCHOR(insert_node)->target = NCAR(np);
3298
0
    NCAR(np) = insert_node;
3299
0
  }
3300
3301
0
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3302
0
    np = node;
3303
0
    do {
3304
0
      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3305
0
    } while ((np = NCDR(np)) != NULL_NODE);
3306
0
  }
3307
0
  return 0;
3308
0
}
3309
3310
static int
3311
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3312
0
{
3313
0
  int r, len;
3314
0
  AnchorNode* an = NANCHOR(node);
3315
3316
0
  r = get_char_length_tree(an->target, reg, &len);
3317
0
  if (r == 0)
3318
0
    an->char_len = len;
3319
0
  else if (r == GET_CHAR_LEN_VARLEN)
3320
0
    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3321
0
  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3322
0
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3323
0
      r = divide_look_behind_alternatives(node);
3324
0
    else
3325
0
      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3326
0
  }
3327
3328
0
  return r;
3329
0
}
3330
3331
static int
3332
next_setup(Node* node, Node* next_node, regex_t* reg)
3333
28.8k
{
3334
28.8k
  int type;
3335
3336
30.8k
 retry:
3337
30.8k
  type = NTYPE(node);
3338
30.8k
  if (type == NT_QTFR) {
3339
10.3k
    QtfrNode* qn = NQTFR(node);
3340
10.3k
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3341
9.62k
#ifdef USE_QTFR_PEEK_NEXT
3342
9.62k
      Node* n = get_head_value_node(next_node, 1, reg);
3343
      /* '\0': for UTF-16BE etc... */
3344
9.62k
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3345
5.77k
  qn->next_head_exact = n;
3346
5.77k
      }
3347
9.62k
#endif
3348
      /* automatic possessification a*b ==> (?>a*)b */
3349
9.62k
      if (qn->lower <= 1) {
3350
9.62k
  int ttype = NTYPE(qn->target);
3351
9.62k
  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3352
8.85k
    Node *x, *y;
3353
8.85k
    x = get_head_value_node(qn->target, 0, reg);
3354
8.85k
    if (IS_NOT_NULL(x)) {
3355
7.70k
      y = get_head_value_node(next_node,  0, reg);
3356
7.70k
      if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3357
5.00k
        Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3358
5.00k
        CHECK_NULL_RETURN_MEMERR(en);
3359
5.00k
        SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3360
5.00k
        swap_node(node, en);
3361
5.00k
        NENCLOSE(node)->target = en;
3362
5.00k
      }
3363
7.70k
    }
3364
8.85k
  }
3365
9.62k
      }
3366
9.62k
    }
3367
10.3k
  }
3368
20.4k
  else if (type == NT_ENCLOSE) {
3369
2.31k
    EncloseNode* en = NENCLOSE(node);
3370
2.31k
    if (en->type == ENCLOSE_MEMORY) {
3371
1.92k
      node = en->target;
3372
1.92k
      goto retry;
3373
1.92k
    }
3374
2.31k
  }
3375
28.8k
  return 0;
3376
30.8k
}
3377
3378
3379
static int
3380
update_string_node_case_fold(regex_t* reg, Node *node)
3381
0
{
3382
0
  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3383
0
  UChar *sbuf, *ebuf, *sp;
3384
0
  int r, i, len;
3385
0
  OnigDistance sbuf_size;
3386
0
  StrNode* sn = NSTR(node);
3387
3388
0
  end = sn->end;
3389
0
  sbuf_size = (end - sn->s) * 2;
3390
0
  sbuf = (UChar* )xmalloc(sbuf_size);
3391
0
  CHECK_NULL_RETURN_MEMERR(sbuf);
3392
0
  ebuf = sbuf + sbuf_size;
3393
3394
0
  sp = sbuf;
3395
0
  p = sn->s;
3396
0
  while (p < end) {
3397
0
    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3398
0
    for (i = 0; i < len; i++) {
3399
0
      if (sp >= ebuf) {
3400
0
  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3401
0
  if (IS_NULL(p)) {
3402
0
    xfree(sbuf);
3403
0
    return ONIGERR_MEMORY;
3404
0
  }
3405
0
  sbuf = p;
3406
0
  sp = sbuf + sbuf_size;
3407
0
  sbuf_size *= 2;
3408
0
  ebuf = sbuf + sbuf_size;
3409
0
      }
3410
3411
0
      *sp++ = buf[i];
3412
0
    }
3413
0
  }
3414
3415
0
  r = onig_node_str_set(node, sbuf, sp);
3416
3417
0
  xfree(sbuf);
3418
0
  return r;
3419
0
}
3420
3421
static int
3422
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3423
         regex_t* reg)
3424
0
{
3425
0
  int r;
3426
0
  Node *node;
3427
3428
0
  node = onig_node_new_str(s, end);
3429
0
  if (IS_NULL(node)) return ONIGERR_MEMORY;
3430
3431
0
  r = update_string_node_case_fold(reg, node);
3432
0
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return r;
3435
0
  }
3436
3437
0
  NSTRING_SET_AMBIG(node);
3438
0
  NSTRING_SET_DONT_GET_OPT_INFO(node);
3439
0
  *rnode = node;
3440
0
  return 0;
3441
0
}
3442
3443
static int
3444
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3445
        int slen)
3446
0
{
3447
0
  int i;
3448
3449
0
  for (i = 0; i < item_num; i++) {
3450
0
    if (items[i].byte_len != slen) {
3451
0
      return 1;
3452
0
    }
3453
0
    if (items[i].code_len != 1) {
3454
0
      return 1;
3455
0
    }
3456
0
  }
3457
0
  return 0;
3458
0
}
3459
3460
static int
3461
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3462
          UChar *p, int slen, UChar *end,
3463
          regex_t* reg, Node **rnode)
3464
0
{
3465
0
  int r, i, j, len, varlen;
3466
0
  Node *anode, *var_anode, *snode, *xnode, *an;
3467
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3468
3469
0
  *rnode = var_anode = NULL_NODE;
3470
3471
0
  varlen = 0;
3472
0
  for (i = 0; i < item_num; i++) {
3473
0
    if (items[i].byte_len != slen) {
3474
0
      varlen = 1;
3475
0
      break;
3476
0
    }
3477
0
  }
3478
3479
0
  if (varlen != 0) {
3480
0
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3481
0
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3482
3483
0
    xnode = onig_node_new_list(NULL, NULL);
3484
0
    if (IS_NULL(xnode)) goto mem_err;
3485
0
    NCAR(var_anode) = xnode;
3486
3487
0
    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3488
0
    if (IS_NULL(anode)) goto mem_err;
3489
0
    NCAR(xnode) = anode;
3490
0
  }
3491
0
  else {
3492
0
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3493
0
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3494
0
  }
3495
3496
0
  snode = onig_node_new_str(p, p + slen);
3497
0
  if (IS_NULL(snode)) goto mem_err;
3498
3499
0
  NCAR(anode) = snode;
3500
3501
0
  for (i = 0; i < item_num; i++) {
3502
0
    snode = onig_node_new_str(NULL, NULL);
3503
0
    if (IS_NULL(snode)) goto mem_err;
3504
3505
0
    for (j = 0; j < items[i].code_len; j++) {
3506
0
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3507
0
      if (len < 0) {
3508
0
  r = len;
3509
0
  goto mem_err2;
3510
0
      }
3511
3512
0
      r = onig_node_str_cat(snode, buf, buf + len);
3513
0
      if (r != 0) goto mem_err2;
3514
0
    }
3515
3516
0
    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517
0
    if (IS_NULL(an)) {
3518
0
      goto mem_err2;
3519
0
    }
3520
3521
0
    if (items[i].byte_len != slen) {
3522
0
      Node *rem;
3523
0
      UChar *q = p + items[i].byte_len;
3524
3525
0
      if (q < end) {
3526
0
  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3527
0
  if (r != 0) {
3528
0
    onig_node_free(an);
3529
0
    goto mem_err2;
3530
0
  }
3531
3532
0
  xnode = onig_node_list_add(NULL_NODE, snode);
3533
0
  if (IS_NULL(xnode)) {
3534
0
    onig_node_free(an);
3535
0
    onig_node_free(rem);
3536
0
    goto mem_err2;
3537
0
  }
3538
0
  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3539
0
    onig_node_free(an);
3540
0
    onig_node_free(xnode);
3541
0
    onig_node_free(rem);
3542
0
    goto mem_err;
3543
0
  }
3544
3545
0
  NCAR(an) = xnode;
3546
0
      }
3547
0
      else {
3548
0
  NCAR(an) = snode;
3549
0
      }
3550
3551
0
      NCDR(var_anode) = an;
3552
0
      var_anode = an;
3553
0
    }
3554
0
    else {
3555
0
      NCAR(an)     = snode;
3556
0
      NCDR(anode) = an;
3557
0
      anode = an;
3558
0
    }
3559
0
  }
3560
3561
0
  return varlen;
3562
3563
0
 mem_err2:
3564
0
  onig_node_free(snode);
3565
3566
0
 mem_err:
3567
0
  onig_node_free(*rnode);
3568
3569
0
  return ONIGERR_MEMORY;
3570
0
}
3571
3572
0
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3573
3574
static int
3575
expand_case_fold_string(Node* node, regex_t* reg, int state)
3576
0
{
3577
0
  int r, n, len, alt_num;
3578
0
  int varlen = 0;
3579
0
  int is_in_look_behind;
3580
0
  UChar *start, *end, *p;
3581
0
  Node *top_root, *root, *snode, *prev_node;
3582
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3583
0
  StrNode* sn;
3584
3585
0
  if (NSTRING_IS_AMBIG(node)) return 0;
3586
3587
0
  sn = NSTR(node);
3588
3589
0
  start = sn->s;
3590
0
  end   = sn->end;
3591
0
  if (start >= end) return 0;
3592
3593
0
  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3594
3595
0
  r = 0;
3596
0
  top_root = root = prev_node = snode = NULL_NODE;
3597
0
  alt_num = 1;
3598
0
  p = start;
3599
0
  while (p < end) {
3600
0
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3601
0
             p, end, items);
3602
0
    if (n < 0) {
3603
0
      r = n;
3604
0
      goto err;
3605
0
    }
3606
3607
0
    len = enclen(reg->enc, p, end);
3608
3609
0
    varlen = is_case_fold_variable_len(n, items, len);
3610
0
    if (n == 0 || varlen == 0 || is_in_look_behind) {
3611
0
      if (IS_NULL(snode)) {
3612
0
  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3613
0
          onig_node_free(top_root);
3614
0
    top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3615
0
    if (IS_NULL(root)) {
3616
0
      onig_node_free(prev_node);
3617
0
      goto mem_err;
3618
0
    }
3619
0
  }
3620
3621
0
  prev_node = snode = onig_node_new_str(NULL, NULL);
3622
0
  if (IS_NULL(snode)) goto mem_err;
3623
0
  if (IS_NOT_NULL(root)) {
3624
0
    if (IS_NULL(onig_node_list_add(root, snode))) {
3625
0
      onig_node_free(snode);
3626
0
      goto mem_err;
3627
0
    }
3628
0
  }
3629
0
      }
3630
3631
0
      r = onig_node_str_cat(snode, p, p + len);
3632
0
      if (r != 0) goto err;
3633
0
    }
3634
0
    else {
3635
0
      alt_num *= (n + 1);
3636
0
      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3637
3638
0
      if (IS_NOT_NULL(snode)) {
3639
0
  r = update_string_node_case_fold(reg, snode);
3640
0
  if (r == 0) {
3641
0
    NSTRING_SET_AMBIG(snode);
3642
0
  }
3643
0
      }
3644
0
      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3645
0
        onig_node_free(top_root);
3646
0
  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3647
0
  if (IS_NULL(root)) {
3648
0
    onig_node_free(prev_node);
3649
0
    goto mem_err;
3650
0
  }
3651
0
      }
3652
3653
0
      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3654
0
      if (r < 0) goto mem_err;
3655
0
      if (r == 1) {
3656
0
  if (IS_NULL(root)) {
3657
0
    top_root = prev_node;
3658
0
  }
3659
0
  else {
3660
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3661
0
      onig_node_free(prev_node);
3662
0
      goto mem_err;
3663
0
    }
3664
0
  }
3665
3666
0
  root = NCAR(prev_node);
3667
0
      }
3668
0
      else { /* r == 0 */
3669
0
  if (IS_NOT_NULL(root)) {
3670
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671
0
      onig_node_free(prev_node);
3672
0
      goto mem_err;
3673
0
    }
3674
0
  }
3675
0
      }
3676
3677
0
      snode = NULL_NODE;
3678
0
    }
3679
3680
0
    p += len;
3681
0
  }
3682
0
  if (IS_NOT_NULL(snode)) {
3683
0
    r = update_string_node_case_fold(reg, snode);
3684
0
    if (r == 0) {
3685
0
      NSTRING_SET_AMBIG(snode);
3686
0
    }
3687
0
  }
3688
3689
0
  if (p < end) {
3690
0
    Node *srem;
3691
3692
0
    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3693
0
    if (r != 0) goto mem_err;
3694
3695
0
    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3696
0
      onig_node_free(top_root);
3697
0
      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3698
0
      if (IS_NULL(root)) {
3699
0
  onig_node_free(srem);
3700
0
  onig_node_free(prev_node);
3701
0
  goto mem_err;
3702
0
      }
3703
0
    }
3704
3705
0
    if (IS_NULL(root)) {
3706
0
      prev_node = srem;
3707
0
    }
3708
0
    else {
3709
0
      if (IS_NULL(onig_node_list_add(root, srem))) {
3710
0
  onig_node_free(srem);
3711
0
  goto mem_err;
3712
0
      }
3713
0
    }
3714
0
  }
3715
3716
  /* ending */
3717
0
  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3718
0
  swap_node(node, top_root);
3719
0
  onig_node_free(top_root);
3720
0
  return 0;
3721
3722
0
 mem_err:
3723
0
  r = ONIGERR_MEMORY;
3724
3725
0
 err:
3726
0
  onig_node_free(top_root);
3727
0
  return r;
3728
0
}
3729
3730
3731
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3732
3733
# define CEC_THRES_NUM_BIG_REPEAT         512
3734
# define CEC_INFINITE_NUM          0x7fffffff
3735
3736
# define CEC_IN_INFINITE_REPEAT    (1<<0)
3737
# define CEC_IN_FINITE_REPEAT      (1<<1)
3738
# define CEC_CONT_BIG_REPEAT       (1<<2)
3739
3740
static int
3741
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3742
{
3743
  int type;
3744
  int r = state;
3745
3746
  type = NTYPE(node);
3747
  switch (type) {
3748
  case NT_LIST:
3749
    {
3750
      Node* prev = NULL_NODE;
3751
      do {
3752
  r = setup_comb_exp_check(NCAR(node), r, env);
3753
  prev = NCAR(node);
3754
      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3755
    }
3756
    break;
3757
3758
  case NT_ALT:
3759
    {
3760
      int ret;
3761
      do {
3762
  ret = setup_comb_exp_check(NCAR(node), state, env);
3763
  r |= ret;
3764
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3765
    }
3766
    break;
3767
3768
  case NT_QTFR:
3769
    {
3770
      int child_state = state;
3771
      int add_state = 0;
3772
      QtfrNode* qn = NQTFR(node);
3773
      Node* target = qn->target;
3774
      int var_num;
3775
3776
      if (! IS_REPEAT_INFINITE(qn->upper)) {
3777
  if (qn->upper > 1) {
3778
    /* {0,1}, {1,1} are allowed */
3779
    child_state |= CEC_IN_FINITE_REPEAT;
3780
3781
    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3782
    if (env->backrefed_mem == 0) {
3783
      if (NTYPE(qn->target) == NT_ENCLOSE) {
3784
        EncloseNode* en = NENCLOSE(qn->target);
3785
        if (en->type == ENCLOSE_MEMORY) {
3786
    if (NTYPE(en->target) == NT_QTFR) {
3787
      QtfrNode* q = NQTFR(en->target);
3788
      if (IS_REPEAT_INFINITE(q->upper)
3789
          && q->greedy == qn->greedy) {
3790
        qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3791
        if (qn->upper == 1)
3792
          child_state = state;
3793
      }
3794
    }
3795
        }
3796
      }
3797
    }
3798
  }
3799
      }
3800
3801
      if (state & CEC_IN_FINITE_REPEAT) {
3802
  qn->comb_exp_check_num = -1;
3803
      }
3804
      else {
3805
  if (IS_REPEAT_INFINITE(qn->upper)) {
3806
    var_num = CEC_INFINITE_NUM;
3807
    child_state |= CEC_IN_INFINITE_REPEAT;
3808
  }
3809
  else {
3810
    var_num = qn->upper - qn->lower;
3811
  }
3812
3813
  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3814
    add_state |= CEC_CONT_BIG_REPEAT;
3815
3816
  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3817
      ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3818
       var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3819
    if (qn->comb_exp_check_num == 0) {
3820
      env->num_comb_exp_check++;
3821
      qn->comb_exp_check_num = env->num_comb_exp_check;
3822
      if (env->curr_max_regnum > env->comb_exp_max_regnum)
3823
        env->comb_exp_max_regnum = env->curr_max_regnum;
3824
    }
3825
  }
3826
      }
3827
3828
      r = setup_comb_exp_check(target, child_state, env);
3829
      r |= add_state;
3830
    }
3831
    break;
3832
3833
  case NT_ENCLOSE:
3834
    {
3835
      EncloseNode* en = NENCLOSE(node);
3836
3837
      switch (en->type) {
3838
      case ENCLOSE_MEMORY:
3839
  {
3840
    if (env->curr_max_regnum < en->regnum)
3841
      env->curr_max_regnum = en->regnum;
3842
3843
    r = setup_comb_exp_check(en->target, state, env);
3844
  }
3845
  break;
3846
3847
      default:
3848
  r = setup_comb_exp_check(en->target, state, env);
3849
  break;
3850
      }
3851
    }
3852
    break;
3853
3854
# ifdef USE_SUBEXP_CALL
3855
  case NT_CALL:
3856
    if (IS_CALL_RECURSION(NCALL(node)))
3857
      env->has_recursion = 1;
3858
    else
3859
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
3860
    break;
3861
# endif
3862
3863
  default:
3864
    break;
3865
  }
3866
3867
  return r;
3868
}
3869
#endif
3870
3871
/* setup_tree does the following work.
3872
 1. check empty loop. (set qn->target_empty_info)
3873
 2. expand ignore-case in char class.
3874
 3. set memory status bit flags. (reg->mem_stats)
3875
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3876
 5. find invalid patterns in look-behind.
3877
 6. expand repeated string.
3878
 */
3879
static int
3880
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3881
66.2k
{
3882
66.2k
  int type;
3883
66.2k
  int r = 0;
3884
3885
66.2k
restart:
3886
66.2k
  type = NTYPE(node);
3887
66.2k
  switch (type) {
3888
9.24k
  case NT_LIST:
3889
9.24k
    {
3890
9.24k
      Node* prev = NULL_NODE;
3891
38.1k
      do {
3892
38.1k
  r = setup_tree(NCAR(node), reg, state, env);
3893
38.1k
  if (IS_NOT_NULL(prev) && r == 0) {
3894
28.8k
    r = next_setup(prev, NCAR(node), reg);
3895
28.8k
  }
3896
38.1k
  prev = NCAR(node);
3897
38.1k
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3898
9.24k
    }
3899
9.24k
    break;
3900
3901
2.31k
  case NT_ALT:
3902
5.39k
    do {
3903
5.39k
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3904
5.39k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3905
2.31k
    break;
3906
3907
9.24k
  case NT_CCLASS:
3908
9.24k
    break;
3909
3910
16.5k
  case NT_STR:
3911
16.5k
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3912
0
      r = expand_case_fold_string(node, reg, state);
3913
0
    }
3914
16.5k
    break;
3915
3916
0
  case NT_CTYPE:
3917
3.85k
  case NT_CANY:
3918
3.85k
    break;
3919
3920
0
#ifdef USE_SUBEXP_CALL
3921
0
  case NT_CALL:
3922
0
    break;
3923
0
#endif
3924
3925
0
  case NT_BREF:
3926
0
    {
3927
0
      int i;
3928
0
      int* p;
3929
0
      Node** nodes = SCANENV_MEM_NODES(env);
3930
0
      BRefNode* br = NBREF(node);
3931
0
      p = BACKREFS_P(br);
3932
0
      for (i = 0; i < br->back_num; i++) {
3933
0
  if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3934
0
  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3935
0
  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3936
0
#ifdef USE_BACKREF_WITH_LEVEL
3937
0
  if (IS_BACKREF_NEST_LEVEL(br)) {
3938
0
    BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3939
0
  }
3940
0
#endif
3941
0
  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3942
0
      }
3943
0
    }
3944
0
    break;
3945
3946
11.5k
  case NT_QTFR:
3947
11.5k
    {
3948
11.5k
      OnigDistance d;
3949
11.5k
      QtfrNode* qn = NQTFR(node);
3950
11.5k
      Node* target = qn->target;
3951
3952
11.5k
      if ((state & IN_REPEAT) != 0) {
3953
770
  qn->state |= NST_IN_REPEAT;
3954
770
      }
3955
3956
11.5k
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3957
11.5k
  r = get_min_match_length(target, &d, env);
3958
11.5k
  if (r) break;
3959
11.5k
  if (d == 0) {
3960
0
    qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3961
0
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3962
0
    r = quantifiers_memory_node_info(target);
3963
0
    if (r < 0) break;
3964
0
    if (r > 0) {
3965
0
      qn->target_empty_info = r;
3966
0
    }
3967
0
#endif
3968
#if 0
3969
    r = get_max_match_length(target, &d, env);
3970
    if (r == 0 && d == 0) {
3971
      /*  ()* ==> ()?, ()+ ==> ()  */
3972
      qn->upper = 1;
3973
      if (qn->lower > 1) qn->lower = 1;
3974
      if (NTYPE(target) == NT_STR) {
3975
        qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3976
      }
3977
    }
3978
#endif
3979
0
  }
3980
11.5k
      }
3981
3982
11.5k
      state |= IN_REPEAT;
3983
11.5k
      if (qn->lower != qn->upper)
3984
11.5k
  state |= IN_VAR_REPEAT;
3985
11.5k
      r = setup_tree(target, reg, state, env);
3986
11.5k
      if (r) break;
3987
3988
      /* expand string */
3989
11.5k
#define EXPAND_STRING_MAX_LENGTH  100
3990
11.5k
      if (NTYPE(target) == NT_STR) {
3991
385
  if (qn->lower > 1) {
3992
0
    int i, n = qn->lower;
3993
0
    OnigDistance len = NSTRING_LEN(target);
3994
0
    StrNode* sn = NSTR(target);
3995
0
    Node* np;
3996
3997
0
    np = onig_node_new_str(sn->s, sn->end);
3998
0
    if (IS_NULL(np)) return ONIGERR_MEMORY;
3999
0
    NSTR(np)->flag = sn->flag;
4000
4001
0
    for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4002
0
      r = onig_node_str_cat(np, sn->s, sn->end);
4003
0
      if (r) {
4004
0
        onig_node_free(np);
4005
0
        return r;
4006
0
      }
4007
0
    }
4008
0
    if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4009
0
      Node *np1, *np2;
4010
4011
0
      qn->lower -= i;
4012
0
      if (! IS_REPEAT_INFINITE(qn->upper))
4013
0
        qn->upper -= i;
4014
4015
0
      np1 = onig_node_new_list(np, NULL);
4016
0
      if (IS_NULL(np1)) {
4017
0
        onig_node_free(np);
4018
0
        return ONIGERR_MEMORY;
4019
0
      }
4020
0
      swap_node(np1, node);
4021
0
      np2 = onig_node_list_add(node, np1);
4022
0
      if (IS_NULL(np2)) {
4023
0
        onig_node_free(np1);
4024
0
        return ONIGERR_MEMORY;
4025
0
      }
4026
0
    }
4027
0
    else {
4028
0
      swap_node(np, node);
4029
0
      onig_node_free(np);
4030
0
    }
4031
0
    break; /* break case NT_QTFR: */
4032
0
  }
4033
385
      }
4034
4035
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4036
      if (qn->greedy && (qn->target_empty_info != 0)) {
4037
  if (NTYPE(target) == NT_QTFR) {
4038
    QtfrNode* tqn = NQTFR(target);
4039
    if (IS_NOT_NULL(tqn->head_exact)) {
4040
      qn->head_exact  = tqn->head_exact;
4041
      tqn->head_exact = NULL;
4042
    }
4043
  }
4044
  else {
4045
    qn->head_exact = get_head_value_node(qn->target, 1, reg);
4046
  }
4047
      }
4048
#endif
4049
11.5k
    }
4050
11.5k
    break;
4051
4052
11.5k
  case NT_ENCLOSE:
4053
2.31k
    {
4054
2.31k
      EncloseNode* en = NENCLOSE(node);
4055
4056
2.31k
      switch (en->type) {
4057
385
      case ENCLOSE_OPTION:
4058
385
  {
4059
385
    OnigOptionType options = reg->options;
4060
385
    reg->options = NENCLOSE(node)->option;
4061
385
    r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4062
385
    reg->options = options;
4063
385
  }
4064
385
  break;
4065
4066
1.92k
      case ENCLOSE_MEMORY:
4067
1.92k
  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4068
0
    BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4069
    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4070
0
  }
4071
1.92k
  if (IS_ENCLOSE_CALLED(en))
4072
0
    state |= IN_CALL;
4073
1.92k
  if (IS_ENCLOSE_RECURSION(en))
4074
0
    state |= IN_RECCALL;
4075
1.92k
  else if ((state & IN_RECCALL) != 0)
4076
0
    SET_CALL_RECURSION(node);
4077
1.92k
  r = setup_tree(en->target, reg, state, env);
4078
1.92k
  break;
4079
4080
0
      case ENCLOSE_STOP_BACKTRACK:
4081
0
  {
4082
0
    Node* target = en->target;
4083
0
    r = setup_tree(target, reg, state, env);
4084
0
    if (NTYPE(target) == NT_QTFR) {
4085
0
      QtfrNode* tqn = NQTFR(target);
4086
0
      if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4087
0
    tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4088
0
        int qtype = NTYPE(tqn->target);
4089
0
        if (IS_NODE_TYPE_SIMPLE(qtype))
4090
0
    SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4091
0
      }
4092
0
    }
4093
0
  }
4094
0
  break;
4095
4096
0
      case ENCLOSE_CONDITION:
4097
0
#ifdef USE_NAMED_GROUP
4098
0
  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4099
0
      env->num_named > 0 &&
4100
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4101
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4102
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4103
0
  }
4104
0
#endif
4105
0
  if (NENCLOSE(node)->regnum > env->num_mem)
4106
0
    return ONIGERR_INVALID_BACKREF;
4107
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4108
0
  break;
4109
4110
0
      case ENCLOSE_ABSENT:
4111
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4112
0
  break;
4113
2.31k
      }
4114
2.31k
    }
4115
2.31k
    break;
4116
4117
11.1k
  case NT_ANCHOR:
4118
11.1k
    {
4119
11.1k
      AnchorNode* an = NANCHOR(node);
4120
4121
11.1k
      switch (an->type) {
4122
0
      case ANCHOR_PREC_READ:
4123
0
  r = setup_tree(an->target, reg, state, env);
4124
0
  break;
4125
0
      case ANCHOR_PREC_READ_NOT:
4126
0
  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4127
0
  break;
4128
4129
/* allowed node types in look-behind */
4130
0
#define ALLOWED_TYPE_IN_LB  \
4131
0
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4132
0
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4133
4134
0
#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4135
0
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
4136
4137
0
#define ALLOWED_ANCHOR_IN_LB \
4138
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4139
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4140
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4141
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4142
0
#define ALLOWED_ANCHOR_IN_LB_NOT \
4143
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4144
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4145
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4146
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4147
4148
0
      case ANCHOR_LOOK_BEHIND:
4149
0
  {
4150
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4151
0
            ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4152
0
    if (r < 0) return r;
4153
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4154
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4155
0
    r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4156
0
    if (r != 0) return r;
4157
0
    r = setup_look_behind(node, reg, env);
4158
0
  }
4159
0
  break;
4160
4161
0
      case ANCHOR_LOOK_BEHIND_NOT:
4162
0
  {
4163
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4164
0
          ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4165
0
    if (r < 0) return r;
4166
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4167
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4168
0
    r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4169
0
       env);
4170
0
    if (r != 0) return r;
4171
0
    r = setup_look_behind(node, reg, env);
4172
0
  }
4173
0
  break;
4174
11.1k
      }
4175
11.1k
    }
4176
11.1k
    break;
4177
4178
11.1k
  default:
4179
0
    break;
4180
66.2k
  }
4181
4182
66.2k
  return r;
4183
66.2k
}
4184
4185
/* set skip map for Sunday's quick search */
4186
static int
4187
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4188
      UChar skip[], int ignore_case)
4189
3.08k
{
4190
3.08k
  OnigDistance i, len;
4191
3.08k
  int clen, flen, n, j, k;
4192
3.08k
  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4193
3.08k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4194
3.08k
  OnigEncoding enc = reg->enc;
4195
4196
3.08k
  len = end - s;
4197
3.08k
  if (len >= ONIG_CHAR_TABLE_SIZE) {
4198
    /* This should not happen. */
4199
0
    return ONIGERR_TYPE_BUG;
4200
0
  }
4201
4202
3.08k
  if (ignore_case) {
4203
0
    for (i = 0; i < len; i += clen) {
4204
0
      p = s + i;
4205
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4206
0
    p, end, items);
4207
0
      clen = enclen(enc, p, end);
4208
0
      if (p + clen > end)
4209
0
  clen = (int )(end - p);
4210
4211
0
      for (j = 0; j < n; j++) {
4212
0
  if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4213
    /* Different length isn't supported. Stop optimization at here. */
4214
0
    end = p;
4215
0
    goto endcheck;
4216
0
  }
4217
0
  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4218
0
  if (flen != clen) {
4219
    /* Different length isn't supported. Stop optimization at here. */
4220
0
    end = p;
4221
0
    goto endcheck;
4222
0
  }
4223
0
      }
4224
0
    }
4225
0
endcheck:
4226
0
    len = end - s;
4227
0
  }
4228
4229
791k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4230
788k
    skip[i] = (UChar )(len + 1);
4231
3.08k
  n = 0;
4232
47.7k
  for (i = 0; i < len; i += clen) {
4233
44.6k
    p = s + i;
4234
44.6k
    if (ignore_case)
4235
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4236
44.6k
               p, end, items);
4237
44.6k
    clen = enclen(enc, p, end);
4238
44.6k
    if (p + clen > end)
4239
0
      clen = (int )(end - p);
4240
4241
89.3k
    for (j = 0; j < clen; j++) {
4242
44.6k
      skip[s[i + j]] = (UChar )(len - i - j);
4243
44.6k
      for (k = 0; k < n; k++) {
4244
0
  ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4245
0
  skip[buf[j]] = (UChar )(len - i - j);
4246
0
      }
4247
44.6k
    }
4248
44.6k
  }
4249
4250
3.08k
  return (int )len;
4251
3.08k
}
4252
4253
typedef struct {
4254
  OnigDistance min;  /* min byte length */
4255
  OnigDistance max;  /* max byte length */
4256
} MinMaxLen;
4257
4258
typedef struct {
4259
  MinMaxLen        mmd;
4260
  OnigEncoding     enc;
4261
  OnigOptionType   options;
4262
  OnigCaseFoldType case_fold_flag;
4263
  ScanEnv*         scan_env;
4264
} OptEnv;
4265
4266
typedef struct {
4267
  int left_anchor;
4268
  int right_anchor;
4269
} OptAncInfo;
4270
4271
typedef struct {
4272
  MinMaxLen  mmd; /* info position */
4273
  OptAncInfo anc;
4274
4275
  int   reach_end;
4276
  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
4277
  int   len;
4278
  UChar s[OPT_EXACT_MAXLEN];
4279
} OptExactInfo;
4280
4281
typedef struct {
4282
  MinMaxLen mmd; /* info position */
4283
  OptAncInfo anc;
4284
4285
  int   value;      /* weighted value */
4286
  UChar map[ONIG_CHAR_TABLE_SIZE];
4287
} OptMapInfo;
4288
4289
typedef struct {
4290
  MinMaxLen    len;
4291
4292
  OptAncInfo   anc;
4293
  OptExactInfo exb;    /* boundary */
4294
  OptExactInfo exm;    /* middle */
4295
  OptExactInfo expr;   /* prec read (?=...) */
4296
4297
  OptMapInfo   map;   /* boundary */
4298
} NodeOptInfo;
4299
4300
4301
static int
4302
map_position_value(OnigEncoding enc, int i)
4303
56.9k
{
4304
56.9k
  static const short int ByteValTable[] = {
4305
56.9k
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4306
56.9k
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4307
56.9k
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4308
56.9k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4309
56.9k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4310
56.9k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4311
56.9k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4312
56.9k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4313
56.9k
  };
4314
4315
56.9k
  if (i < numberof(ByteValTable)) {
4316
56.9k
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4317
0
      return 20;
4318
56.9k
    else
4319
56.9k
      return (int )ByteValTable[i];
4320
56.9k
  }
4321
0
  else
4322
0
    return 4;   /* Take it easy. */
4323
56.9k
}
4324
4325
static int
4326
distance_value(MinMaxLen* mm)
4327
38.5k
{
4328
  /* 1000 / (min-max-dist + 1) */
4329
38.5k
  static const short int dist_vals[] = {
4330
38.5k
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4331
38.5k
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4332
38.5k
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4333
38.5k
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4334
38.5k
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4335
38.5k
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4336
38.5k
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4337
38.5k
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4338
38.5k
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4339
38.5k
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4340
38.5k
  };
4341
4342
38.5k
  OnigDistance d;
4343
4344
38.5k
  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4345
4346
14.2k
  d = mm->max - mm->min;
4347
14.2k
  if (d < numberof(dist_vals))
4348
    /* return dist_vals[d] * 16 / (mm->min + 12); */
4349
14.2k
    return (int )dist_vals[d];
4350
0
  else
4351
0
    return 1;
4352
14.2k
}
4353
4354
static int
4355
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4356
19.2k
{
4357
19.2k
  if (v2 <= 0) return -1;
4358
19.2k
  if (v1 <= 0) return  1;
4359
4360
19.2k
  v1 *= distance_value(d1);
4361
19.2k
  v2 *= distance_value(d2);
4362
4363
19.2k
  if (v2 > v1) return  1;
4364
17.7k
  if (v2 < v1) return -1;
4365
4366
9.24k
  if (d2->min < d1->min) return  1;
4367
8.85k
  if (d2->min > d1->min) return -1;
4368
1.92k
  return 0;
4369
8.85k
}
4370
4371
static int
4372
is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4373
2.69k
{
4374
2.69k
  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4375
2.69k
}
4376
4377
4378
static void
4379
set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4380
41.1k
{
4381
41.1k
  mml->min = min;
4382
41.1k
  mml->max = max;
4383
41.1k
}
4384
4385
static void
4386
clear_mml(MinMaxLen* mml)
4387
301k
{
4388
301k
  mml->min = mml->max = 0;
4389
301k
}
4390
4391
static void
4392
copy_mml(MinMaxLen* to, MinMaxLen* from)
4393
213k
{
4394
213k
  to->min = from->min;
4395
213k
  to->max = from->max;
4396
213k
}
4397
4398
static void
4399
add_mml(MinMaxLen* to, MinMaxLen* from)
4400
76.2k
{
4401
76.2k
  to->min = distance_add(to->min, from->min);
4402
76.2k
  to->max = distance_add(to->max, from->max);
4403
76.2k
}
4404
4405
#if 0
4406
static void
4407
add_len_mml(MinMaxLen* to, OnigDistance len)
4408
{
4409
  to->min = distance_add(to->min, len);
4410
  to->max = distance_add(to->max, len);
4411
}
4412
#endif
4413
4414
static void
4415
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4416
6.16k
{
4417
6.16k
  if (to->min > from->min) to->min = from->min;
4418
6.16k
  if (to->max < from->max) to->max = from->max;
4419
6.16k
}
4420
4421
static void
4422
copy_opt_env(OptEnv* to, OptEnv* from)
4423
9.24k
{
4424
9.24k
  *to = *from;
4425
9.24k
}
4426
4427
static void
4428
clear_opt_anc_info(OptAncInfo* anc)
4429
333k
{
4430
333k
  anc->left_anchor  = 0;
4431
333k
  anc->right_anchor = 0;
4432
333k
}
4433
4434
static void
4435
copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4436
40.8k
{
4437
40.8k
  *to = *from;
4438
40.8k
}
4439
4440
static void
4441
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4442
        OnigDistance left_len, OnigDistance right_len)
4443
40.8k
{
4444
40.8k
  clear_opt_anc_info(to);
4445
4446
40.8k
  to->left_anchor = left->left_anchor;
4447
40.8k
  if (left_len == 0) {
4448
19.6k
    to->left_anchor |= right->left_anchor;
4449
19.6k
  }
4450
4451
40.8k
  to->right_anchor = right->right_anchor;
4452
40.8k
  if (right_len == 0) {
4453
11.5k
    to->right_anchor |= left->right_anchor;
4454
11.5k
  }
4455
29.2k
  else {
4456
29.2k
    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4457
29.2k
  }
4458
40.8k
}
4459
4460
static int
4461
is_left_anchor(int anc)
4462
10.7k
{
4463
10.7k
  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4464
10.7k
      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4465
10.7k
      anc == ANCHOR_PREC_READ_NOT)
4466
3.08k
    return 0;
4467
4468
7.70k
  return 1;
4469
10.7k
}
4470
4471
static int
4472
is_set_opt_anc_info(OptAncInfo* to, int anc)
4473
1.92k
{
4474
1.92k
  if ((to->left_anchor & anc) != 0) return 1;
4475
4476
1.92k
  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4477
1.92k
}
4478
4479
static void
4480
add_opt_anc_info(OptAncInfo* to, int anc)
4481
10.7k
{
4482
10.7k
  if (is_left_anchor(anc))
4483
7.70k
    to->left_anchor |= anc;
4484
3.08k
  else
4485
3.08k
    to->right_anchor |= anc;
4486
10.7k
}
4487
4488
static void
4489
remove_opt_anc_info(OptAncInfo* to, int anc)
4490
0
{
4491
0
  if (is_left_anchor(anc))
4492
0
    to->left_anchor &= ~anc;
4493
0
  else
4494
0
    to->right_anchor &= ~anc;
4495
0
}
4496
4497
static void
4498
alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4499
8.47k
{
4500
8.47k
  to->left_anchor  &= add->left_anchor;
4501
8.47k
  to->right_anchor &= add->right_anchor;
4502
8.47k
}
4503
4504
static int
4505
is_full_opt_exact_info(OptExactInfo* ex)
4506
0
{
4507
0
  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4508
0
}
4509
4510
static void
4511
clear_opt_exact_info(OptExactInfo* ex)
4512
220k
{
4513
220k
  clear_mml(&ex->mmd);
4514
220k
  clear_opt_anc_info(&ex->anc);
4515
220k
  ex->reach_end   = 0;
4516
220k
  ex->ignore_case = -1;   /* unset */
4517
220k
  ex->len         = 0;
4518
220k
  ex->s[0]        = '\0';
4519
220k
}
4520
4521
static void
4522
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4523
13.0k
{
4524
13.0k
  *to = *from;
4525
13.0k
}
4526
4527
static void
4528
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4529
385
{
4530
385
  int i, j, len;
4531
385
  UChar *p, *end;
4532
385
  OptAncInfo tanc;
4533
4534
385
  if (to->ignore_case < 0)
4535
0
    to->ignore_case = add->ignore_case;
4536
385
  else if (to->ignore_case != add->ignore_case)
4537
0
    return ;  /* avoid */
4538
4539
385
  p = add->s;
4540
385
  end = p + add->len;
4541
1.54k
  for (i = to->len; p < end; ) {
4542
1.15k
    len = enclen(enc, p, end);
4543
1.15k
    if (i + len > OPT_EXACT_MAXLEN) break;
4544
2.31k
    for (j = 0; j < len && p < end; j++)
4545
1.15k
      to->s[i++] = *p++;
4546
1.15k
  }
4547
4548
385
  to->len = i;
4549
385
  to->reach_end = (p == end ? add->reach_end : 0);
4550
4551
385
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4552
385
  if (! to->reach_end) tanc.right_anchor = 0;
4553
385
  copy_opt_anc_info(&to->anc, &tanc);
4554
385
}
4555
4556
static void
4557
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4558
        int raw ARG_UNUSED, OnigEncoding enc)
4559
16.5k
{
4560
16.5k
  int i, j, len;
4561
16.5k
  UChar *p;
4562
4563
136k
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4564
120k
    len = enclen(enc, p, end);
4565
120k
    if (i + len > OPT_EXACT_MAXLEN) break;
4566
240k
    for (j = 0; j < len && p < end; j++)
4567
120k
      to->s[i++] = *p++;
4568
120k
  }
4569
4570
16.5k
  to->len = i;
4571
16.5k
}
4572
4573
static void
4574
alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4575
9.24k
{
4576
9.24k
  int i, j, len;
4577
4578
9.24k
  if (add->len == 0 || to->len == 0) {
4579
6.54k
    clear_opt_exact_info(to);
4580
6.54k
    return ;
4581
6.54k
  }
4582
4583
2.69k
  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4584
385
    clear_opt_exact_info(to);
4585
385
    return ;
4586
385
  }
4587
4588
3.85k
  for (i = 0; i < to->len && i < add->len; ) {
4589
3.85k
    if (to->s[i] != add->s[i]) break;
4590
1.54k
    len = enclen(env->enc, to->s + i, to->s + to->len);
4591
4592
1.54k
    for (j = 1; j < len; j++) {
4593
0
      if (to->s[i+j] != add->s[i+j]) break;
4594
0
    }
4595
1.54k
    if (j < len) break;
4596
1.54k
    i += len;
4597
1.54k
  }
4598
4599
2.31k
  if (! add->reach_end || i < add->len || i < to->len) {
4600
2.31k
    to->reach_end = 0;
4601
2.31k
  }
4602
2.31k
  to->len = i;
4603
2.31k
  if (to->ignore_case < 0)
4604
0
    to->ignore_case = add->ignore_case;
4605
2.31k
  else if (add->ignore_case >= 0)
4606
2.31k
    to->ignore_case |= add->ignore_case;
4607
4608
2.31k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4609
2.31k
  if (! to->reach_end) to->anc.right_anchor = 0;
4610
2.31k
}
4611
4612
static void
4613
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4614
82.3k
{
4615
82.3k
  int v1, v2;
4616
4617
82.3k
  v1 = now->len;
4618
82.3k
  v2 = alt->len;
4619
4620
82.3k
  if (v2 == 0) {
4621
65.8k
    return ;
4622
65.8k
  }
4623
16.5k
  else if (v1 == 0) {
4624
13.0k
    copy_opt_exact_info(now, alt);
4625
13.0k
    return ;
4626
13.0k
  }
4627
3.46k
  else if (v1 <= 2 && v2 <= 2) {
4628
    /* ByteValTable[x] is big value --> low price */
4629
385
    v2 = map_position_value(enc, now->s[0]);
4630
385
    v1 = map_position_value(enc, alt->s[0]);
4631
4632
385
    if (now->len > 1) v1 += 5;
4633
385
    if (alt->len > 1) v2 += 5;
4634
385
  }
4635
4636
3.46k
  if (now->ignore_case <= 0) v1 *= 2;
4637
3.46k
  if (alt->ignore_case <= 0) v2 *= 2;
4638
4639
3.46k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4640
0
    copy_opt_exact_info(now, alt);
4641
3.46k
}
4642
4643
static void
4644
clear_opt_map_info(OptMapInfo* map)
4645
71.2k
{
4646
71.2k
  static const OptMapInfo clean_info = {
4647
71.2k
    {0, 0}, {0, 0}, 0,
4648
71.2k
    {
4649
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4650
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4651
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4652
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4653
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4654
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4655
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4656
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4657
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4658
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4659
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4660
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4661
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4662
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4663
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4664
71.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4665
71.2k
    }
4666
71.2k
  };
4667
4668
71.2k
  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4669
71.2k
}
4670
4671
static void
4672
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4673
8.08k
{
4674
8.08k
  *to = *from;
4675
8.08k
}
4676
4677
static void
4678
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4679
50.4k
{
4680
50.4k
  if (map->map[c] == 0) {
4681
50.4k
    map->map[c] = 1;
4682
50.4k
    map->value += map_position_value(enc, c);
4683
50.4k
  }
4684
50.4k
}
4685
4686
static int
4687
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4688
                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4689
0
{
4690
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4691
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4692
0
  int i, n;
4693
4694
0
  add_char_opt_map_info(map, p[0], enc);
4695
4696
0
  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4697
0
  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4698
0
  if (n < 0) return n;
4699
4700
0
  for (i = 0; i < n; i++) {
4701
0
    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4702
0
    add_char_opt_map_info(map, buf[0], enc);
4703
0
  }
4704
4705
0
  return 0;
4706
0
}
4707
4708
static void
4709
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4710
38.1k
{
4711
38.1k
  const int z = 1<<15; /* 32768: something big value */
4712
4713
38.1k
  int v1, v2;
4714
4715
38.1k
  if (alt->value == 0) return ;
4716
17.7k
  if (now->value == 0) {
4717
8.08k
    copy_opt_map_info(now, alt);
4718
8.08k
    return ;
4719
8.08k
  }
4720
4721
9.62k
  v1 = z / now->value;
4722
9.62k
  v2 = z / alt->value;
4723
9.62k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4724
0
    copy_opt_map_info(now, alt);
4725
9.62k
}
4726
4727
static int
4728
comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4729
6.16k
{
4730
12.3k
#define COMP_EM_BASE  20
4731
6.16k
  int ve, vm;
4732
4733
6.16k
  if (m->value <= 0) return -1;
4734
4735
6.16k
  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4736
6.16k
  vm = COMP_EM_BASE * 5 * 2 / m->value;
4737
6.16k
  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4738
6.16k
}
4739
4740
static void
4741
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4742
3.08k
{
4743
3.08k
  int i, val;
4744
4745
  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4746
3.08k
  if (to->value == 0) return ;
4747
3.08k
  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4748
0
    clear_opt_map_info(to);
4749
0
    return ;
4750
0
  }
4751
4752
3.08k
  alt_merge_mml(&to->mmd, &add->mmd);
4753
4754
3.08k
  val = 0;
4755
791k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4756
788k
    if (add->map[i])
4757
3.08k
      to->map[i] = 1;
4758
4759
788k
    if (to->map[i])
4760
5.77k
      val += map_position_value(enc, i);
4761
788k
  }
4762
3.08k
  to->value = val;
4763
4764
3.08k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4765
3.08k
}
4766
4767
static void
4768
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4769
71.2k
{
4770
71.2k
  copy_mml(&(opt->exb.mmd),  mmd);
4771
71.2k
  copy_mml(&(opt->expr.mmd), mmd);
4772
71.2k
  copy_mml(&(opt->map.mmd),  mmd);
4773
71.2k
}
4774
4775
static void
4776
clear_node_opt_info(NodeOptInfo* opt)
4777
71.2k
{
4778
71.2k
  clear_mml(&opt->len);
4779
71.2k
  clear_opt_anc_info(&opt->anc);
4780
71.2k
  clear_opt_exact_info(&opt->exb);
4781
71.2k
  clear_opt_exact_info(&opt->exm);
4782
71.2k
  clear_opt_exact_info(&opt->expr);
4783
71.2k
  clear_opt_map_info(&opt->map);
4784
71.2k
}
4785
4786
static void
4787
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4788
9.24k
{
4789
9.24k
  *to = *from;
4790
9.24k
}
4791
4792
static void
4793
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4794
38.1k
{
4795
38.1k
  int exb_reach, exm_reach;
4796
38.1k
  OptAncInfo tanc;
4797
4798
38.1k
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4799
38.1k
  copy_opt_anc_info(&to->anc, &tanc);
4800
4801
38.1k
  if (add->exb.len > 0 && to->len.max == 0) {
4802
2.31k
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4803
2.31k
      to->len.max, add->len.max);
4804
2.31k
    copy_opt_anc_info(&add->exb.anc, &tanc);
4805
2.31k
  }
4806
4807
38.1k
  if (add->map.value > 0 && to->len.max == 0) {
4808
4.23k
    if (add->map.mmd.max == 0)
4809
4.23k
      add->map.anc.left_anchor |= to->anc.left_anchor;
4810
4.23k
  }
4811
4812
38.1k
  exb_reach = to->exb.reach_end;
4813
38.1k
  exm_reach = to->exm.reach_end;
4814
4815
38.1k
  if (add->len.max != 0)
4816
26.5k
    to->exb.reach_end = to->exm.reach_end = 0;
4817
4818
38.1k
  if (add->exb.len > 0) {
4819
11.1k
    if (exb_reach) {
4820
0
      concat_opt_exact_info(&to->exb, &add->exb, enc);
4821
0
      clear_opt_exact_info(&add->exb);
4822
0
    }
4823
11.1k
    else if (exm_reach) {
4824
385
      concat_opt_exact_info(&to->exm, &add->exb, enc);
4825
385
      clear_opt_exact_info(&add->exb);
4826
385
    }
4827
11.1k
  }
4828
38.1k
  select_opt_exact_info(enc, &to->exm, &add->exb);
4829
38.1k
  select_opt_exact_info(enc, &to->exm, &add->exm);
4830
4831
38.1k
  if (to->expr.len > 0) {
4832
0
    if (add->len.max > 0) {
4833
0
      if (to->expr.len > (int )add->len.max)
4834
0
  to->expr.len = (int )add->len.max;
4835
4836
0
      if (to->expr.mmd.max == 0)
4837
0
  select_opt_exact_info(enc, &to->exb, &to->expr);
4838
0
      else
4839
0
  select_opt_exact_info(enc, &to->exm, &to->expr);
4840
0
    }
4841
0
  }
4842
38.1k
  else if (add->expr.len > 0) {
4843
0
    copy_opt_exact_info(&to->expr, &add->expr);
4844
0
  }
4845
4846
38.1k
  select_opt_map_info(&to->map, &add->map);
4847
4848
38.1k
  add_mml(&to->len, &add->len);
4849
38.1k
}
4850
4851
static void
4852
alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4853
3.08k
{
4854
3.08k
  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4855
3.08k
  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4856
3.08k
  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4857
3.08k
  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4858
3.08k
  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4859
4860
3.08k
  alt_merge_mml(&to->len, &add->len);
4861
3.08k
}
4862
4863
4864
1.92k
#define MAX_NODE_OPT_INFO_REF_COUNT    5
4865
4866
static int
4867
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4868
71.2k
{
4869
71.2k
  int type;
4870
71.2k
  int r = 0;
4871
4872
71.2k
  clear_node_opt_info(opt);
4873
71.2k
  set_bound_node_opt_info(opt, &env->mmd);
4874
4875
71.2k
  type = NTYPE(node);
4876
71.2k
  switch (type) {
4877
9.24k
  case NT_LIST:
4878
9.24k
    {
4879
9.24k
      OptEnv nenv;
4880
9.24k
      NodeOptInfo nopt;
4881
9.24k
      Node* nd = node;
4882
4883
9.24k
      copy_opt_env(&nenv, env);
4884
38.1k
      do {
4885
38.1k
  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4886
38.1k
  if (r == 0) {
4887
38.1k
    add_mml(&nenv.mmd, &nopt.len);
4888
38.1k
    concat_left_node_opt_info(env->enc, opt, &nopt);
4889
38.1k
  }
4890
38.1k
      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4891
9.24k
    }
4892
9.24k
    break;
4893
4894
2.31k
  case NT_ALT:
4895
2.31k
    {
4896
2.31k
      NodeOptInfo nopt;
4897
2.31k
      Node* nd = node;
4898
4899
5.39k
      do {
4900
5.39k
  r = optimize_node_left(NCAR(nd), &nopt, env);
4901
5.39k
  if (r == 0) {
4902
5.39k
    if (nd == node) copy_node_opt_info(opt, &nopt);
4903
3.08k
    else            alt_merge_node_opt_info(opt, &nopt, env);
4904
5.39k
  }
4905
5.39k
      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4906
2.31k
    }
4907
2.31k
    break;
4908
4909
16.5k
  case NT_STR:
4910
16.5k
    {
4911
16.5k
      StrNode* sn = NSTR(node);
4912
16.5k
      OnigDistance slen = sn->end - sn->s;
4913
16.5k
      int is_raw = NSTRING_IS_RAW(node);
4914
4915
16.5k
      if (! NSTRING_IS_AMBIG(node)) {
4916
16.5k
  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4917
16.5k
          is_raw, env->enc);
4918
16.5k
  opt->exb.ignore_case = 0;
4919
16.5k
  if (slen > 0) {
4920
16.1k
    add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4921
16.1k
  }
4922
16.5k
  set_mml(&opt->len, slen, slen);
4923
16.5k
      }
4924
0
      else {
4925
0
  OnigDistance max;
4926
4927
0
  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4928
0
    int n = onigenc_strlen(env->enc, sn->s, sn->end);
4929
0
    max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4930
0
  }
4931
0
  else {
4932
0
    concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4933
0
            is_raw, env->enc);
4934
0
    opt->exb.ignore_case = 1;
4935
4936
0
    if (slen > 0) {
4937
0
      r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4938
0
            env->enc, env->case_fold_flag);
4939
0
      if (r != 0) break;
4940
0
    }
4941
4942
0
    max = slen;
4943
0
  }
4944
4945
0
  set_mml(&opt->len, slen, max);
4946
0
      }
4947
4948
16.5k
      if ((OnigDistance )opt->exb.len == slen)
4949
15.0k
  opt->exb.reach_end = 1;
4950
16.5k
    }
4951
0
    break;
4952
4953
9.24k
  case NT_CCLASS:
4954
9.24k
    {
4955
9.24k
      int i, z;
4956
9.24k
      CClassNode* cc = NCCLASS(node);
4957
4958
      /* no need to check ignore case. (set in setup_tree()) */
4959
4960
9.24k
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4961
2.31k
  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4962
2.31k
  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4963
4964
2.31k
  set_mml(&opt->len, min, max);
4965
2.31k
      }
4966
6.93k
      else {
4967
1.78M
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4968
1.77M
    z = BITSET_AT(cc->bs, i);
4969
1.77M
    if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4970
34.2k
      add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4971
34.2k
    }
4972
1.77M
  }
4973
6.93k
  set_mml(&opt->len, 1, 1);
4974
6.93k
      }
4975
9.24k
    }
4976
9.24k
    break;
4977
4978
0
  case NT_CTYPE:
4979
0
    {
4980
0
      int i, min, max;
4981
0
      int maxcode;
4982
4983
0
      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4984
4985
0
      if (max == 1) {
4986
0
  min = 1;
4987
4988
0
  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4989
0
  switch (NCTYPE(node)->ctype) {
4990
0
  case ONIGENC_CTYPE_WORD:
4991
0
    if (NCTYPE(node)->not != 0) {
4992
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4993
0
        if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
4994
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4995
0
        }
4996
0
      }
4997
0
    }
4998
0
    else {
4999
0
      for (i = 0; i < maxcode; i++) {
5000
0
        if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5001
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5002
0
        }
5003
0
      }
5004
0
    }
5005
0
    break;
5006
0
  }
5007
0
      }
5008
0
      else {
5009
0
  min = ONIGENC_MBC_MINLEN(env->enc);
5010
0
      }
5011
0
      set_mml(&opt->len, min, max);
5012
0
    }
5013
0
    break;
5014
5015
3.85k
  case NT_CANY:
5016
3.85k
    {
5017
3.85k
      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5018
3.85k
      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5019
3.85k
      set_mml(&opt->len, min, max);
5020
3.85k
    }
5021
3.85k
    break;
5022
5023
11.1k
  case NT_ANCHOR:
5024
11.1k
    switch (NANCHOR(node)->type) {
5025
0
    case ANCHOR_BEGIN_BUF:
5026
0
    case ANCHOR_BEGIN_POSITION:
5027
7.70k
    case ANCHOR_BEGIN_LINE:
5028
7.70k
    case ANCHOR_END_BUF:
5029
7.70k
    case ANCHOR_SEMI_END_BUF:
5030
10.7k
    case ANCHOR_END_LINE:
5031
10.7k
    case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5032
10.7k
    case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5033
10.7k
      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5034
10.7k
      break;
5035
5036
0
    case ANCHOR_PREC_READ:
5037
0
      {
5038
0
  NodeOptInfo nopt;
5039
5040
0
  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5041
0
  if (r == 0) {
5042
0
    if (nopt.exb.len > 0)
5043
0
      copy_opt_exact_info(&opt->expr, &nopt.exb);
5044
0
    else if (nopt.exm.len > 0)
5045
0
      copy_opt_exact_info(&opt->expr, &nopt.exm);
5046
5047
0
    opt->expr.reach_end = 0;
5048
5049
0
    if (nopt.map.value > 0)
5050
0
      copy_opt_map_info(&opt->map, &nopt.map);
5051
0
  }
5052
0
      }
5053
0
      break;
5054
5055
0
    case ANCHOR_LOOK_BEHIND_NOT:
5056
0
      break;
5057
11.1k
    }
5058
11.1k
    break;
5059
5060
11.1k
  case NT_BREF:
5061
0
    {
5062
0
      int i;
5063
0
      int* backs;
5064
0
      OnigDistance min, max, tmin, tmax;
5065
0
      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5066
0
      BRefNode* br = NBREF(node);
5067
5068
0
      if (br->state & NST_RECURSION) {
5069
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5070
0
  break;
5071
0
      }
5072
0
      backs = BACKREFS_P(br);
5073
0
      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5074
0
      if (r != 0) break;
5075
0
      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5076
0
      if (r != 0) break;
5077
0
      for (i = 1; i < br->back_num; i++) {
5078
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5079
0
  if (r != 0) break;
5080
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5081
0
  if (r != 0) break;
5082
0
  if (min > tmin) min = tmin;
5083
0
  if (max < tmax) max = tmax;
5084
0
      }
5085
0
      if (r == 0) set_mml(&opt->len, min, max);
5086
0
    }
5087
0
    break;
5088
5089
0
#ifdef USE_SUBEXP_CALL
5090
0
  case NT_CALL:
5091
0
    if (IS_CALL_RECURSION(NCALL(node)))
5092
0
      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5093
0
    else {
5094
0
      OnigOptionType save = env->options;
5095
0
      env->options = NENCLOSE(NCALL(node)->target)->option;
5096
0
      r = optimize_node_left(NCALL(node)->target, opt, env);
5097
0
      env->options = save;
5098
0
    }
5099
0
    break;
5100
0
#endif
5101
5102
11.5k
  case NT_QTFR:
5103
11.5k
    {
5104
11.5k
      int i;
5105
11.5k
      OnigDistance min, max;
5106
11.5k
      NodeOptInfo nopt;
5107
11.5k
      QtfrNode* qn = NQTFR(node);
5108
5109
11.5k
      r = optimize_node_left(qn->target, &nopt, env);
5110
11.5k
      if (r) break;
5111
5112
11.5k
      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5113
4.23k
  if (env->mmd.max == 0 &&
5114
4.23k
      NTYPE(qn->target) == NT_CANY && qn->greedy) {
5115
0
    if (IS_MULTILINE(env->options))
5116
      /* implicit anchor: /.*a/ ==> /\A.*a/ */
5117
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5118
0
    else
5119
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5120
0
  }
5121
4.23k
      }
5122
7.31k
      else {
5123
7.31k
  if (qn->lower > 0) {
5124
6.93k
    copy_node_opt_info(opt, &nopt);
5125
6.93k
    if (nopt.exb.len > 0) {
5126
0
      if (nopt.exb.reach_end) {
5127
0
        for (i = 2; i <= qn->lower &&
5128
0
        ! is_full_opt_exact_info(&opt->exb); i++) {
5129
0
    concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5130
0
        }
5131
0
        if (i < qn->lower) {
5132
0
    opt->exb.reach_end = 0;
5133
0
        }
5134
0
      }
5135
0
    }
5136
5137
6.93k
    if (qn->lower != qn->upper) {
5138
6.93k
      opt->exb.reach_end = 0;
5139
6.93k
      opt->exm.reach_end = 0;
5140
6.93k
    }
5141
6.93k
    if (qn->lower > 1)
5142
0
      opt->exm.reach_end = 0;
5143
6.93k
  }
5144
7.31k
      }
5145
5146
11.5k
      min = distance_multiply(nopt.len.min, qn->lower);
5147
11.5k
      if (IS_REPEAT_INFINITE(qn->upper))
5148
11.1k
  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5149
385
      else
5150
385
  max = distance_multiply(nopt.len.max, qn->upper);
5151
5152
11.5k
      set_mml(&opt->len, min, max);
5153
11.5k
    }
5154
0
    break;
5155
5156
7.31k
  case NT_ENCLOSE:
5157
7.31k
    {
5158
7.31k
      EncloseNode* en = NENCLOSE(node);
5159
5160
7.31k
      switch (en->type) {
5161
385
      case ENCLOSE_OPTION:
5162
385
  {
5163
385
    OnigOptionType save = env->options;
5164
5165
385
    env->options = en->option;
5166
385
    r = optimize_node_left(en->target, opt, env);
5167
385
    env->options = save;
5168
385
  }
5169
385
  break;
5170
5171
1.92k
      case ENCLOSE_MEMORY:
5172
1.92k
#ifdef USE_SUBEXP_CALL
5173
1.92k
  en->opt_count++;
5174
1.92k
  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5175
0
    OnigDistance min, max;
5176
5177
0
    min = 0;
5178
0
    max = ONIG_INFINITE_DISTANCE;
5179
0
    if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5180
0
    if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5181
0
    set_mml(&opt->len, min, max);
5182
0
  }
5183
1.92k
  else
5184
1.92k
#endif
5185
1.92k
  {
5186
1.92k
    r = optimize_node_left(en->target, opt, env);
5187
5188
1.92k
    if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5189
0
      if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5190
0
        remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5191
0
    }
5192
1.92k
  }
5193
1.92k
  break;
5194
5195
5.00k
      case ENCLOSE_STOP_BACKTRACK:
5196
5.00k
      case ENCLOSE_CONDITION:
5197
5.00k
  r = optimize_node_left(en->target, opt, env);
5198
5.00k
  break;
5199
5200
0
      case ENCLOSE_ABSENT:
5201
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5202
0
  break;
5203
7.31k
      }
5204
7.31k
    }
5205
7.31k
    break;
5206
5207
7.31k
  default:
5208
#ifdef ONIG_DEBUG
5209
    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5210
      NTYPE(node));
5211
#endif
5212
0
    r = ONIGERR_TYPE_BUG;
5213
0
    break;
5214
71.2k
  }
5215
5216
71.2k
  return r;
5217
71.2k
}
5218
5219
static int
5220
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5221
4.23k
{
5222
4.23k
  int allow_reverse;
5223
5224
4.23k
  if (e->len == 0) return 0;
5225
5226
4.23k
  reg->exact = (UChar* )xmalloc(e->len);
5227
4.23k
  CHECK_NULL_RETURN_MEMERR(reg->exact);
5228
4.23k
  xmemcpy(reg->exact, e->s, e->len);
5229
4.23k
  reg->exact_end = reg->exact + e->len;
5230
5231
4.23k
  allow_reverse =
5232
4.23k
  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5233
5234
4.23k
  if (e->ignore_case > 0) {
5235
0
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5236
0
      e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5237
0
          reg->map, 1);
5238
0
      reg->exact_end = reg->exact + e->len;
5239
0
      if (e->len >= 3) {
5240
0
  reg->optimize = (allow_reverse != 0
5241
0
       ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5242
0
      }
5243
0
      else if (e->len > 0) {
5244
0
  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5245
0
      }
5246
0
      else
5247
0
  return 0;
5248
0
    }
5249
0
    else {
5250
0
      reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5251
0
    }
5252
0
  }
5253
4.23k
  else {
5254
4.23k
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5255
3.08k
      set_bm_skip(reg->exact, reg->exact_end, reg,
5256
3.08k
      reg->map, 0);
5257
3.08k
      reg->optimize = (allow_reverse != 0
5258
3.08k
         ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5259
3.08k
    }
5260
1.15k
    else {
5261
1.15k
      reg->optimize = ONIG_OPTIMIZE_EXACT;
5262
1.15k
    }
5263
4.23k
  }
5264
5265
4.23k
  reg->dmin = e->mmd.min;
5266
4.23k
  reg->dmax = e->mmd.max;
5267
5268
4.23k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5269
4.23k
    reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5270
4.23k
  }
5271
5272
4.23k
  return 0;
5273
4.23k
}
5274
5275
static void
5276
set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5277
3.08k
{
5278
3.08k
  int i;
5279
5280
791k
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5281
788k
    reg->map[i] = m->map[i];
5282
5283
3.08k
  reg->optimize   = ONIG_OPTIMIZE_MAP;
5284
3.08k
  reg->dmin       = m->mmd.min;
5285
3.08k
  reg->dmax       = m->mmd.max;
5286
5287
3.08k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5288
3.08k
    reg->threshold_len = (int )(reg->dmin + 1);
5289
3.08k
  }
5290
3.08k
}
5291
5292
static void
5293
set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5294
7.31k
{
5295
7.31k
  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
5296
7.31k
  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5297
7.31k
}
5298
5299
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5300
static void print_optimize_info(FILE* f, regex_t* reg);
5301
#endif
5302
5303
static int
5304
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5305
8.85k
{
5306
5307
8.85k
  int r;
5308
8.85k
  NodeOptInfo opt;
5309
8.85k
  OptEnv env;
5310
5311
8.85k
  env.enc            = reg->enc;
5312
8.85k
  env.options        = reg->options;
5313
8.85k
  env.case_fold_flag = reg->case_fold_flag;
5314
8.85k
  env.scan_env   = scan_env;
5315
8.85k
  clear_mml(&env.mmd);
5316
5317
8.85k
  r = optimize_node_left(node, &opt, &env);
5318
8.85k
  if (r) return r;
5319
5320
8.85k
  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5321
8.85k
        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5322
8.85k
        ANCHOR_LOOK_BEHIND);
5323
5324
8.85k
  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5325
0
    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5326
5327
8.85k
  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5328
8.85k
  ANCHOR_PREC_READ_NOT);
5329
5330
8.85k
  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5331
0
    reg->anchor_dmin = opt.len.min;
5332
0
    reg->anchor_dmax = opt.len.max;
5333
0
  }
5334
5335
8.85k
  if (opt.exb.len > 0 || opt.exm.len > 0) {
5336
6.16k
    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5337
6.16k
    if (opt.map.value > 0 &&
5338
6.16k
  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5339
1.92k
      goto set_map;
5340
1.92k
    }
5341
4.23k
    else {
5342
4.23k
      r = set_optimize_exact_info(reg, &opt.exb);
5343
4.23k
      set_sub_anchor(reg, &opt.exb.anc);
5344
4.23k
    }
5345
6.16k
  }
5346
2.69k
  else if (opt.map.value > 0) {
5347
3.08k
  set_map:
5348
3.08k
    set_optimize_map_info(reg, &opt.map);
5349
3.08k
    set_sub_anchor(reg, &opt.map.anc);
5350
3.08k
  }
5351
1.54k
  else {
5352
1.54k
    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5353
1.54k
    if (opt.len.max == 0)
5354
770
      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5355
1.54k
  }
5356
5357
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5358
  print_optimize_info(stderr, reg);
5359
#endif
5360
8.85k
  return r;
5361
8.85k
}
5362
5363
static void
5364
clear_optimize_info(regex_t* reg)
5365
8.85k
{
5366
8.85k
  reg->optimize      = ONIG_OPTIMIZE_NONE;
5367
8.85k
  reg->anchor        = 0;
5368
8.85k
  reg->anchor_dmin   = 0;
5369
8.85k
  reg->anchor_dmax   = 0;
5370
8.85k
  reg->sub_anchor    = 0;
5371
8.85k
  reg->exact_end     = (UChar* )NULL;
5372
8.85k
  reg->threshold_len = 0;
5373
8.85k
  if (IS_NOT_NULL(reg->exact)) {
5374
0
    xfree(reg->exact);
5375
0
    reg->exact = (UChar* )NULL;
5376
0
  }
5377
8.85k
}
5378
5379
#ifdef ONIG_DEBUG
5380
5381
static void print_enc_string(FILE* fp, OnigEncoding enc,
5382
           const UChar *s, const UChar *end)
5383
{
5384
  fprintf(fp, "\nPATTERN: /");
5385
5386
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5387
    const UChar *p;
5388
    OnigCodePoint code;
5389
5390
    p = s;
5391
    while (p < end) {
5392
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5393
      if (code >= 0x80) {
5394
  fprintf(fp, " 0x%04x ", (int )code);
5395
      }
5396
      else {
5397
  fputc((int )code, fp);
5398
      }
5399
5400
      p += enclen(enc, p, end);
5401
    }
5402
  }
5403
  else {
5404
    while (s < end) {
5405
      fputc((int )*s, fp);
5406
      s++;
5407
    }
5408
  }
5409
5410
  fprintf(fp, "/ (%s)\n", enc->name);
5411
}
5412
#endif  /* ONIG_DEBUG */
5413
5414
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5415
static void
5416
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5417
{
5418
  if (a == ONIG_INFINITE_DISTANCE)
5419
    fputs("inf", f);
5420
  else
5421
    fprintf(f, "(%"PRIuPTR")", a);
5422
5423
  fputs("-", f);
5424
5425
  if (b == ONIG_INFINITE_DISTANCE)
5426
    fputs("inf", f);
5427
  else
5428
    fprintf(f, "(%"PRIuPTR")", b);
5429
}
5430
5431
static void
5432
print_anchor(FILE* f, int anchor)
5433
{
5434
  int q = 0;
5435
5436
  fprintf(f, "[");
5437
5438
  if (anchor & ANCHOR_BEGIN_BUF) {
5439
    fprintf(f, "begin-buf");
5440
    q = 1;
5441
  }
5442
  if (anchor & ANCHOR_BEGIN_LINE) {
5443
    if (q) fprintf(f, ", ");
5444
    q = 1;
5445
    fprintf(f, "begin-line");
5446
  }
5447
  if (anchor & ANCHOR_BEGIN_POSITION) {
5448
    if (q) fprintf(f, ", ");
5449
    q = 1;
5450
    fprintf(f, "begin-pos");
5451
  }
5452
  if (anchor & ANCHOR_END_BUF) {
5453
    if (q) fprintf(f, ", ");
5454
    q = 1;
5455
    fprintf(f, "end-buf");
5456
  }
5457
  if (anchor & ANCHOR_SEMI_END_BUF) {
5458
    if (q) fprintf(f, ", ");
5459
    q = 1;
5460
    fprintf(f, "semi-end-buf");
5461
  }
5462
  if (anchor & ANCHOR_END_LINE) {
5463
    if (q) fprintf(f, ", ");
5464
    q = 1;
5465
    fprintf(f, "end-line");
5466
  }
5467
  if (anchor & ANCHOR_ANYCHAR_STAR) {
5468
    if (q) fprintf(f, ", ");
5469
    q = 1;
5470
    fprintf(f, "anychar-star");
5471
  }
5472
  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5473
    if (q) fprintf(f, ", ");
5474
    fprintf(f, "anychar-star-ml");
5475
  }
5476
5477
  fprintf(f, "]");
5478
}
5479
5480
static void
5481
print_optimize_info(FILE* f, regex_t* reg)
5482
{
5483
  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5484
                              "EXACT_IC", "MAP",
5485
                              "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5486
5487
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5488
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5489
  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5490
    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5491
  fprintf(f, "\n");
5492
5493
  if (reg->optimize) {
5494
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5495
    fprintf(f, "\n");
5496
  }
5497
  fprintf(f, "\n");
5498
5499
  if (reg->exact) {
5500
    UChar *p;
5501
    fprintf(f, "exact: [");
5502
    for (p = reg->exact; p < reg->exact_end; p++) {
5503
      fputc(*p, f);
5504
    }
5505
    fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5506
  }
5507
  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5508
    int c, i, n = 0;
5509
5510
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5511
      if (reg->map[i]) n++;
5512
5513
    fprintf(f, "map: n=%d\n", n);
5514
    if (n > 0) {
5515
      c = 0;
5516
      fputc('[', f);
5517
      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5518
  if (reg->map[i] != 0) {
5519
    if (c > 0)  fputs(", ", f);
5520
    c++;
5521
    if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5522
        ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5523
      fputc(i, f);
5524
    else
5525
      fprintf(f, "%d", i);
5526
  }
5527
      }
5528
      fprintf(f, "]\n");
5529
    }
5530
  }
5531
}
5532
#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5533
5534
5535
extern void
5536
onig_free_body(regex_t* reg)
5537
8.85k
{
5538
8.85k
  if (IS_NOT_NULL(reg)) {
5539
8.85k
    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5540
8.85k
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5541
8.85k
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5542
8.85k
    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5543
5544
8.85k
#ifdef USE_NAMED_GROUP
5545
8.85k
    onig_names_free(reg);
5546
8.85k
#endif
5547
8.85k
  }
5548
8.85k
}
5549
5550
extern void
5551
onig_free(regex_t* reg)
5552
8.85k
{
5553
8.85k
  if (IS_NOT_NULL(reg)) {
5554
8.85k
    onig_free_body(reg);
5555
8.85k
    xfree(reg);
5556
8.85k
  }
5557
8.85k
}
5558
5559
#ifdef RUBY
5560
size_t
5561
onig_memsize(const regex_t *reg)
5562
{
5563
    size_t size = sizeof(regex_t);
5564
    if (IS_NULL(reg)) return 0;
5565
    if (IS_NOT_NULL(reg->p))                size += reg->alloc;
5566
    if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
5567
    if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5568
    if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
5569
5570
    return size;
5571
}
5572
5573
size_t
5574
onig_region_memsize(const OnigRegion *regs)
5575
{
5576
    size_t size = sizeof(*regs);
5577
    if (IS_NULL(regs)) return 0;
5578
    size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5579
    return size;
5580
}
5581
#endif
5582
5583
#define REGEX_TRANSFER(to,from) do {\
5584
  onig_free_body(to);\
5585
  xmemcpy(to, from, sizeof(regex_t));\
5586
  xfree(from);\
5587
} while (0)
5588
5589
#if 0
5590
extern void
5591
onig_transfer(regex_t* to, regex_t* from)
5592
{
5593
  REGEX_TRANSFER(to, from);
5594
}
5595
#endif
5596
5597
#ifdef ONIG_DEBUG_COMPILE
5598
static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5599
#endif
5600
#ifdef ONIG_DEBUG_PARSE_TREE
5601
static void print_tree(FILE* f, Node* node);
5602
#endif
5603
5604
#ifdef RUBY
5605
extern int
5606
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5607
       OnigErrorInfo* einfo)
5608
{
5609
  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5610
}
5611
#endif
5612
5613
#ifdef RUBY
5614
extern int
5615
onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5616
        OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5617
#else
5618
extern int
5619
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5620
       OnigErrorInfo* einfo)
5621
#endif
5622
8.85k
{
5623
8.85k
#define COMPILE_INIT_SIZE  20
5624
5625
8.85k
  int r;
5626
8.85k
  OnigDistance init_size;
5627
8.85k
  Node*  root;
5628
8.85k
  ScanEnv  scan_env = {0};
5629
8.85k
#ifdef USE_SUBEXP_CALL
5630
8.85k
  UnsetAddrList  uslist;
5631
8.85k
#endif
5632
5633
8.85k
  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5634
5635
#ifdef RUBY
5636
  scan_env.sourcefile = sourcefile;
5637
  scan_env.sourceline = sourceline;
5638
#endif
5639
5640
#ifdef ONIG_DEBUG
5641
  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5642
#endif
5643
5644
8.85k
  if (reg->alloc == 0) {
5645
8.85k
    init_size = (pattern_end - pattern) * 2;
5646
8.85k
    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5647
8.85k
    r = BBUF_INIT(reg, init_size);
5648
8.85k
    if (r != 0) goto end;
5649
8.85k
  }
5650
0
  else
5651
0
    reg->used = 0;
5652
5653
8.85k
  reg->num_mem            = 0;
5654
8.85k
  reg->num_repeat         = 0;
5655
8.85k
  reg->num_null_check     = 0;
5656
8.85k
  reg->repeat_range_alloc = 0;
5657
8.85k
  reg->repeat_range       = (OnigRepeatRange* )NULL;
5658
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5659
  reg->num_comb_exp_check = 0;
5660
#endif
5661
5662
8.85k
  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5663
8.85k
  if (r != 0) goto err;
5664
5665
#ifdef ONIG_DEBUG_PARSE_TREE
5666
# if 0
5667
  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5668
  print_tree(stderr, root);
5669
# endif
5670
#endif
5671
5672
8.85k
#ifdef USE_NAMED_GROUP
5673
  /* mixed use named group and no-named group */
5674
8.85k
  if (scan_env.num_named > 0 &&
5675
8.85k
      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5676
8.85k
      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5677
385
    if (scan_env.num_named != scan_env.num_mem)
5678
0
      r = disable_noname_group_capture(&root, reg, &scan_env);
5679
385
    else
5680
385
      r = numbered_ref_check(root);
5681
5682
385
    if (r != 0) goto err;
5683
385
  }
5684
8.85k
#endif
5685
5686
8.85k
#ifdef USE_SUBEXP_CALL
5687
8.85k
  if (scan_env.num_call > 0) {
5688
0
    r = unset_addr_list_init(&uslist, scan_env.num_call);
5689
0
    if (r != 0) goto err;
5690
0
    scan_env.unset_addr_list = &uslist;
5691
0
    r = setup_subexp_call(root, &scan_env);
5692
0
    if (r != 0) goto err_unset;
5693
0
    r = subexp_recursive_check_trav(root, &scan_env);
5694
0
    if (r  < 0) goto err_unset;
5695
0
    r = subexp_inf_recursive_check_trav(root, &scan_env);
5696
0
    if (r != 0) goto err_unset;
5697
5698
0
    reg->num_call = scan_env.num_call;
5699
0
  }
5700
8.85k
  else
5701
8.85k
    reg->num_call = 0;
5702
8.85k
#endif
5703
5704
8.85k
  r = setup_tree(root, reg, 0, &scan_env);
5705
8.85k
  if (r != 0) goto err_unset;
5706
5707
#ifdef ONIG_DEBUG_PARSE_TREE
5708
  print_tree(stderr, root);
5709
#endif
5710
5711
8.85k
  reg->capture_history  = scan_env.capture_history;
5712
8.85k
  reg->bt_mem_start     = scan_env.bt_mem_start;
5713
8.85k
  reg->bt_mem_start    |= reg->capture_history;
5714
8.85k
  if (IS_FIND_CONDITION(reg->options))
5715
0
    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5716
8.85k
  else {
5717
8.85k
    reg->bt_mem_end  = scan_env.bt_mem_end;
5718
8.85k
    reg->bt_mem_end |= reg->capture_history;
5719
8.85k
  }
5720
5721
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5722
  if (scan_env.backrefed_mem == 0
5723
# ifdef USE_SUBEXP_CALL
5724
      || scan_env.num_call == 0
5725
# endif
5726
      ) {
5727
    setup_comb_exp_check(root, 0, &scan_env);
5728
# ifdef USE_SUBEXP_CALL
5729
    if (scan_env.has_recursion != 0) {
5730
      scan_env.num_comb_exp_check = 0;
5731
    }
5732
    else
5733
# endif
5734
    if (scan_env.comb_exp_max_regnum > 0) {
5735
      int i;
5736
      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5737
  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5738
    scan_env.num_comb_exp_check = 0;
5739
    break;
5740
  }
5741
      }
5742
    }
5743
  }
5744
5745
  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5746
#endif
5747
5748
8.85k
  clear_optimize_info(reg);
5749
8.85k
#ifndef ONIG_DONT_OPTIMIZE
5750
8.85k
  r = set_optimize_info_from_tree(root, reg, &scan_env);
5751
8.85k
  if (r != 0) goto err_unset;
5752
8.85k
#endif
5753
5754
8.85k
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5755
0
    xfree(scan_env.mem_nodes_dynamic);
5756
0
    scan_env.mem_nodes_dynamic = (Node** )NULL;
5757
0
  }
5758
5759
8.85k
  r = compile_tree(root, reg);
5760
8.85k
  if (r == 0) {
5761
8.85k
    r = add_opcode(reg, OP_END);
5762
8.85k
#ifdef USE_SUBEXP_CALL
5763
8.85k
    if (scan_env.num_call > 0) {
5764
0
      r = unset_addr_list_fix(&uslist, reg);
5765
0
      unset_addr_list_end(&uslist);
5766
0
      if (r) goto err;
5767
0
    }
5768
8.85k
#endif
5769
5770
8.85k
    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5771
0
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5772
8.85k
    else {
5773
8.85k
      if (reg->bt_mem_start != 0)
5774
0
  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5775
8.85k
      else
5776
8.85k
  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5777
8.85k
    }
5778
8.85k
  }
5779
0
#ifdef USE_SUBEXP_CALL
5780
0
  else if (scan_env.num_call > 0) {
5781
0
    unset_addr_list_end(&uslist);
5782
0
  }
5783
8.85k
#endif
5784
8.85k
  onig_node_free(root);
5785
5786
#ifdef ONIG_DEBUG_COMPILE
5787
# ifdef USE_NAMED_GROUP
5788
  onig_print_names(stderr, reg);
5789
# endif
5790
  print_compiled_byte_code_list(stderr, reg);
5791
#endif
5792
5793
8.85k
 end:
5794
8.85k
  return r;
5795
5796
0
 err_unset:
5797
0
#ifdef USE_SUBEXP_CALL
5798
0
  if (scan_env.num_call > 0) {
5799
0
    unset_addr_list_end(&uslist);
5800
0
  }
5801
0
#endif
5802
0
 err:
5803
0
  if (IS_NOT_NULL(scan_env.error)) {
5804
0
    if (IS_NOT_NULL(einfo)) {
5805
0
      einfo->enc     = scan_env.enc;
5806
0
      einfo->par     = scan_env.error;
5807
0
      einfo->par_end = scan_env.error_end;
5808
0
    }
5809
0
  }
5810
5811
0
  onig_node_free(root);
5812
0
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5813
0
      xfree(scan_env.mem_nodes_dynamic);
5814
0
  return r;
5815
0
}
5816
5817
static int onig_inited = 0;
5818
5819
extern int
5820
onig_reg_init(regex_t* reg, OnigOptionType option,
5821
        OnigCaseFoldType case_fold_flag,
5822
        OnigEncoding enc, const OnigSyntaxType* syntax)
5823
8.85k
{
5824
8.85k
  if (! onig_inited)
5825
1
    onig_init();
5826
5827
8.85k
  if (IS_NULL(reg))
5828
0
    return ONIGERR_INVALID_ARGUMENT;
5829
5830
8.85k
  (reg)->exact            = (UChar* )NULL;
5831
8.85k
  (reg)->chain            = (regex_t* )NULL;
5832
8.85k
  (reg)->p                = (UChar* )NULL;
5833
8.85k
  (reg)->name_table       = (void* )NULL;
5834
8.85k
  (reg)->repeat_range     = (OnigRepeatRange* )NULL;
5835
5836
8.85k
  if (ONIGENC_IS_UNDEF(enc))
5837
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5838
5839
8.85k
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5840
8.85k
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5841
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5842
0
  }
5843
5844
8.85k
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5845
0
    option |= syntax->options;
5846
0
    option &= ~ONIG_OPTION_SINGLELINE;
5847
0
  }
5848
8.85k
  else
5849
8.85k
    option |= syntax->options;
5850
5851
8.85k
  (reg)->enc              = enc;
5852
8.85k
  (reg)->options          = option;
5853
8.85k
  (reg)->syntax           = syntax;
5854
8.85k
  (reg)->optimize         = 0;
5855
5856
8.85k
  (reg)->alloc            = 0;
5857
8.85k
  (reg)->used             = 0;
5858
5859
8.85k
  (reg)->case_fold_flag   = case_fold_flag;
5860
8.85k
  return 0;
5861
8.85k
}
5862
5863
extern int
5864
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5865
          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5866
          const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5867
0
{
5868
0
  int r;
5869
5870
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5871
0
  if (r) return r;
5872
5873
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
5874
0
  return r;
5875
0
}
5876
5877
extern int
5878
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5879
    OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5880
    OnigErrorInfo* einfo)
5881
8.85k
{
5882
8.85k
  int r;
5883
5884
8.85k
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5885
8.85k
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5886
5887
8.85k
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5888
8.85k
  if (r) goto err;
5889
5890
8.85k
  r = onig_compile(*reg, pattern, pattern_end, einfo);
5891
8.85k
  if (r) {
5892
0
  err:
5893
0
    onig_free(*reg);
5894
0
    *reg = NULL;
5895
0
  }
5896
8.85k
  return r;
5897
8.85k
}
5898
5899
extern int
5900
onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5901
0
{
5902
0
  return onig_init();
5903
0
}
5904
5905
extern int
5906
onig_init(void)
5907
386
{
5908
386
  if (onig_inited != 0)
5909
385
    return 0;
5910
5911
1
  onig_inited = 1;
5912
5913
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5914
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
5915
#endif
5916
5917
1
  onigenc_init();
5918
  /* onigenc_set_default_caseconv_table((UChar* )0); */
5919
5920
#ifdef ONIG_DEBUG_STATISTICS
5921
  onig_statistics_init();
5922
#endif
5923
5924
1
  return 0;
5925
386
}
5926
5927
5928
static OnigEndCallListItemType* EndCallTop;
5929
5930
extern void onig_add_end_call(void (*func)(void))
5931
0
{
5932
0
  OnigEndCallListItemType* item;
5933
5934
0
  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5935
0
  if (item == 0) return ;
5936
5937
0
  item->next = EndCallTop;
5938
0
  item->func = func;
5939
5940
0
  EndCallTop = item;
5941
0
}
5942
5943
static void
5944
exec_end_call_list(void)
5945
0
{
5946
0
  OnigEndCallListItemType* prev;
5947
0
  void (*func)(void);
5948
5949
0
  while (EndCallTop != 0) {
5950
0
    func = EndCallTop->func;
5951
0
    (*func)();
5952
5953
0
    prev = EndCallTop;
5954
0
    EndCallTop = EndCallTop->next;
5955
0
    xfree(prev);
5956
0
  }
5957
0
}
5958
5959
extern int
5960
onig_end(void)
5961
0
{
5962
0
  exec_end_call_list();
5963
5964
#ifdef ONIG_DEBUG_STATISTICS
5965
  onig_print_statistics(stderr);
5966
#endif
5967
5968
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5969
  _CrtDumpMemoryLeaks();
5970
#endif
5971
5972
0
  onig_inited = 0;
5973
5974
0
  return 0;
5975
0
}
5976
5977
extern int
5978
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5979
0
{
5980
0
  OnigCodePoint n, *data;
5981
0
  OnigCodePoint low, high, x;
5982
5983
0
  GET_CODE_POINT(n, p);
5984
0
  data = (OnigCodePoint* )p;
5985
0
  data++;
5986
5987
0
  for (low = 0, high = n; low < high; ) {
5988
0
    x = (low + high) >> 1;
5989
0
    if (code > data[x * 2 + 1])
5990
0
      low = x + 1;
5991
0
    else
5992
0
      high = x;
5993
0
  }
5994
5995
0
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5996
0
}
5997
5998
extern int
5999
onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6000
5.00k
{
6001
5.00k
  int found;
6002
6003
5.00k
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6004
0
    if (IS_NULL(cc->mbuf)) {
6005
0
      found = 0;
6006
0
    }
6007
0
    else {
6008
0
      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6009
0
    }
6010
0
  }
6011
5.00k
  else {
6012
5.00k
    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6013
5.00k
  }
6014
6015
5.00k
  if (IS_NCCLASS_NOT(cc))
6016
1.92k
    return !found;
6017
3.08k
  else
6018
3.08k
    return found;
6019
5.00k
}
6020
6021
extern int
6022
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6023
5.00k
{
6024
5.00k
  int len;
6025
6026
5.00k
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6027
0
    len = 2;
6028
0
  }
6029
5.00k
  else {
6030
5.00k
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6031
5.00k
  }
6032
5.00k
  return onig_is_code_in_cc_len(len, code, cc);
6033
5.00k
}
6034
6035
6036
#ifdef ONIG_DEBUG
6037
6038
/* arguments type */
6039
# define ARG_SPECIAL     -1
6040
# define ARG_NON          0
6041
# define ARG_RELADDR      1
6042
# define ARG_ABSADDR      2
6043
# define ARG_LENGTH       3
6044
# define ARG_MEMNUM       4
6045
# define ARG_OPTION       5
6046
# define ARG_STATE_CHECK  6
6047
6048
OnigOpInfoType OnigOpInfo[] = {
6049
  { OP_FINISH,            "finish",          ARG_NON },
6050
  { OP_END,               "end",             ARG_NON },
6051
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
6052
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
6053
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
6054
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
6055
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
6056
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
6057
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
6058
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
6059
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
6060
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
6061
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
6062
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
6063
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
6064
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
6065
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
6066
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
6067
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
6068
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
6069
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
6070
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
6071
  { OP_ANYCHAR,           "anychar",         ARG_NON },
6072
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
6073
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
6074
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
6075
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6076
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6077
  { OP_WORD,                "word",            ARG_NON },
6078
  { OP_NOT_WORD,            "not-word",        ARG_NON },
6079
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
6080
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
6081
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
6082
  { OP_WORD_END,            "word-end",        ARG_NON },
6083
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
6084
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
6085
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
6086
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6087
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
6088
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
6089
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
6090
  { OP_END_BUF,             "end-buf",         ARG_NON },
6091
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
6092
  { OP_END_LINE,            "end-line",        ARG_NON },
6093
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
6094
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
6095
  { OP_BACKREF1,            "backref1",             ARG_NON },
6096
  { OP_BACKREF2,            "backref2",             ARG_NON },
6097
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
6098
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
6099
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
6100
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
6101
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
6102
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
6103
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
6104
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
6105
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
6106
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
6107
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
6108
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
6109
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
6110
  { OP_KEEP,                "keep",                 ARG_NON },
6111
  { OP_FAIL,                "fail",                 ARG_NON },
6112
  { OP_JUMP,                "jump",                 ARG_RELADDR },
6113
  { OP_PUSH,                "push",                 ARG_RELADDR },
6114
  { OP_POP,                 "pop",                  ARG_NON },
6115
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
6116
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
6117
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
6118
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
6119
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
6120
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
6121
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
6122
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
6123
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
6124
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
6125
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
6126
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
6127
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
6128
  { OP_POP_POS,              "pop-pos",              ARG_NON },
6129
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
6130
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
6131
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
6132
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
6133
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
6134
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6135
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6136
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
6137
  { OP_ABSENT,               "absent",               ARG_RELADDR },
6138
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
6139
  { OP_CALL,                 "call",                 ARG_ABSADDR },
6140
  { OP_RETURN,               "return",               ARG_NON },
6141
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
6142
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
6143
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6144
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
6145
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
6146
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6147
    "state-check-anychar-ml*", ARG_STATE_CHECK },
6148
  { -1, "", ARG_NON }
6149
};
6150
6151
static const char*
6152
op2name(int opcode)
6153
{
6154
  int i;
6155
6156
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6157
    if (opcode == OnigOpInfo[i].opcode)
6158
      return OnigOpInfo[i].name;
6159
  }
6160
  return "";
6161
}
6162
6163
static int
6164
op2arg_type(int opcode)
6165
{
6166
  int i;
6167
6168
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6169
    if (opcode == OnigOpInfo[i].opcode)
6170
      return OnigOpInfo[i].arg_type;
6171
  }
6172
  return ARG_SPECIAL;
6173
}
6174
6175
# ifdef ONIG_DEBUG_PARSE_TREE
6176
static void
6177
Indent(FILE* f, int indent)
6178
{
6179
  int i;
6180
  for (i = 0; i < indent; i++) putc(' ', f);
6181
}
6182
# endif /* ONIG_DEBUG_PARSE_TREE */
6183
6184
static void
6185
p_string(FILE* f, ptrdiff_t len, UChar* s)
6186
{
6187
  fputs(":", f);
6188
  while (len-- > 0) { fputc(*s++, f); }
6189
}
6190
6191
static void
6192
p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6193
{
6194
  int x = len * mb_len;
6195
6196
  fprintf(f, ":%d:", len);
6197
  while (x-- > 0) { fputc(*s++, f); }
6198
}
6199
6200
extern void
6201
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6202
                              OnigEncoding enc)
6203
{
6204
  int i, n, arg_type;
6205
  RelAddrType addr;
6206
  LengthType len;
6207
  MemNumType mem;
6208
  StateCheckNumType scn;
6209
  OnigCodePoint code;
6210
  UChar *q;
6211
6212
  fprintf(f, "[%s", op2name(*bp));
6213
  arg_type = op2arg_type(*bp);
6214
  if (arg_type != ARG_SPECIAL) {
6215
    bp++;
6216
    switch (arg_type) {
6217
    case ARG_NON:
6218
      break;
6219
    case ARG_RELADDR:
6220
      GET_RELADDR_INC(addr, bp);
6221
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6222
      break;
6223
    case ARG_ABSADDR:
6224
      GET_ABSADDR_INC(addr, bp);
6225
      fprintf(f, ":(%d)", addr);
6226
      break;
6227
    case ARG_LENGTH:
6228
      GET_LENGTH_INC(len, bp);
6229
      fprintf(f, ":%d", len);
6230
      break;
6231
    case ARG_MEMNUM:
6232
      mem = *((MemNumType* )bp);
6233
      bp += SIZE_MEMNUM;
6234
      fprintf(f, ":%d", mem);
6235
      break;
6236
    case ARG_OPTION:
6237
      {
6238
  OnigOptionType option = *((OnigOptionType* )bp);
6239
  bp += SIZE_OPTION;
6240
  fprintf(f, ":%d", option);
6241
      }
6242
      break;
6243
6244
    case ARG_STATE_CHECK:
6245
      scn = *((StateCheckNumType* )bp);
6246
      bp += SIZE_STATE_CHECK_NUM;
6247
      fprintf(f, ":%d", scn);
6248
      break;
6249
    }
6250
  }
6251
  else {
6252
    switch (*bp++) {
6253
    case OP_EXACT1:
6254
    case OP_ANYCHAR_STAR_PEEK_NEXT:
6255
    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6256
      p_string(f, 1, bp++); break;
6257
    case OP_EXACT2:
6258
      p_string(f, 2, bp); bp += 2; break;
6259
    case OP_EXACT3:
6260
      p_string(f, 3, bp); bp += 3; break;
6261
    case OP_EXACT4:
6262
      p_string(f, 4, bp); bp += 4; break;
6263
    case OP_EXACT5:
6264
      p_string(f, 5, bp); bp += 5; break;
6265
    case OP_EXACTN:
6266
      GET_LENGTH_INC(len, bp);
6267
      p_len_string(f, len, 1, bp);
6268
      bp += len;
6269
      break;
6270
6271
    case OP_EXACTMB2N1:
6272
      p_string(f, 2, bp); bp += 2; break;
6273
    case OP_EXACTMB2N2:
6274
      p_string(f, 4, bp); bp += 4; break;
6275
    case OP_EXACTMB2N3:
6276
      p_string(f, 6, bp); bp += 6; break;
6277
    case OP_EXACTMB2N:
6278
      GET_LENGTH_INC(len, bp);
6279
      p_len_string(f, len, 2, bp);
6280
      bp += len * 2;
6281
      break;
6282
    case OP_EXACTMB3N:
6283
      GET_LENGTH_INC(len, bp);
6284
      p_len_string(f, len, 3, bp);
6285
      bp += len * 3;
6286
      break;
6287
    case OP_EXACTMBN:
6288
      {
6289
  int mb_len;
6290
6291
  GET_LENGTH_INC(mb_len, bp);
6292
  GET_LENGTH_INC(len, bp);
6293
  fprintf(f, ":%d:%d:", mb_len, len);
6294
  n = len * mb_len;
6295
  while (n-- > 0) { fputc(*bp++, f); }
6296
      }
6297
      break;
6298
6299
    case OP_EXACT1_IC:
6300
      len = enclen(enc, bp, bpend);
6301
      p_string(f, len, bp);
6302
      bp += len;
6303
      break;
6304
    case OP_EXACTN_IC:
6305
      GET_LENGTH_INC(len, bp);
6306
      p_len_string(f, len, 1, bp);
6307
      bp += len;
6308
      break;
6309
6310
    case OP_CCLASS:
6311
      n = bitset_on_num((BitSetRef )bp);
6312
      bp += SIZE_BITSET;
6313
      fprintf(f, ":%d", n);
6314
      break;
6315
6316
    case OP_CCLASS_NOT:
6317
      n = bitset_on_num((BitSetRef )bp);
6318
      bp += SIZE_BITSET;
6319
      fprintf(f, ":%d", n);
6320
      break;
6321
6322
    case OP_CCLASS_MB:
6323
    case OP_CCLASS_MB_NOT:
6324
      GET_LENGTH_INC(len, bp);
6325
      q = bp;
6326
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6327
      ALIGNMENT_RIGHT(q);
6328
# endif
6329
      GET_CODE_POINT(code, q);
6330
      bp += len;
6331
      fprintf(f, ":%d:%d", (int )code, len);
6332
      break;
6333
6334
    case OP_CCLASS_MIX:
6335
    case OP_CCLASS_MIX_NOT:
6336
      n = bitset_on_num((BitSetRef )bp);
6337
      bp += SIZE_BITSET;
6338
      GET_LENGTH_INC(len, bp);
6339
      q = bp;
6340
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6341
      ALIGNMENT_RIGHT(q);
6342
# endif
6343
      GET_CODE_POINT(code, q);
6344
      bp += len;
6345
      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6346
      break;
6347
6348
    case OP_BACKREFN_IC:
6349
      mem = *((MemNumType* )bp);
6350
      bp += SIZE_MEMNUM;
6351
      fprintf(f, ":%d", mem);
6352
      break;
6353
6354
    case OP_BACKREF_MULTI_IC:
6355
    case OP_BACKREF_MULTI:
6356
      fputs(" ", f);
6357
      GET_LENGTH_INC(len, bp);
6358
      for (i = 0; i < len; i++) {
6359
  GET_MEMNUM_INC(mem, bp);
6360
  if (i > 0) fputs(", ", f);
6361
  fprintf(f, "%d", mem);
6362
      }
6363
      break;
6364
6365
    case OP_BACKREF_WITH_LEVEL:
6366
      {
6367
  OnigOptionType option;
6368
  LengthType level;
6369
6370
  GET_OPTION_INC(option, bp);
6371
  fprintf(f, ":%d", option);
6372
  GET_LENGTH_INC(level, bp);
6373
  fprintf(f, ":%d", level);
6374
6375
  fputs(" ", f);
6376
  GET_LENGTH_INC(len, bp);
6377
  for (i = 0; i < len; i++) {
6378
    GET_MEMNUM_INC(mem, bp);
6379
    if (i > 0) fputs(", ", f);
6380
    fprintf(f, "%d", mem);
6381
  }
6382
      }
6383
      break;
6384
6385
    case OP_REPEAT:
6386
    case OP_REPEAT_NG:
6387
      {
6388
  mem = *((MemNumType* )bp);
6389
  bp += SIZE_MEMNUM;
6390
  addr = *((RelAddrType* )bp);
6391
  bp += SIZE_RELADDR;
6392
  fprintf(f, ":%d:%d", mem, addr);
6393
      }
6394
      break;
6395
6396
    case OP_PUSH_OR_JUMP_EXACT1:
6397
    case OP_PUSH_IF_PEEK_NEXT:
6398
      addr = *((RelAddrType* )bp);
6399
      bp += SIZE_RELADDR;
6400
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6401
      p_string(f, 1, bp);
6402
      bp += 1;
6403
      break;
6404
6405
    case OP_LOOK_BEHIND:
6406
      GET_LENGTH_INC(len, bp);
6407
      fprintf(f, ":%d", len);
6408
      break;
6409
6410
    case OP_PUSH_LOOK_BEHIND_NOT:
6411
      GET_RELADDR_INC(addr, bp);
6412
      GET_LENGTH_INC(len, bp);
6413
      fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6414
      break;
6415
6416
    case OP_STATE_CHECK_PUSH:
6417
    case OP_STATE_CHECK_PUSH_OR_JUMP:
6418
      scn = *((StateCheckNumType* )bp);
6419
      bp += SIZE_STATE_CHECK_NUM;
6420
      addr = *((RelAddrType* )bp);
6421
      bp += SIZE_RELADDR;
6422
      fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6423
      break;
6424
6425
    case OP_CONDITION:
6426
      GET_MEMNUM_INC(mem, bp);
6427
      GET_RELADDR_INC(addr, bp);
6428
      fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6429
      break;
6430
6431
    default:
6432
      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6433
        bp[-1]);
6434
    }
6435
  }
6436
  fputs("]", f);
6437
  if (nextp) *nextp = bp;
6438
}
6439
6440
# ifdef ONIG_DEBUG_COMPILE
6441
static void
6442
print_compiled_byte_code_list(FILE* f, regex_t* reg)
6443
{
6444
  int ncode;
6445
  UChar* bp = reg->p;
6446
  UChar* end = reg->p + reg->used;
6447
6448
  fprintf(f, "code length: %d", reg->used);
6449
6450
  ncode = -1;
6451
  while (bp < end) {
6452
    ncode++;
6453
    if (ncode % 5 == 0)
6454
      fprintf(f, "\n%ld:", bp - reg->p);
6455
    else
6456
      fprintf(f, " %ld:", bp - reg->p);
6457
    onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6458
  }
6459
6460
  fprintf(f, "\n");
6461
}
6462
# endif /* ONIG_DEBUG_COMPILE */
6463
6464
# ifdef ONIG_DEBUG_PARSE_TREE
6465
static void
6466
print_indent_tree(FILE* f, Node* node, int indent)
6467
{
6468
  int i, type, container_p = 0;
6469
  int add = 3;
6470
  UChar* p;
6471
6472
  Indent(f, indent);
6473
  if (IS_NULL(node)) {
6474
    fprintf(f, "ERROR: null node!!!\n");
6475
    exit (0);
6476
  }
6477
6478
  type = NTYPE(node);
6479
  switch (type) {
6480
  case NT_LIST:
6481
  case NT_ALT:
6482
    if (NTYPE(node) == NT_LIST)
6483
      fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6484
    else
6485
      fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6486
6487
    print_indent_tree(f, NCAR(node), indent + add);
6488
    while (IS_NOT_NULL(node = NCDR(node))) {
6489
      if (NTYPE(node) != type) {
6490
  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6491
  exit(0);
6492
      }
6493
      print_indent_tree(f, NCAR(node), indent + add);
6494
    }
6495
    break;
6496
6497
  case NT_STR:
6498
    fprintf(f, "<string%s:%"PRIxPTR">",
6499
      (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6500
    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6501
      if (*p >= 0x20 && *p < 0x7f)
6502
  fputc(*p, f);
6503
      else {
6504
  fprintf(f, " 0x%02x", *p);
6505
      }
6506
    }
6507
    break;
6508
6509
  case NT_CCLASS:
6510
    fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6511
    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6512
    if (NCCLASS(node)->mbuf) {
6513
      BBuf* bbuf = NCCLASS(node)->mbuf;
6514
      OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6515
      OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6516
      fprintf(f, "%d", *data++);
6517
      for (; data < end; data+=2) {
6518
  fprintf(f, ",");
6519
  fprintf(f, "%04x-%04x", data[0], data[1]);
6520
      }
6521
    }
6522
    break;
6523
6524
  case NT_CTYPE:
6525
    fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6526
    switch (NCTYPE(node)->ctype) {
6527
    case ONIGENC_CTYPE_WORD:
6528
      if (NCTYPE(node)->not != 0)
6529
  fputs("not word",       f);
6530
      else
6531
  fputs("word",           f);
6532
      break;
6533
6534
    default:
6535
      fprintf(f, "ERROR: undefined ctype.\n");
6536
      exit(0);
6537
    }
6538
    break;
6539
6540
  case NT_CANY:
6541
    fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6542
    break;
6543
6544
  case NT_ANCHOR:
6545
    fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6546
    switch (NANCHOR(node)->type) {
6547
    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6548
    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6549
    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6550
    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6551
    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6552
    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6553
6554
    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6555
    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6556
#  ifdef USE_WORD_BEGIN_END
6557
    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6558
    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6559
#  endif
6560
    case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
6561
    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
6562
    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
6563
    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6564
    case ANCHOR_KEEP:            fputs("keep",f);            break;
6565
6566
    default:
6567
      fprintf(f, "ERROR: undefined anchor type.\n");
6568
      break;
6569
    }
6570
    break;
6571
6572
  case NT_BREF:
6573
    {
6574
      int* p;
6575
      BRefNode* br = NBREF(node);
6576
      p = BACKREFS_P(br);
6577
      fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6578
      for (i = 0; i < br->back_num; i++) {
6579
  if (i > 0) fputs(", ", f);
6580
  fprintf(f, "%d", p[i]);
6581
      }
6582
    }
6583
    break;
6584
6585
#  ifdef USE_SUBEXP_CALL
6586
  case NT_CALL:
6587
    {
6588
      CallNode* cn = NCALL(node);
6589
      fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6590
      p_string(f, cn->name_end - cn->name, cn->name);
6591
    }
6592
    break;
6593
#  endif
6594
6595
  case NT_QTFR:
6596
    fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6597
      NQTFR(node)->lower, NQTFR(node)->upper,
6598
      (NQTFR(node)->greedy ? "" : "?"));
6599
    print_indent_tree(f, NQTFR(node)->target, indent + add);
6600
    break;
6601
6602
  case NT_ENCLOSE:
6603
    fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6604
    switch (NENCLOSE(node)->type) {
6605
    case ENCLOSE_OPTION:
6606
      fprintf(f, "option:%d", NENCLOSE(node)->option);
6607
      break;
6608
    case ENCLOSE_MEMORY:
6609
      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6610
      break;
6611
    case ENCLOSE_STOP_BACKTRACK:
6612
      fprintf(f, "stop-bt");
6613
      break;
6614
    case ENCLOSE_CONDITION:
6615
      fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6616
      break;
6617
    case ENCLOSE_ABSENT:
6618
      fprintf(f, "absent");
6619
      break;
6620
6621
    default:
6622
      break;
6623
    }
6624
    fprintf(f, "\n");
6625
    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6626
    break;
6627
6628
  default:
6629
    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6630
    break;
6631
  }
6632
6633
  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6634
      type != NT_ENCLOSE)
6635
    fprintf(f, "\n");
6636
6637
  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6638
6639
  fflush(f);
6640
}
6641
6642
static void
6643
print_tree(FILE* f, Node* node)
6644
{
6645
  print_indent_tree(f, node, 0);
6646
}
6647
# endif /* ONIG_DEBUG_PARSE_TREE */
6648
#endif /* ONIG_DEBUG */