Coverage Report

Created: 2025-01-28 06:34

/src/fluent-bit/lib/onigmo/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regparse.h"
32
33
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35
extern OnigCaseFoldType
36
onig_get_default_case_fold_flag(void)
37
0
{
38
0
  return OnigDefaultCaseFoldFlag;
39
0
}
40
41
extern int
42
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43
0
{
44
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
45
0
  return 0;
46
0
}
47
48
49
#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51
#endif
52
53
#if 0
54
static UChar*
55
str_dup(UChar* s, UChar* end)
56
{
57
  ptrdiff_t len = end - s;
58
59
  if (len > 0) {
60
    UChar* r = (UChar* )xmalloc(len + 1);
61
    CHECK_NULL_RETURN(r);
62
    xmemcpy(r, s, len);
63
    r[len] = (UChar )0;
64
    return r;
65
  }
66
  else return NULL;
67
}
68
#endif
69
70
static void
71
swap_node(Node* a, Node* b)
72
6.63k
{
73
6.63k
  Node c;
74
6.63k
  c = *a; *a = *b; *b = c;
75
76
6.63k
  if (NTYPE(a) == NT_STR) {
77
0
    StrNode* sn = NSTR(a);
78
0
    if (sn->capa == 0) {
79
0
      size_t len = sn->end - sn->s;
80
0
      sn->s   = sn->buf;
81
0
      sn->end = sn->s + len;
82
0
    }
83
0
  }
84
85
6.63k
  if (NTYPE(b) == NT_STR) {
86
0
    StrNode* sn = NSTR(b);
87
0
    if (sn->capa == 0) {
88
0
      size_t len = sn->end - sn->s;
89
0
      sn->s   = sn->buf;
90
0
      sn->end = sn->s + len;
91
0
    }
92
0
  }
93
6.63k
}
94
95
static OnigDistance
96
distance_add(OnigDistance d1, OnigDistance d2)
97
205k
{
98
205k
  if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99
64.4k
    return ONIG_INFINITE_DISTANCE;
100
141k
  else {
101
141k
    if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102
0
    else return ONIG_INFINITE_DISTANCE;
103
141k
  }
104
205k
}
105
106
static OnigDistance
107
distance_multiply(OnigDistance d, int m)
108
17.0k
{
109
17.0k
  if (m == 0) return 0;
110
111
10.7k
  if (d < ONIG_INFINITE_DISTANCE / m)
112
10.7k
    return d * m;
113
0
  else
114
0
    return ONIG_INFINITE_DISTANCE;
115
10.7k
}
116
117
static int
118
bitset_is_empty(BitSetRef bs)
119
0
{
120
0
  int i;
121
0
  for (i = 0; i < BITSET_SIZE; i++) {
122
0
    if (bs[i] != 0) return 0;
123
0
  }
124
0
  return 1;
125
0
}
126
127
#ifdef ONIG_DEBUG
128
static int
129
bitset_on_num(BitSetRef bs)
130
{
131
  int i, n;
132
133
  n = 0;
134
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135
    if (BITSET_AT(bs, i)) n++;
136
  }
137
  return n;
138
}
139
#endif
140
141
extern int
142
onig_bbuf_init(BBuf* buf, OnigDistance size)
143
18.4k
{
144
18.4k
  if (size <= 0) {
145
0
    size   = 0;
146
0
    buf->p = NULL;
147
0
  }
148
18.4k
  else {
149
18.4k
    buf->p = (UChar* )xmalloc(size);
150
18.4k
    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151
18.4k
  }
152
153
18.4k
  buf->alloc = (unsigned int )size;
154
18.4k
  buf->used  = 0;
155
18.4k
  return 0;
156
18.4k
}
157
158
159
#ifdef USE_SUBEXP_CALL
160
161
static int
162
unset_addr_list_init(UnsetAddrList* uslist, int size)
163
0
{
164
0
  UnsetAddr* p;
165
166
0
  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167
0
  CHECK_NULL_RETURN_MEMERR(p);
168
0
  uslist->num   = 0;
169
0
  uslist->alloc = size;
170
0
  uslist->us    = p;
171
0
  return 0;
172
0
}
173
174
static void
175
unset_addr_list_end(UnsetAddrList* uslist)
176
0
{
177
0
  if (IS_NOT_NULL(uslist->us))
178
0
    xfree(uslist->us);
179
0
}
180
181
static int
182
unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183
0
{
184
0
  UnsetAddr* p;
185
0
  int size;
186
187
0
  if (uslist->num >= uslist->alloc) {
188
0
    size = uslist->alloc * 2;
189
0
    p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190
0
    CHECK_NULL_RETURN_MEMERR(p);
191
0
    uslist->alloc = size;
192
0
    uslist->us    = p;
193
0
  }
194
195
0
  uslist->us[uslist->num].offset = offset;
196
0
  uslist->us[uslist->num].target = node;
197
0
  uslist->num++;
198
0
  return 0;
199
0
}
200
#endif /* USE_SUBEXP_CALL */
201
202
203
static int
204
add_opcode(regex_t* reg, int opcode)
205
121k
{
206
121k
  BBUF_ADD1(reg, opcode);
207
121k
  return 0;
208
121k
}
209
210
#ifdef USE_COMBINATION_EXPLOSION_CHECK
211
static int
212
add_state_check_num(regex_t* reg, int num)
213
{
214
  StateCheckNumType n = (StateCheckNumType )num;
215
216
  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217
  return 0;
218
}
219
#endif
220
221
static int
222
add_rel_addr(regex_t* reg, int addr)
223
33.8k
{
224
33.8k
  RelAddrType ra = (RelAddrType )addr;
225
226
33.8k
  BBUF_ADD(reg, &ra, SIZE_RELADDR);
227
33.8k
  return 0;
228
33.8k
}
229
230
static int
231
add_abs_addr(regex_t* reg, int addr)
232
0
{
233
0
  AbsAddrType ra = (AbsAddrType )addr;
234
235
0
  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236
0
  return 0;
237
0
}
238
239
static int
240
add_length(regex_t* reg, OnigDistance len)
241
9.51k
{
242
9.51k
  LengthType l = (LengthType )len;
243
244
9.51k
  BBUF_ADD(reg, &l, SIZE_LENGTH);
245
9.51k
  return 0;
246
9.51k
}
247
248
static int
249
add_mem_num(regex_t* reg, int num)
250
5.48k
{
251
5.48k
  MemNumType n = (MemNumType )num;
252
253
5.48k
  BBUF_ADD(reg, &n, SIZE_MEMNUM);
254
5.48k
  return 0;
255
5.48k
}
256
257
#if 0
258
static int
259
add_pointer(regex_t* reg, void* addr)
260
{
261
  PointerType ptr = (PointerType )addr;
262
263
  BBUF_ADD(reg, &ptr, SIZE_POINTER);
264
  return 0;
265
}
266
#endif
267
268
static int
269
add_option(regex_t* reg, OnigOptionType option)
270
0
{
271
0
  BBUF_ADD(reg, &option, SIZE_OPTION);
272
0
  return 0;
273
0
}
274
275
static int
276
add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277
33.8k
{
278
33.8k
  int r;
279
280
33.8k
  r = add_opcode(reg, opcode);
281
33.8k
  if (r) return r;
282
33.8k
  r = add_rel_addr(reg, addr);
283
33.8k
  return r;
284
33.8k
}
285
286
static int
287
add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288
23.0k
{
289
23.0k
  BBUF_ADD(reg, bytes, len);
290
23.0k
  return 0;
291
23.0k
}
292
293
static int
294
add_bitset(regex_t* reg, BitSetRef bs)
295
20.5k
{
296
20.5k
  BBUF_ADD(reg, bs, SIZE_BITSET);
297
20.5k
  return 0;
298
20.5k
}
299
300
static int
301
add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302
0
{
303
0
  int r;
304
0
305
0
  r = add_opcode(reg, opcode);
306
0
  if (r) return r;
307
0
  r = add_option(reg, option);
308
0
  return r;
309
0
}
310
311
static int compile_length_tree(Node* node, regex_t* reg);
312
static int compile_tree(Node* node, regex_t* reg);
313
314
315
#define IS_NEED_STR_LEN_OP_EXACT(op) \
316
39.5k
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
317
39.5k
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
318
319
static int
320
select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321
39.5k
{
322
39.5k
  int op;
323
39.5k
  OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325
39.5k
  if (ignore_case) {
326
0
    switch (str_len) {
327
0
    case 1:  op = OP_EXACT1_IC; break;
328
0
    default: op = OP_EXACTN_IC; break;
329
0
    }
330
0
  }
331
39.5k
  else {
332
39.5k
    switch (mb_len) {
333
39.5k
    case 1:
334
39.5k
      switch (str_len) {
335
12.5k
      case 1:  op = OP_EXACT1; break;
336
1.01k
      case 2:  op = OP_EXACT2; break;
337
1.54k
      case 3:  op = OP_EXACT3; break;
338
2.09k
      case 4:  op = OP_EXACT4; break;
339
3.19k
      case 5:  op = OP_EXACT5; break;
340
19.1k
      default: op = OP_EXACTN; break;
341
39.5k
      }
342
39.5k
      break;
343
344
39.5k
    case 2:
345
0
      switch (str_len) {
346
0
      case 1:  op = OP_EXACTMB2N1; break;
347
0
      case 2:  op = OP_EXACTMB2N2; break;
348
0
      case 3:  op = OP_EXACTMB2N3; break;
349
0
      default: op = OP_EXACTMB2N;  break;
350
0
      }
351
0
      break;
352
353
0
    case 3:
354
0
      op = OP_EXACTMB3N;
355
0
      break;
356
357
0
    default:
358
0
      op = OP_EXACTMBN;
359
0
      break;
360
39.5k
    }
361
39.5k
  }
362
39.5k
  return op;
363
39.5k
}
364
365
static int
366
compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367
5.73k
{
368
5.73k
  int r;
369
5.73k
  int saved_num_null_check = reg->num_null_check;
370
371
5.73k
  if (empty_info != 0) {
372
0
    r = add_opcode(reg, OP_NULL_CHECK_START);
373
0
    if (r) return r;
374
0
    r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375
0
    if (r) return r;
376
0
    reg->num_null_check++;
377
0
  }
378
379
5.73k
  r = compile_tree(node, reg);
380
5.73k
  if (r) return r;
381
382
5.73k
  if (empty_info != 0) {
383
0
    if (empty_info == NQ_TARGET_IS_EMPTY)
384
0
      r = add_opcode(reg, OP_NULL_CHECK_END);
385
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387
0
    else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388
0
      r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390
0
    if (r) return r;
391
0
    r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392
0
  }
393
5.73k
  return r;
394
5.73k
}
395
396
#ifdef USE_SUBEXP_CALL
397
static int
398
compile_call(CallNode* node, regex_t* reg)
399
0
{
400
0
  int r;
401
402
0
  r = add_opcode(reg, OP_CALL);
403
0
  if (r) return r;
404
0
  r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405
0
                          node->target);
406
0
  if (r) return r;
407
0
  r = add_abs_addr(reg, 0 /*dummy addr.*/);
408
0
  return r;
409
0
}
410
#endif
411
412
static int
413
compile_tree_n_times(Node* node, int n, regex_t* reg)
414
15.4k
{
415
15.4k
  int i, r;
416
417
24.6k
  for (i = 0; i < n; i++) {
418
9.19k
    r = compile_tree(node, reg);
419
9.19k
    if (r) return r;
420
9.19k
  }
421
15.4k
  return 0;
422
15.4k
}
423
424
static int
425
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426
                          regex_t* reg ARG_UNUSED, int ignore_case)
427
17.5k
{
428
17.5k
  int len;
429
17.5k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431
17.5k
  len = SIZE_OPCODE;
432
433
17.5k
  if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
434
17.5k
  if (IS_NEED_STR_LEN_OP_EXACT(op))
435
9.64k
    len += SIZE_LENGTH;
436
437
17.5k
  len += (int )byte_len;
438
17.5k
  return len;
439
17.5k
}
440
441
static int
442
add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443
                   regex_t* reg, int ignore_case)
444
22.0k
{
445
22.0k
  int op = select_str_opcode(mb_len, byte_len, ignore_case);
446
22.0k
  add_opcode(reg, op);
447
448
22.0k
  if (op == OP_EXACTMBN)
449
0
    add_length(reg, mb_len);
450
451
22.0k
  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452
9.51k
    if (op == OP_EXACTN_IC)
453
0
      add_length(reg, byte_len);
454
9.51k
    else
455
9.51k
      add_length(reg, byte_len / mb_len);
456
9.51k
  }
457
458
22.0k
  add_bytes(reg, s, byte_len);
459
22.0k
  return 0;
460
22.0k
}
461
462
463
static int
464
compile_length_string_node(Node* node, regex_t* reg)
465
17.5k
{
466
17.5k
  int rlen, r, len, prev_len, blen, ambig;
467
17.5k
  OnigEncoding enc = reg->enc;
468
17.5k
  UChar *p, *prev;
469
17.5k
  StrNode* sn;
470
471
17.5k
  sn = NSTR(node);
472
17.5k
  if (sn->end <= sn->s)
473
0
    return 0;
474
475
17.5k
  ambig = NSTRING_IS_AMBIG(node);
476
477
17.5k
  p = prev = sn->s;
478
17.5k
  prev_len = enclen(enc, p, sn->end);
479
17.5k
  p += prev_len;
480
17.5k
  blen = prev_len;
481
17.5k
  rlen = 0;
482
483
126k
  for (; p < sn->end; ) {
484
109k
    len = enclen(enc, p, sn->end);
485
109k
    if (len == prev_len || ambig) {
486
109k
      blen += len;
487
109k
    }
488
0
    else {
489
0
      r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490
0
      rlen += r;
491
0
      prev = p;
492
0
      blen = len;
493
0
      prev_len = len;
494
0
    }
495
109k
    p += len;
496
109k
  }
497
17.5k
  r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498
17.5k
  rlen += r;
499
17.5k
  return rlen;
500
17.5k
}
501
502
static int
503
compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504
0
{
505
0
  if (sn->end <= sn->s)
506
0
    return 0;
507
508
0
  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509
0
}
510
511
static int
512
compile_string_node(Node* node, regex_t* reg)
513
22.5k
{
514
22.5k
  int r, len, prev_len, blen, ambig;
515
22.5k
  OnigEncoding enc = reg->enc;
516
22.5k
  UChar *p, *prev, *end;
517
22.5k
  StrNode* sn;
518
519
22.5k
  sn = NSTR(node);
520
22.5k
  if (sn->end <= sn->s)
521
534
    return 0;
522
523
22.0k
  end = sn->end;
524
22.0k
  ambig = NSTRING_IS_AMBIG(node);
525
526
22.0k
  p = prev = sn->s;
527
22.0k
  prev_len = enclen(enc, p, end);
528
22.0k
  p += prev_len;
529
22.0k
  blen = prev_len;
530
531
193k
  for (; p < end; ) {
532
171k
    len = enclen(enc, p, end);
533
171k
    if (len == prev_len || ambig) {
534
171k
      blen += len;
535
171k
    }
536
0
    else {
537
0
      r = add_compile_string(prev, prev_len, blen, reg, ambig);
538
0
      if (p + len > end) {
539
0
        return 0;
540
0
      }
541
0
      if (r) return r;
542
543
0
      prev  = p;
544
0
      blen  = len;
545
0
      prev_len = len;
546
0
    }
547
548
171k
    p += len;
549
171k
  }
550
22.0k
  return add_compile_string(prev, prev_len, blen, reg, ambig);
551
22.0k
}
552
553
static int
554
compile_string_raw_node(StrNode* sn, regex_t* reg)
555
0
{
556
0
  if (sn->end <= sn->s)
557
0
    return 0;
558
559
0
  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560
0
}
561
562
static int
563
add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
564
0
{
565
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566
0
  add_length(reg, mbuf->used);
567
0
  return add_bytes(reg, mbuf->p, mbuf->used);
568
#else
569
  int r, pad_size;
570
  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
571
572
  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573
  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575
576
  r = add_bytes(reg, mbuf->p, mbuf->used);
577
578
  /* padding for return value from compile_length_cclass_node() to be fix. */
579
  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580
  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581
  return r;
582
#endif
583
0
}
584
585
static int
586
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
587
16.8k
{
588
16.8k
  int len;
589
590
16.8k
  if (IS_NULL(cc->mbuf)) {
591
16.8k
    len = SIZE_OPCODE + SIZE_BITSET;
592
16.8k
  }
593
0
  else {
594
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
595
0
      len = SIZE_OPCODE;
596
0
    }
597
0
    else {
598
0
      len = SIZE_OPCODE + SIZE_BITSET;
599
0
    }
600
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
601
0
    len += SIZE_LENGTH + cc->mbuf->used;
602
#else
603
    len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
604
#endif
605
0
  }
606
607
16.8k
  return len;
608
16.8k
}
609
610
static int
611
compile_cclass_node(CClassNode* cc, regex_t* reg)
612
20.5k
{
613
20.5k
  int r;
614
615
20.5k
  if (IS_NULL(cc->mbuf)) {
616
20.5k
    if (IS_NCCLASS_NOT(cc))
617
5.52k
      add_opcode(reg, OP_CCLASS_NOT);
618
14.9k
    else
619
14.9k
      add_opcode(reg, OP_CCLASS);
620
621
20.5k
    r = add_bitset(reg, cc->bs);
622
20.5k
  }
623
0
  else {
624
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
625
0
      if (IS_NCCLASS_NOT(cc))
626
0
  add_opcode(reg, OP_CCLASS_MB_NOT);
627
0
      else
628
0
  add_opcode(reg, OP_CCLASS_MB);
629
630
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
631
0
    }
632
0
    else {
633
0
      if (IS_NCCLASS_NOT(cc))
634
0
  add_opcode(reg, OP_CCLASS_MIX_NOT);
635
0
      else
636
0
  add_opcode(reg, OP_CCLASS_MIX);
637
638
0
      r = add_bitset(reg, cc->bs);
639
0
      if (r) return r;
640
0
      r = add_multi_byte_cclass(cc->mbuf, reg);
641
0
    }
642
0
  }
643
644
20.5k
  return r;
645
20.5k
}
646
647
static int
648
entry_repeat_range(regex_t* reg, int id, int lower, int upper)
649
0
{
650
0
#define REPEAT_RANGE_ALLOC  4
651
652
0
  OnigRepeatRange* p;
653
654
0
  if (reg->repeat_range_alloc == 0) {
655
0
    p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
656
0
    CHECK_NULL_RETURN_MEMERR(p);
657
0
    reg->repeat_range = p;
658
0
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
659
0
  }
660
0
  else if (reg->repeat_range_alloc <= id) {
661
0
    int n;
662
0
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
663
0
    p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
664
0
                                    sizeof(OnigRepeatRange) * n);
665
0
    CHECK_NULL_RETURN_MEMERR(p);
666
0
    reg->repeat_range = p;
667
0
    reg->repeat_range_alloc = n;
668
0
  }
669
0
  else {
670
0
    p = reg->repeat_range;
671
0
  }
672
673
0
  p[id].lower = lower;
674
0
  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
675
0
  return 0;
676
0
}
677
678
static int
679
compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
680
                          regex_t* reg)
681
0
{
682
0
  int r;
683
0
  int num_repeat = reg->num_repeat;
684
685
0
  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
686
0
  if (r) return r;
687
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
688
0
  reg->num_repeat++;
689
0
  if (r) return r;
690
0
  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
691
0
  if (r) return r;
692
693
0
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
694
0
  if (r) return r;
695
696
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
697
0
  if (r) return r;
698
699
0
  if (
700
0
#ifdef USE_SUBEXP_CALL
701
0
      reg->num_call > 0 ||
702
0
#endif
703
0
      IS_QUANTIFIER_IN_REPEAT(qn)) {
704
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
705
0
  }
706
0
  else {
707
0
    r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
708
0
  }
709
0
  if (r) return r;
710
0
  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
711
0
  return r;
712
0
}
713
714
static int
715
is_anychar_star_quantifier(QtfrNode* qn)
716
8.84k
{
717
8.84k
  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
718
8.84k
      NTYPE(qn->target) == NT_CANY)
719
2.56k
    return 1;
720
6.27k
  else
721
6.27k
    return 0;
722
8.84k
}
723
724
5.63k
#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
725
#define CKN_ON   (ckn > 0)
726
727
#ifdef USE_COMBINATION_EXPLOSION_CHECK
728
729
static int
730
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
731
{
732
  int len, mod_tlen, cklen;
733
  int ckn;
734
  int infinite = IS_REPEAT_INFINITE(qn->upper);
735
  int empty_info = qn->target_empty_info;
736
  int tlen = compile_length_tree(qn->target, reg);
737
738
  if (tlen < 0) return tlen;
739
740
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
741
742
  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
743
744
  /* anychar repeat */
745
  if (NTYPE(qn->target) == NT_CANY) {
746
    if (qn->greedy && infinite) {
747
      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
748
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
749
      else
750
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
751
    }
752
  }
753
754
  if (empty_info != 0)
755
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
756
  else
757
    mod_tlen = tlen;
758
759
  if (infinite && qn->lower <= 1) {
760
    if (qn->greedy) {
761
      if (qn->lower == 1)
762
  len = SIZE_OP_JUMP;
763
      else
764
  len = 0;
765
766
      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
767
    }
768
    else {
769
      if (qn->lower == 0)
770
  len = SIZE_OP_JUMP;
771
      else
772
  len = 0;
773
774
      len += mod_tlen + SIZE_OP_PUSH + cklen;
775
    }
776
  }
777
  else if (qn->upper == 0) {
778
    if (qn->is_referred != 0) /* /(?<n>..){0}/ */
779
      len = SIZE_OP_JUMP + tlen;
780
    else
781
      len = 0;
782
  }
783
  else if (qn->upper == 1 && qn->greedy) {
784
    if (qn->lower == 0) {
785
      if (CKN_ON) {
786
  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
787
      }
788
      else {
789
  len = SIZE_OP_PUSH + tlen;
790
      }
791
    }
792
    else {
793
      len = tlen;
794
    }
795
  }
796
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
797
    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
798
  }
799
  else {
800
    len = SIZE_OP_REPEAT_INC
801
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
802
    if (CKN_ON)
803
      len += SIZE_OP_STATE_CHECK;
804
  }
805
806
  return len;
807
}
808
809
static int
810
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
811
{
812
  int r, mod_tlen;
813
  int ckn;
814
  int infinite = IS_REPEAT_INFINITE(qn->upper);
815
  int empty_info = qn->target_empty_info;
816
  int tlen = compile_length_tree(qn->target, reg);
817
818
  if (tlen < 0) return tlen;
819
820
  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
821
822
  if (is_anychar_star_quantifier(qn)) {
823
    r = compile_tree_n_times(qn->target, qn->lower, reg);
824
    if (r) return r;
825
    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
826
      if (IS_MULTILINE(reg->options))
827
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
828
      else
829
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
830
      if (r) return r;
831
      if (CKN_ON) {
832
  r = add_state_check_num(reg, ckn);
833
  if (r) return r;
834
      }
835
836
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
837
    }
838
    else {
839
      if (IS_MULTILINE(reg->options)) {
840
  r = add_opcode(reg, (CKN_ON ?
841
             OP_STATE_CHECK_ANYCHAR_ML_STAR
842
           : OP_ANYCHAR_ML_STAR));
843
      }
844
      else {
845
  r = add_opcode(reg, (CKN_ON ?
846
             OP_STATE_CHECK_ANYCHAR_STAR
847
           : OP_ANYCHAR_STAR));
848
      }
849
      if (r) return r;
850
      if (CKN_ON)
851
  r = add_state_check_num(reg, ckn);
852
853
      return r;
854
    }
855
  }
856
857
  if (empty_info != 0)
858
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
859
  else
860
    mod_tlen = tlen;
861
862
  if (infinite && qn->lower <= 1) {
863
    if (qn->greedy) {
864
      if (qn->lower == 1) {
865
  r = add_opcode_rel_addr(reg, OP_JUMP,
866
      (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
867
  if (r) return r;
868
      }
869
870
      if (CKN_ON) {
871
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
872
  if (r) return r;
873
  r = add_state_check_num(reg, ckn);
874
  if (r) return r;
875
  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
876
      }
877
      else {
878
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
879
      }
880
      if (r) return r;
881
      r = compile_tree_empty_check(qn->target, reg, empty_info);
882
      if (r) return r;
883
      r = add_opcode_rel_addr(reg, OP_JUMP,
884
        -(mod_tlen + (int )SIZE_OP_JUMP
885
    + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
886
    }
887
    else {
888
      if (qn->lower == 0) {
889
  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
890
  if (r) return r;
891
      }
892
      r = compile_tree_empty_check(qn->target, reg, empty_info);
893
      if (r) return r;
894
      if (CKN_ON) {
895
  r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
896
  if (r) return r;
897
  r = add_state_check_num(reg, ckn);
898
  if (r) return r;
899
  r = add_rel_addr(reg,
900
     -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
901
      }
902
      else
903
  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
904
    }
905
  }
906
  else if (qn->upper == 0) {
907
    if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
908
      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
909
      if (r) return r;
910
      r = compile_tree(qn->target, reg);
911
    }
912
    else
913
      r = 0;
914
  }
915
  else if (qn->upper == 1 && qn->greedy) {
916
    if (qn->lower == 0) {
917
      if (CKN_ON) {
918
  r = add_opcode(reg, OP_STATE_CHECK_PUSH);
919
  if (r) return r;
920
  r = add_state_check_num(reg, ckn);
921
  if (r) return r;
922
  r = add_rel_addr(reg, tlen);
923
      }
924
      else {
925
  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
926
      }
927
      if (r) return r;
928
    }
929
930
    r = compile_tree(qn->target, reg);
931
  }
932
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
933
    if (CKN_ON) {
934
      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
935
      if (r) return r;
936
      r = add_state_check_num(reg, ckn);
937
      if (r) return r;
938
      r = add_rel_addr(reg, SIZE_OP_JUMP);
939
    }
940
    else {
941
      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
942
    }
943
944
    if (r) return r;
945
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
946
    if (r) return r;
947
    r = compile_tree(qn->target, reg);
948
  }
949
  else {
950
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
951
    if (CKN_ON) {
952
      if (r) return r;
953
      r = add_opcode(reg, OP_STATE_CHECK);
954
      if (r) return r;
955
      r = add_state_check_num(reg, ckn);
956
    }
957
  }
958
  return r;
959
}
960
961
#else /* USE_COMBINATION_EXPLOSION_CHECK */
962
963
static int
964
compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
965
4.05k
{
966
4.05k
  int len, mod_tlen;
967
4.05k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
968
4.05k
  int empty_info = qn->target_empty_info;
969
4.05k
  int tlen = compile_length_tree(qn->target, reg);
970
971
4.05k
  if (tlen < 0) return tlen;
972
973
  /* anychar repeat */
974
4.05k
  if (NTYPE(qn->target) == NT_CANY) {
975
0
    if (qn->greedy && infinite) {
976
0
      if (IS_NOT_NULL(qn->next_head_exact))
977
0
  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
978
0
      else
979
0
  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
980
0
    }
981
0
  }
982
983
4.05k
  if (empty_info != 0)
984
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
985
4.05k
  else
986
4.05k
    mod_tlen = tlen;
987
988
4.05k
  if (infinite &&
989
4.05k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
990
4.05k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
991
0
      len = SIZE_OP_JUMP;
992
0
    }
993
4.05k
    else {
994
4.05k
      len = tlen * qn->lower;
995
4.05k
    }
996
997
4.05k
    if (qn->greedy) {
998
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
999
      if (IS_NOT_NULL(qn->head_exact))
1000
  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1001
      else
1002
#endif
1003
4.05k
      if (IS_NOT_NULL(qn->next_head_exact))
1004
3.03k
  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1005
1.01k
      else
1006
1.01k
  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1007
4.05k
    }
1008
0
    else
1009
0
      len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1010
4.05k
  }
1011
0
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1012
0
    len = SIZE_OP_JUMP + tlen;
1013
0
  }
1014
0
  else if (!infinite && qn->greedy &&
1015
0
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1016
0
                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1017
0
    len = tlen * qn->lower;
1018
0
    len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1019
0
  }
1020
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1021
0
    len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1022
0
  }
1023
0
  else {
1024
0
    len = SIZE_OP_REPEAT_INC
1025
0
        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1026
0
  }
1027
1028
4.05k
  return len;
1029
4.05k
}
1030
1031
static int
1032
compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1033
8.84k
{
1034
8.84k
  int i, r, mod_tlen;
1035
8.84k
  int infinite = IS_REPEAT_INFINITE(qn->upper);
1036
8.84k
  int empty_info = qn->target_empty_info;
1037
8.84k
  int tlen = compile_length_tree(qn->target, reg);
1038
1039
8.84k
  if (tlen < 0) return tlen;
1040
1041
8.84k
  if (is_anychar_star_quantifier(qn)) {
1042
2.56k
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1043
2.56k
    if (r) return r;
1044
2.56k
    if (IS_NOT_NULL(qn->next_head_exact)) {
1045
1.00k
      if (IS_MULTILINE(reg->options))
1046
0
  r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1047
1.00k
      else
1048
1.00k
  r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1049
1.00k
      if (r) return r;
1050
1.00k
      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1051
1.00k
    }
1052
1.55k
    else {
1053
1.55k
      if (IS_MULTILINE(reg->options))
1054
0
  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1055
1.55k
      else
1056
1.55k
  return add_opcode(reg, OP_ANYCHAR_STAR);
1057
1.55k
    }
1058
2.56k
  }
1059
1060
6.27k
  if (empty_info != 0)
1061
0
    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1062
6.27k
  else
1063
6.27k
    mod_tlen = tlen;
1064
1065
6.27k
  if (infinite &&
1066
6.27k
      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1067
5.73k
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1068
0
      if (qn->greedy) {
1069
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1070
  if (IS_NOT_NULL(qn->head_exact))
1071
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1072
  else
1073
#endif
1074
0
  if (IS_NOT_NULL(qn->next_head_exact))
1075
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1076
0
  else
1077
0
    r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1078
0
      }
1079
0
      else {
1080
0
  r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1081
0
      }
1082
0
      if (r) return r;
1083
0
    }
1084
5.73k
    else {
1085
5.73k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1086
5.73k
      if (r) return r;
1087
5.73k
    }
1088
1089
5.73k
    if (qn->greedy) {
1090
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1091
      if (IS_NOT_NULL(qn->head_exact)) {
1092
  r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1093
           mod_tlen + SIZE_OP_JUMP);
1094
  if (r) return r;
1095
  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1096
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1097
  if (r) return r;
1098
  r = add_opcode_rel_addr(reg, OP_JUMP,
1099
  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1100
      }
1101
      else
1102
#endif
1103
5.18k
      if (IS_NOT_NULL(qn->next_head_exact)) {
1104
0
  r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1105
0
        mod_tlen + SIZE_OP_JUMP);
1106
0
  if (r) return r;
1107
0
  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1108
0
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1109
0
  if (r) return r;
1110
0
  r = add_opcode_rel_addr(reg, OP_JUMP,
1111
0
          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1112
0
      }
1113
5.18k
      else {
1114
5.18k
  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1115
5.18k
  if (r) return r;
1116
5.18k
  r = compile_tree_empty_check(qn->target, reg, empty_info);
1117
5.18k
  if (r) return r;
1118
5.18k
  r = add_opcode_rel_addr(reg, OP_JUMP,
1119
5.18k
         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1120
5.18k
      }
1121
5.18k
    }
1122
550
    else {
1123
550
      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1124
550
      if (r) return r;
1125
550
      r = compile_tree_empty_check(qn->target, reg, empty_info);
1126
550
      if (r) return r;
1127
550
      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1128
550
    }
1129
5.73k
  }
1130
537
  else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1131
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1132
0
    if (r) return r;
1133
0
    r = compile_tree(qn->target, reg);
1134
0
  }
1135
537
  else if (!infinite && qn->greedy &&
1136
537
           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1137
537
                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1138
537
    int n = qn->upper - qn->lower;
1139
1140
537
    r = compile_tree_n_times(qn->target, qn->lower, reg);
1141
537
    if (r) return r;
1142
1143
1.07k
    for (i = 0; i < n; i++) {
1144
537
      r = add_opcode_rel_addr(reg, OP_PUSH,
1145
537
         (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1146
537
      if (r) return r;
1147
537
      r = compile_tree(qn->target, reg);
1148
537
      if (r) return r;
1149
537
    }
1150
537
  }
1151
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1152
0
    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1153
0
    if (r) return r;
1154
0
    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1155
0
    if (r) return r;
1156
0
    r = compile_tree(qn->target, reg);
1157
0
  }
1158
0
  else {
1159
0
    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1160
0
  }
1161
6.27k
  return r;
1162
6.27k
}
1163
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1164
1165
static int
1166
compile_length_option_node(EncloseNode* node, regex_t* reg)
1167
0
{
1168
0
  int tlen;
1169
0
  OnigOptionType prev = reg->options;
1170
1171
0
  reg->options = node->option;
1172
0
  tlen = compile_length_tree(node->target, reg);
1173
0
  reg->options = prev;
1174
1175
0
  if (tlen < 0) return tlen;
1176
1177
0
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1178
0
    return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1179
0
           + tlen + SIZE_OP_SET_OPTION;
1180
0
  }
1181
0
  else
1182
0
    return tlen;
1183
0
}
1184
1185
static int
1186
compile_option_node(EncloseNode* node, regex_t* reg)
1187
534
{
1188
534
  int r;
1189
534
  OnigOptionType prev = reg->options;
1190
1191
534
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1192
0
    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1193
0
    if (r) return r;
1194
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1195
0
    if (r) return r;
1196
0
    r = add_opcode(reg, OP_FAIL);
1197
0
    if (r) return r;
1198
0
  }
1199
1200
534
  reg->options = node->option;
1201
534
  r = compile_tree(node->target, reg);
1202
534
  reg->options = prev;
1203
1204
534
  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1205
0
    if (r) return r;
1206
0
    r = add_opcode_option(reg, OP_SET_OPTION, prev);
1207
0
  }
1208
534
  return r;
1209
534
}
1210
1211
static int
1212
compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1213
3.03k
{
1214
3.03k
  int len;
1215
3.03k
  int tlen;
1216
1217
3.03k
  if (node->type == ENCLOSE_OPTION)
1218
0
    return compile_length_option_node(node, reg);
1219
1220
3.03k
  if (node->target) {
1221
3.03k
    tlen = compile_length_tree(node->target, reg);
1222
3.03k
    if (tlen < 0) return tlen;
1223
3.03k
  }
1224
0
  else
1225
0
    tlen = 0;
1226
1227
3.03k
  switch (node->type) {
1228
0
  case ENCLOSE_MEMORY:
1229
0
#ifdef USE_SUBEXP_CALL
1230
0
    if (IS_ENCLOSE_CALLED(node)) {
1231
0
      len = SIZE_OP_MEMORY_START_PUSH + tlen
1232
0
    + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1233
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1234
0
  len += (IS_ENCLOSE_RECURSION(node)
1235
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1236
0
      else
1237
0
  len += (IS_ENCLOSE_RECURSION(node)
1238
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1239
0
    }
1240
0
    else if (IS_ENCLOSE_RECURSION(node)) {
1241
0
      len = SIZE_OP_MEMORY_START_PUSH;
1242
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1243
0
         ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1244
0
    }
1245
0
    else
1246
0
#endif
1247
0
    {
1248
0
      if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1249
0
  len = SIZE_OP_MEMORY_START_PUSH;
1250
0
      else
1251
0
  len = SIZE_OP_MEMORY_START;
1252
1253
0
      len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1254
0
         ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1255
0
    }
1256
0
    break;
1257
1258
3.03k
  case ENCLOSE_STOP_BACKTRACK:
1259
3.03k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1260
3.03k
      QtfrNode* qn = NQTFR(node->target);
1261
3.03k
      tlen = compile_length_tree(qn->target, reg);
1262
3.03k
      if (tlen < 0) return tlen;
1263
1264
3.03k
      len = tlen * qn->lower
1265
3.03k
    + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1266
3.03k
    }
1267
0
    else {
1268
0
      len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1269
0
    }
1270
3.03k
    break;
1271
1272
3.03k
  case ENCLOSE_CONDITION:
1273
0
    len = SIZE_OP_CONDITION;
1274
0
    if (NTYPE(node->target) == NT_ALT) {
1275
0
      Node* x = node->target;
1276
1277
0
      tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1278
0
      if (tlen < 0) return tlen;
1279
0
      len += tlen + SIZE_OP_JUMP;
1280
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1281
0
      x = NCDR(x);
1282
0
      tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1283
0
      if (tlen < 0) return tlen;
1284
0
      len += tlen;
1285
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1286
0
    }
1287
0
    else {
1288
0
      return ONIGERR_PARSER_BUG;
1289
0
    }
1290
0
    break;
1291
1292
0
  case ENCLOSE_ABSENT:
1293
0
    len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1294
0
    break;
1295
1296
0
  default:
1297
0
    return ONIGERR_TYPE_BUG;
1298
0
    break;
1299
3.03k
  }
1300
1301
3.03k
  return len;
1302
3.03k
}
1303
1304
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1305
1306
static int
1307
compile_enclose_node(EncloseNode* node, regex_t* reg)
1308
9.90k
{
1309
9.90k
  int r, len;
1310
1311
9.90k
  if (node->type == ENCLOSE_OPTION)
1312
534
    return compile_option_node(node, reg);
1313
1314
9.37k
  switch (node->type) {
1315
2.74k
  case ENCLOSE_MEMORY:
1316
2.74k
#ifdef USE_SUBEXP_CALL
1317
2.74k
    if (IS_ENCLOSE_CALLED(node)) {
1318
0
      r = add_opcode(reg, OP_CALL);
1319
0
      if (r) return r;
1320
0
      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1321
0
      node->state |= NST_ADDR_FIXED;
1322
0
      r = add_abs_addr(reg, (int )node->call_addr);
1323
0
      if (r) return r;
1324
0
      len = compile_length_tree(node->target, reg);
1325
0
      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1326
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1327
0
  len += (IS_ENCLOSE_RECURSION(node)
1328
0
    ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1329
0
      else
1330
0
  len += (IS_ENCLOSE_RECURSION(node)
1331
0
    ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1332
1333
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len);
1334
0
      if (r) return r;
1335
0
    }
1336
2.74k
#endif
1337
2.74k
    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1338
0
      r = add_opcode(reg, OP_MEMORY_START_PUSH);
1339
2.74k
    else
1340
2.74k
      r = add_opcode(reg, OP_MEMORY_START);
1341
2.74k
    if (r) return r;
1342
2.74k
    r = add_mem_num(reg, node->regnum);
1343
2.74k
    if (r) return r;
1344
2.74k
    r = compile_tree(node->target, reg);
1345
2.74k
    if (r) return r;
1346
2.74k
#ifdef USE_SUBEXP_CALL
1347
2.74k
    if (IS_ENCLOSE_CALLED(node)) {
1348
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1349
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350
0
           ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1351
0
      else
1352
0
  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1353
0
           ? OP_MEMORY_END_REC : OP_MEMORY_END));
1354
1355
0
      if (r) return r;
1356
0
      r = add_mem_num(reg, node->regnum);
1357
0
      if (r) return r;
1358
0
      r = add_opcode(reg, OP_RETURN);
1359
0
    }
1360
2.74k
    else if (IS_ENCLOSE_RECURSION(node)) {
1361
0
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1362
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1363
0
      else
1364
0
  r = add_opcode(reg, OP_MEMORY_END_REC);
1365
0
      if (r) return r;
1366
0
      r = add_mem_num(reg, node->regnum);
1367
0
    }
1368
2.74k
    else
1369
2.74k
#endif
1370
2.74k
    {
1371
2.74k
      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1372
0
  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1373
2.74k
      else
1374
2.74k
  r = add_opcode(reg, OP_MEMORY_END);
1375
2.74k
      if (r) return r;
1376
2.74k
      r = add_mem_num(reg, node->regnum);
1377
2.74k
    }
1378
2.74k
    break;
1379
1380
6.63k
  case ENCLOSE_STOP_BACKTRACK:
1381
6.63k
    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1382
6.63k
      QtfrNode* qn = NQTFR(node->target);
1383
6.63k
      r = compile_tree_n_times(qn->target, qn->lower, reg);
1384
6.63k
      if (r) return r;
1385
1386
6.63k
      len = compile_length_tree(qn->target, reg);
1387
6.63k
      if (len < 0) return len;
1388
1389
6.63k
      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1390
6.63k
      if (r) return r;
1391
6.63k
      r = compile_tree(qn->target, reg);
1392
6.63k
      if (r) return r;
1393
6.63k
      r = add_opcode(reg, OP_POP);
1394
6.63k
      if (r) return r;
1395
6.63k
      r = add_opcode_rel_addr(reg, OP_JUMP,
1396
6.63k
   -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1397
6.63k
    }
1398
0
    else {
1399
0
      r = add_opcode(reg, OP_PUSH_STOP_BT);
1400
0
      if (r) return r;
1401
0
      r = compile_tree(node->target, reg);
1402
0
      if (r) return r;
1403
0
      r = add_opcode(reg, OP_POP_STOP_BT);
1404
0
    }
1405
6.63k
    break;
1406
1407
6.63k
  case ENCLOSE_CONDITION:
1408
0
    r = add_opcode(reg, OP_CONDITION);
1409
0
    if (r) return r;
1410
0
    r = add_mem_num(reg, node->regnum);
1411
0
    if (r) return r;
1412
1413
0
    if (NTYPE(node->target) == NT_ALT) {
1414
0
      Node* x = node->target;
1415
0
      int len2;
1416
1417
0
      len = compile_length_tree(NCAR(x), reg);  /* yes-node */
1418
0
      if (len < 0) return len;
1419
0
      if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1420
0
      x = NCDR(x);
1421
0
      len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1422
0
      if (len2 < 0) return len2;
1423
0
      if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1424
1425
0
      x = node->target;
1426
0
      r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1427
0
      if (r) return r;
1428
0
      r = compile_tree(NCAR(x), reg);   /* yes-node */
1429
0
      if (r) return r;
1430
0
      r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1431
0
      if (r) return r;
1432
0
      x = NCDR(x);
1433
0
      r = compile_tree(NCAR(x), reg);   /* no-node */
1434
0
    }
1435
0
    else {
1436
0
      return ONIGERR_PARSER_BUG;
1437
0
    }
1438
0
    break;
1439
1440
0
  case ENCLOSE_ABSENT:
1441
0
    len = compile_length_tree(node->target, reg);
1442
0
    if (len < 0) return len;
1443
1444
0
    r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1445
0
    if (r) return r;
1446
0
    r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1447
0
    if (r) return r;
1448
0
    r = compile_tree(node->target, reg);
1449
0
    if (r) return r;
1450
0
    r = add_opcode(reg, OP_ABSENT_END);
1451
0
    break;
1452
1453
0
  default:
1454
0
    return ONIGERR_TYPE_BUG;
1455
0
    break;
1456
9.37k
  }
1457
1458
9.37k
  return r;
1459
9.37k
}
1460
1461
static int
1462
compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1463
2.03k
{
1464
2.03k
  int len;
1465
2.03k
  int tlen = 0;
1466
1467
2.03k
  if (node->target) {
1468
0
    tlen = compile_length_tree(node->target, reg);
1469
0
    if (tlen < 0) return tlen;
1470
0
  }
1471
1472
2.03k
  switch (node->type) {
1473
0
  case ANCHOR_PREC_READ:
1474
0
    len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1475
0
    break;
1476
0
  case ANCHOR_PREC_READ_NOT:
1477
0
    len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1478
0
    break;
1479
0
  case ANCHOR_LOOK_BEHIND:
1480
0
    len = SIZE_OP_LOOK_BEHIND + tlen;
1481
0
    break;
1482
0
  case ANCHOR_LOOK_BEHIND_NOT:
1483
0
    len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1484
0
    break;
1485
1486
2.03k
  default:
1487
2.03k
    len = SIZE_OPCODE;
1488
2.03k
    break;
1489
2.03k
  }
1490
1491
2.03k
  return len;
1492
2.03k
}
1493
1494
static int
1495
compile_anchor_node(AnchorNode* node, regex_t* reg)
1496
15.0k
{
1497
15.0k
  int r, len;
1498
1499
15.0k
  switch (node->type) {
1500
0
  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
1501
0
  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
1502
10.3k
  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
1503
4.19k
  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
1504
0
  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
1505
0
  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1506
1507
517
  case ANCHOR_WORD_BOUND:
1508
517
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1509
517
    else                      r = add_opcode(reg, OP_WORD_BOUND);
1510
517
    break;
1511
0
  case ANCHOR_NOT_WORD_BOUND:
1512
0
    if (node->ascii_range)    r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1513
0
    else                      r = add_opcode(reg, OP_NOT_WORD_BOUND);
1514
0
    break;
1515
0
#ifdef USE_WORD_BEGIN_END
1516
0
  case ANCHOR_WORD_BEGIN:
1517
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1518
0
    else                      r = add_opcode(reg, OP_WORD_BEGIN);
1519
0
    break;
1520
0
  case ANCHOR_WORD_END:
1521
0
    if (node->ascii_range)    r = add_opcode(reg, OP_ASCII_WORD_END);
1522
0
    else                      r = add_opcode(reg, OP_WORD_END);
1523
0
    break;
1524
0
#endif
1525
0
  case ANCHOR_KEEP:           r = add_opcode(reg, OP_KEEP);           break;
1526
1527
0
  case ANCHOR_PREC_READ:
1528
0
    r = add_opcode(reg, OP_PUSH_POS);
1529
0
    if (r) return r;
1530
0
    r = compile_tree(node->target, reg);
1531
0
    if (r) return r;
1532
0
    r = add_opcode(reg, OP_POP_POS);
1533
0
    break;
1534
1535
0
  case ANCHOR_PREC_READ_NOT:
1536
0
    len = compile_length_tree(node->target, reg);
1537
0
    if (len < 0) return len;
1538
0
    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1539
0
    if (r) return r;
1540
0
    r = compile_tree(node->target, reg);
1541
0
    if (r) return r;
1542
0
    r = add_opcode(reg, OP_FAIL_POS);
1543
0
    break;
1544
1545
0
  case ANCHOR_LOOK_BEHIND:
1546
0
    {
1547
0
      int n;
1548
0
      r = add_opcode(reg, OP_LOOK_BEHIND);
1549
0
      if (r) return r;
1550
0
      if (node->char_len < 0) {
1551
0
  r = get_char_length_tree(node->target, reg, &n);
1552
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1553
0
      }
1554
0
      else
1555
0
  n = node->char_len;
1556
0
      r = add_length(reg, n);
1557
0
      if (r) return r;
1558
0
      r = compile_tree(node->target, reg);
1559
0
    }
1560
0
    break;
1561
1562
0
  case ANCHOR_LOOK_BEHIND_NOT:
1563
0
    {
1564
0
      int n;
1565
0
      len = compile_length_tree(node->target, reg);
1566
0
      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1567
0
         len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1568
0
      if (r) return r;
1569
0
      if (node->char_len < 0) {
1570
0
  r = get_char_length_tree(node->target, reg, &n);
1571
0
  if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1572
0
      }
1573
0
      else
1574
0
  n = node->char_len;
1575
0
      r = add_length(reg, n);
1576
0
      if (r) return r;
1577
0
      r = compile_tree(node->target, reg);
1578
0
      if (r) return r;
1579
0
      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1580
0
    }
1581
0
    break;
1582
1583
0
  default:
1584
0
    return ONIGERR_TYPE_BUG;
1585
0
    break;
1586
15.0k
  }
1587
1588
15.0k
  return r;
1589
15.0k
}
1590
1591
static int
1592
compile_length_tree(Node* node, regex_t* reg)
1593
50.7k
{
1594
50.7k
  int len, type, r;
1595
1596
50.7k
  type = NTYPE(node);
1597
50.7k
  switch (type) {
1598
4.05k
  case NT_LIST:
1599
4.05k
    len = 0;
1600
10.1k
    do {
1601
10.1k
      r = compile_length_tree(NCAR(node), reg);
1602
10.1k
      if (r < 0) return r;
1603
10.1k
      len += r;
1604
10.1k
    } while (IS_NOT_NULL(node = NCDR(node)));
1605
4.05k
    r = len;
1606
4.05k
    break;
1607
1608
0
  case NT_ALT:
1609
0
    {
1610
0
      int n = 0;
1611
0
      len = 0;
1612
0
      do {
1613
0
  r = compile_length_tree(NCAR(node), reg);
1614
0
  if (r < 0) return r;
1615
0
  len += r;
1616
0
  n++;
1617
0
      } while (IS_NOT_NULL(node = NCDR(node)));
1618
0
      r = len;
1619
0
      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1620
0
    }
1621
0
    break;
1622
1623
17.5k
  case NT_STR:
1624
17.5k
    if (NSTRING_IS_RAW(node))
1625
0
      r = compile_length_string_raw_node(NSTR(node), reg);
1626
17.5k
    else
1627
17.5k
      r = compile_length_string_node(node, reg);
1628
17.5k
    break;
1629
1630
16.8k
  case NT_CCLASS:
1631
16.8k
    r = compile_length_cclass_node(NCCLASS(node), reg);
1632
16.8k
    break;
1633
1634
0
  case NT_CTYPE:
1635
3.11k
  case NT_CANY:
1636
3.11k
    r = SIZE_OPCODE;
1637
3.11k
    break;
1638
1639
0
  case NT_BREF:
1640
0
    {
1641
0
      BRefNode* br = NBREF(node);
1642
1643
0
#ifdef USE_BACKREF_WITH_LEVEL
1644
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1645
0
  r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1646
0
            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1647
0
      }
1648
0
      else
1649
0
#endif
1650
0
      if (br->back_num == 1) {
1651
0
  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1652
0
       ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1653
0
      }
1654
0
      else {
1655
0
  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1656
0
      }
1657
0
    }
1658
0
    break;
1659
1660
0
#ifdef USE_SUBEXP_CALL
1661
0
  case NT_CALL:
1662
0
    r = SIZE_OP_CALL;
1663
0
    break;
1664
0
#endif
1665
1666
4.05k
  case NT_QTFR:
1667
4.05k
    r = compile_length_quantifier_node(NQTFR(node), reg);
1668
4.05k
    break;
1669
1670
3.03k
  case NT_ENCLOSE:
1671
3.03k
    r = compile_length_enclose_node(NENCLOSE(node), reg);
1672
3.03k
    break;
1673
1674
2.03k
  case NT_ANCHOR:
1675
2.03k
    r = compile_length_anchor_node(NANCHOR(node), reg);
1676
2.03k
    break;
1677
1678
0
  default:
1679
0
    return ONIGERR_TYPE_BUG;
1680
0
    break;
1681
50.7k
  }
1682
1683
50.7k
  return r;
1684
50.7k
}
1685
1686
static int
1687
compile_tree(Node* node, regex_t* reg)
1688
96.2k
{
1689
96.2k
  int n, type, len, pos, r = 0;
1690
1691
96.2k
  type = NTYPE(node);
1692
96.2k
  switch (type) {
1693
12.4k
  case NT_LIST:
1694
51.4k
    do {
1695
51.4k
      r = compile_tree(NCAR(node), reg);
1696
51.4k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1697
12.4k
    break;
1698
1699
3.21k
  case NT_ALT:
1700
3.21k
    {
1701
3.21k
      Node* x = node;
1702
3.21k
      len = 0;
1703
7.50k
      do {
1704
7.50k
  len += compile_length_tree(NCAR(x), reg);
1705
7.50k
  if (NCDR(x) != NULL) {
1706
4.29k
    len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1707
4.29k
  }
1708
7.50k
      } while (IS_NOT_NULL(x = NCDR(x)));
1709
3.21k
      pos = reg->used + len;  /* goal position */
1710
1711
7.50k
      do {
1712
7.50k
  len = compile_length_tree(NCAR(node), reg);
1713
7.50k
  if (IS_NOT_NULL(NCDR(node))) {
1714
4.29k
    r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1715
4.29k
    if (r) break;
1716
4.29k
  }
1717
7.50k
  r = compile_tree(NCAR(node), reg);
1718
7.50k
  if (r) break;
1719
7.50k
  if (IS_NOT_NULL(NCDR(node))) {
1720
4.29k
    len = pos - (reg->used + SIZE_OP_JUMP);
1721
4.29k
    r = add_opcode_rel_addr(reg, OP_JUMP, len);
1722
4.29k
    if (r) break;
1723
4.29k
  }
1724
7.50k
      } while (IS_NOT_NULL(node = NCDR(node)));
1725
3.21k
    }
1726
0
    break;
1727
1728
22.5k
  case NT_STR:
1729
22.5k
    if (NSTRING_IS_RAW(node))
1730
0
      r = compile_string_raw_node(NSTR(node), reg);
1731
22.5k
    else
1732
22.5k
      r = compile_string_node(node, reg);
1733
22.5k
    break;
1734
1735
20.5k
  case NT_CCLASS:
1736
20.5k
    r = compile_cclass_node(NCCLASS(node), reg);
1737
20.5k
    break;
1738
1739
0
  case NT_CTYPE:
1740
0
    {
1741
0
      int op;
1742
1743
0
      switch (NCTYPE(node)->ctype) {
1744
0
      case ONIGENC_CTYPE_WORD:
1745
0
  if (NCTYPE(node)->ascii_range != 0) {
1746
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_ASCII_WORD;
1747
0
    else                         op = OP_ASCII_WORD;
1748
0
  }
1749
0
  else {
1750
0
    if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
1751
0
    else                         op = OP_WORD;
1752
0
  }
1753
0
  break;
1754
0
      default:
1755
0
  return ONIGERR_TYPE_BUG;
1756
0
  break;
1757
0
      }
1758
0
      r = add_opcode(reg, op);
1759
0
    }
1760
0
    break;
1761
1762
3.73k
  case NT_CANY:
1763
3.73k
    if (IS_MULTILINE(reg->options))
1764
0
      r = add_opcode(reg, OP_ANYCHAR_ML);
1765
3.73k
    else
1766
3.73k
      r = add_opcode(reg, OP_ANYCHAR);
1767
3.73k
    break;
1768
1769
0
  case NT_BREF:
1770
0
    {
1771
0
      BRefNode* br = NBREF(node);
1772
1773
0
#ifdef USE_BACKREF_WITH_LEVEL
1774
0
      if (IS_BACKREF_NEST_LEVEL(br)) {
1775
0
  r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1776
0
  if (r) return r;
1777
0
  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1778
0
  if (r) return r;
1779
0
  r = add_length(reg, br->nest_level);
1780
0
  if (r) return r;
1781
1782
0
  goto add_bacref_mems;
1783
0
      }
1784
0
      else
1785
0
#endif
1786
0
      if (br->back_num == 1) {
1787
0
  n = br->back_static[0];
1788
0
  if (IS_IGNORECASE(reg->options)) {
1789
0
    r = add_opcode(reg, OP_BACKREFN_IC);
1790
0
    if (r) return r;
1791
0
    r = add_mem_num(reg, n);
1792
0
  }
1793
0
  else {
1794
0
    switch (n) {
1795
0
    case 1:  r = add_opcode(reg, OP_BACKREF1); break;
1796
0
    case 2:  r = add_opcode(reg, OP_BACKREF2); break;
1797
0
    default:
1798
0
      r = add_opcode(reg, OP_BACKREFN);
1799
0
      if (r) return r;
1800
0
      r = add_mem_num(reg, n);
1801
0
      break;
1802
0
    }
1803
0
  }
1804
0
      }
1805
0
      else {
1806
0
  int i;
1807
0
  int* p;
1808
1809
0
  if (IS_IGNORECASE(reg->options)) {
1810
0
    r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1811
0
  }
1812
0
  else {
1813
0
    r = add_opcode(reg, OP_BACKREF_MULTI);
1814
0
  }
1815
0
  if (r) return r;
1816
1817
0
#ifdef USE_BACKREF_WITH_LEVEL
1818
0
      add_bacref_mems:
1819
0
#endif
1820
0
  r = add_length(reg, br->back_num);
1821
0
  if (r) return r;
1822
0
  p = BACKREFS_P(br);
1823
0
  for (i = br->back_num - 1; i >= 0; i--) {
1824
0
    r = add_mem_num(reg, p[i]);
1825
0
    if (r) return r;
1826
0
  }
1827
0
      }
1828
0
    }
1829
0
    break;
1830
1831
0
#ifdef USE_SUBEXP_CALL
1832
0
  case NT_CALL:
1833
0
    r = compile_call(NCALL(node), reg);
1834
0
    break;
1835
0
#endif
1836
1837
8.84k
  case NT_QTFR:
1838
8.84k
    r = compile_quantifier_node(NQTFR(node), reg);
1839
8.84k
    break;
1840
1841
9.90k
  case NT_ENCLOSE:
1842
9.90k
    r = compile_enclose_node(NENCLOSE(node), reg);
1843
9.90k
    break;
1844
1845
15.0k
  case NT_ANCHOR:
1846
15.0k
    r = compile_anchor_node(NANCHOR(node), reg);
1847
15.0k
    break;
1848
1849
0
  default:
1850
#ifdef ONIG_DEBUG
1851
    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1852
#endif
1853
0
    break;
1854
96.2k
  }
1855
1856
96.2k
  return r;
1857
96.2k
}
1858
1859
#ifdef USE_NAMED_GROUP
1860
1861
static int
1862
noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1863
0
{
1864
0
  int r = 0;
1865
0
  Node* node = *plink;
1866
1867
0
  switch (NTYPE(node)) {
1868
0
  case NT_LIST:
1869
0
  case NT_ALT:
1870
0
    do {
1871
0
      r = noname_disable_map(&(NCAR(node)), map, counter);
1872
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1873
0
    break;
1874
1875
0
  case NT_QTFR:
1876
0
    {
1877
0
      Node** ptarget = &(NQTFR(node)->target);
1878
0
      Node*  old = *ptarget;
1879
0
      r = noname_disable_map(ptarget, map, counter);
1880
0
      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1881
0
  onig_reduce_nested_quantifier(node, *ptarget);
1882
0
      }
1883
0
    }
1884
0
    break;
1885
1886
0
  case NT_ENCLOSE:
1887
0
    {
1888
0
      EncloseNode* en = NENCLOSE(node);
1889
0
      if (en->type == ENCLOSE_MEMORY) {
1890
0
  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1891
0
    (*counter)++;
1892
0
    map[en->regnum].new_val = *counter;
1893
0
    en->regnum = *counter;
1894
0
  }
1895
0
  else if (en->regnum != 0) {
1896
0
    *plink = en->target;
1897
0
    en->target = NULL_NODE;
1898
0
    onig_node_free(node);
1899
0
    r = noname_disable_map(plink, map, counter);
1900
0
    break;
1901
0
  }
1902
0
      }
1903
0
      r = noname_disable_map(&(en->target), map, counter);
1904
0
    }
1905
0
    break;
1906
1907
0
  case NT_ANCHOR:
1908
0
    if (NANCHOR(node)->target)
1909
0
      r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1910
0
    break;
1911
1912
0
  default:
1913
0
    break;
1914
0
  }
1915
1916
0
  return r;
1917
0
}
1918
1919
static int
1920
renumber_node_backref(Node* node, GroupNumRemap* map)
1921
0
{
1922
0
  int i, pos, n, old_num;
1923
0
  int *backs;
1924
0
  BRefNode* bn = NBREF(node);
1925
1926
0
  if (! IS_BACKREF_NAME_REF(bn))
1927
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1928
1929
0
  old_num = bn->back_num;
1930
0
  if (IS_NULL(bn->back_dynamic))
1931
0
    backs = bn->back_static;
1932
0
  else
1933
0
    backs = bn->back_dynamic;
1934
1935
0
  for (i = 0, pos = 0; i < old_num; i++) {
1936
0
    n = map[backs[i]].new_val;
1937
0
    if (n > 0) {
1938
0
      backs[pos] = n;
1939
0
      pos++;
1940
0
    }
1941
0
  }
1942
1943
0
  bn->back_num = pos;
1944
0
  return 0;
1945
0
}
1946
1947
static int
1948
renumber_by_map(Node* node, GroupNumRemap* map)
1949
0
{
1950
0
  int r = 0;
1951
1952
0
  switch (NTYPE(node)) {
1953
0
  case NT_LIST:
1954
0
  case NT_ALT:
1955
0
    do {
1956
0
      r = renumber_by_map(NCAR(node), map);
1957
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1958
0
    break;
1959
0
  case NT_QTFR:
1960
0
    r = renumber_by_map(NQTFR(node)->target, map);
1961
0
    break;
1962
0
  case NT_ENCLOSE:
1963
0
    {
1964
0
      EncloseNode* en = NENCLOSE(node);
1965
0
      if (en->type == ENCLOSE_CONDITION)
1966
0
  en->regnum = map[en->regnum].new_val;
1967
0
      r = renumber_by_map(en->target, map);
1968
0
    }
1969
0
    break;
1970
1971
0
  case NT_BREF:
1972
0
    r = renumber_node_backref(node, map);
1973
0
    break;
1974
1975
0
  case NT_ANCHOR:
1976
0
    if (NANCHOR(node)->target)
1977
0
      r = renumber_by_map(NANCHOR(node)->target, map);
1978
0
    break;
1979
1980
0
  default:
1981
0
    break;
1982
0
  }
1983
1984
0
  return r;
1985
0
}
1986
1987
static int
1988
numbered_ref_check(Node* node)
1989
11.0k
{
1990
11.0k
  int r = 0;
1991
1992
11.0k
  switch (NTYPE(node)) {
1993
550
  case NT_LIST:
1994
1.65k
  case NT_ALT:
1995
7.15k
    do {
1996
7.15k
      r = numbered_ref_check(NCAR(node));
1997
7.15k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1998
1.65k
    break;
1999
1.10k
  case NT_QTFR:
2000
1.10k
    r = numbered_ref_check(NQTFR(node)->target);
2001
1.10k
    break;
2002
2.20k
  case NT_ENCLOSE:
2003
2.20k
    r = numbered_ref_check(NENCLOSE(node)->target);
2004
2.20k
    break;
2005
2006
0
  case NT_BREF:
2007
0
    if (! IS_BACKREF_NAME_REF(NBREF(node)))
2008
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2009
0
    break;
2010
2011
1.10k
  case NT_ANCHOR:
2012
1.10k
    if (NANCHOR(node)->target)
2013
0
      r = numbered_ref_check(NANCHOR(node)->target);
2014
1.10k
    break;
2015
2016
4.95k
  default:
2017
4.95k
    break;
2018
11.0k
  }
2019
2020
11.0k
  return r;
2021
11.0k
}
2022
2023
static int
2024
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2025
0
{
2026
0
  int r, i, pos, counter;
2027
0
  BitStatusType loc;
2028
0
  GroupNumRemap* map;
2029
2030
0
  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2031
0
  CHECK_NULL_RETURN_MEMERR(map);
2032
0
  for (i = 1; i <= env->num_mem; i++) {
2033
0
    map[i].new_val = 0;
2034
0
  }
2035
0
  counter = 0;
2036
0
  r = noname_disable_map(root, map, &counter);
2037
0
  if (r != 0) return r;
2038
2039
0
  r = renumber_by_map(*root, map);
2040
0
  if (r != 0) return r;
2041
2042
0
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2043
0
    if (map[i].new_val > 0) {
2044
0
      SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2045
0
      pos++;
2046
0
    }
2047
0
  }
2048
2049
0
  loc = env->capture_history;
2050
0
  BIT_STATUS_CLEAR(env->capture_history);
2051
0
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2052
0
    if (BIT_STATUS_AT(loc, i)) {
2053
0
      BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2054
0
    }
2055
0
  }
2056
2057
0
  env->num_mem = env->num_named;
2058
0
  reg->num_mem = env->num_named;
2059
2060
0
  return onig_renumber_name_table(reg, map);
2061
0
}
2062
#endif /* USE_NAMED_GROUP */
2063
2064
#ifdef USE_SUBEXP_CALL
2065
static int
2066
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2067
0
{
2068
0
  int i, offset;
2069
0
  EncloseNode* en;
2070
0
  AbsAddrType addr;
2071
2072
0
  for (i = 0; i < uslist->num; i++) {
2073
0
    en = NENCLOSE(uslist->us[i].target);
2074
0
    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2075
0
    addr = en->call_addr;
2076
0
    offset = uslist->us[i].offset;
2077
2078
0
    BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2079
0
  }
2080
0
  return 0;
2081
0
}
2082
#endif
2083
2084
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2085
static int
2086
quantifiers_memory_node_info(Node* node)
2087
0
{
2088
0
  int r = 0;
2089
2090
0
  switch (NTYPE(node)) {
2091
0
  case NT_LIST:
2092
0
  case NT_ALT:
2093
0
    {
2094
0
      int v;
2095
0
      do {
2096
0
  v = quantifiers_memory_node_info(NCAR(node));
2097
0
  if (v > r) r = v;
2098
0
      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2099
0
    }
2100
0
    break;
2101
2102
0
# ifdef USE_SUBEXP_CALL
2103
0
  case NT_CALL:
2104
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2105
0
      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2106
0
    }
2107
0
    else
2108
0
      r = quantifiers_memory_node_info(NCALL(node)->target);
2109
0
    break;
2110
0
# endif
2111
2112
0
  case NT_QTFR:
2113
0
    {
2114
0
      QtfrNode* qn = NQTFR(node);
2115
0
      if (qn->upper != 0) {
2116
0
  r = quantifiers_memory_node_info(qn->target);
2117
0
      }
2118
0
    }
2119
0
    break;
2120
2121
0
  case NT_ENCLOSE:
2122
0
    {
2123
0
      EncloseNode* en = NENCLOSE(node);
2124
0
      switch (en->type) {
2125
0
      case ENCLOSE_MEMORY:
2126
0
  return NQ_TARGET_IS_EMPTY_MEM;
2127
0
  break;
2128
2129
0
      case ENCLOSE_OPTION:
2130
0
      case ENCLOSE_STOP_BACKTRACK:
2131
0
      case ENCLOSE_CONDITION:
2132
0
      case ENCLOSE_ABSENT:
2133
0
  r = quantifiers_memory_node_info(en->target);
2134
0
  break;
2135
0
      default:
2136
0
  break;
2137
0
      }
2138
0
    }
2139
0
    break;
2140
2141
0
  case NT_BREF:
2142
0
  case NT_STR:
2143
0
  case NT_CTYPE:
2144
0
  case NT_CCLASS:
2145
0
  case NT_CANY:
2146
0
  case NT_ANCHOR:
2147
0
  default:
2148
0
    break;
2149
0
  }
2150
2151
0
  return r;
2152
0
}
2153
#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2154
2155
static int
2156
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2157
18.4k
{
2158
18.4k
  OnigDistance tmin;
2159
18.4k
  int r = 0;
2160
2161
18.4k
  *min = 0;
2162
18.4k
  switch (NTYPE(node)) {
2163
0
  case NT_BREF:
2164
0
    {
2165
0
      int i;
2166
0
      int* backs;
2167
0
      Node** nodes = SCANENV_MEM_NODES(env);
2168
0
      BRefNode* br = NBREF(node);
2169
0
      if (br->state & NST_RECURSION) break;
2170
2171
0
      backs = BACKREFS_P(br);
2172
0
      if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2173
0
      r = get_min_match_length(nodes[backs[0]], min, env);
2174
0
      if (r != 0) break;
2175
0
      for (i = 1; i < br->back_num; i++) {
2176
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2177
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2178
0
  if (r != 0) break;
2179
0
  if (*min > tmin) *min = tmin;
2180
0
      }
2181
0
    }
2182
0
    break;
2183
2184
0
#ifdef USE_SUBEXP_CALL
2185
0
  case NT_CALL:
2186
0
    if (IS_CALL_RECURSION(NCALL(node))) {
2187
0
      EncloseNode* en = NENCLOSE(NCALL(node)->target);
2188
0
      if (IS_ENCLOSE_MIN_FIXED(en))
2189
0
  *min = en->min_len;
2190
0
    }
2191
0
    else
2192
0
      r = get_min_match_length(NCALL(node)->target, min, env);
2193
0
    break;
2194
0
#endif
2195
2196
1.00k
  case NT_LIST:
2197
2.00k
    do {
2198
2.00k
      r = get_min_match_length(NCAR(node), &tmin, env);
2199
2.00k
      if (r == 0) *min += tmin;
2200
2.00k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2201
1.00k
    break;
2202
2203
0
  case NT_ALT:
2204
0
    {
2205
0
      Node *x, *y;
2206
0
      y = node;
2207
0
      do {
2208
0
  x = NCAR(y);
2209
0
  r = get_min_match_length(x, &tmin, env);
2210
0
  if (r != 0) break;
2211
0
  if (y == node) *min = tmin;
2212
0
  else if (*min > tmin) *min = tmin;
2213
0
      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2214
0
    }
2215
0
    break;
2216
2217
1.53k
  case NT_STR:
2218
1.53k
    {
2219
1.53k
      StrNode* sn = NSTR(node);
2220
1.53k
      *min = sn->end - sn->s;
2221
1.53k
    }
2222
1.53k
    break;
2223
2224
0
  case NT_CTYPE:
2225
0
    *min = 1;
2226
0
    break;
2227
2228
11.8k
  case NT_CCLASS:
2229
14.9k
  case NT_CANY:
2230
14.9k
    *min = 1;
2231
14.9k
    break;
2232
2233
1.00k
  case NT_QTFR:
2234
1.00k
    {
2235
1.00k
      QtfrNode* qn = NQTFR(node);
2236
2237
1.00k
      if (qn->lower > 0) {
2238
1.00k
  r = get_min_match_length(qn->target, min, env);
2239
1.00k
  if (r == 0)
2240
1.00k
    *min = distance_multiply(*min, qn->lower);
2241
1.00k
      }
2242
1.00k
    }
2243
1.00k
    break;
2244
2245
0
  case NT_ENCLOSE:
2246
0
    {
2247
0
      EncloseNode* en = NENCLOSE(node);
2248
0
      switch (en->type) {
2249
0
      case ENCLOSE_MEMORY:
2250
0
        if (IS_ENCLOSE_MIN_FIXED(en))
2251
0
          *min = en->min_len;
2252
0
        else {
2253
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2254
0
      *min = 0;  /* recursive */
2255
0
    else {
2256
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2257
0
      r = get_min_match_length(en->target, min, env);
2258
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2259
0
      if (r == 0) {
2260
0
        en->min_len = *min;
2261
0
        SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2262
0
      }
2263
0
    }
2264
0
        }
2265
0
        break;
2266
2267
0
      case ENCLOSE_OPTION:
2268
0
      case ENCLOSE_STOP_BACKTRACK:
2269
0
      case ENCLOSE_CONDITION:
2270
0
  r = get_min_match_length(en->target, min, env);
2271
0
  break;
2272
2273
0
      case ENCLOSE_ABSENT:
2274
0
  break;
2275
0
      }
2276
0
    }
2277
0
    break;
2278
2279
0
  case NT_ANCHOR:
2280
0
  default:
2281
0
    break;
2282
18.4k
  }
2283
2284
18.4k
  return r;
2285
18.4k
}
2286
2287
static int
2288
get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2289
0
{
2290
0
  OnigDistance tmax;
2291
0
  int r = 0;
2292
2293
0
  *max = 0;
2294
0
  switch (NTYPE(node)) {
2295
0
  case NT_LIST:
2296
0
    do {
2297
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2298
0
      if (r == 0)
2299
0
  *max = distance_add(*max, tmax);
2300
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2301
0
    break;
2302
2303
0
  case NT_ALT:
2304
0
    do {
2305
0
      r = get_max_match_length(NCAR(node), &tmax, env);
2306
0
      if (r == 0 && *max < tmax) *max = tmax;
2307
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2308
0
    break;
2309
2310
0
  case NT_STR:
2311
0
    {
2312
0
      StrNode* sn = NSTR(node);
2313
0
      *max = sn->end - sn->s;
2314
0
    }
2315
0
    break;
2316
2317
0
  case NT_CTYPE:
2318
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2319
0
    break;
2320
2321
0
  case NT_CCLASS:
2322
0
  case NT_CANY:
2323
0
    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2324
0
    break;
2325
2326
0
  case NT_BREF:
2327
0
    {
2328
0
      int i;
2329
0
      int* backs;
2330
0
      Node** nodes = SCANENV_MEM_NODES(env);
2331
0
      BRefNode* br = NBREF(node);
2332
0
      if (br->state & NST_RECURSION) {
2333
0
  *max = ONIG_INFINITE_DISTANCE;
2334
0
  break;
2335
0
      }
2336
0
      backs = BACKREFS_P(br);
2337
0
      for (i = 0; i < br->back_num; i++) {
2338
0
  if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
2339
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2340
0
  if (r != 0) break;
2341
0
  if (*max < tmax) *max = tmax;
2342
0
      }
2343
0
    }
2344
0
    break;
2345
2346
0
#ifdef USE_SUBEXP_CALL
2347
0
  case NT_CALL:
2348
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2349
0
      r = get_max_match_length(NCALL(node)->target, max, env);
2350
0
    else
2351
0
      *max = ONIG_INFINITE_DISTANCE;
2352
0
    break;
2353
0
#endif
2354
2355
0
  case NT_QTFR:
2356
0
    {
2357
0
      QtfrNode* qn = NQTFR(node);
2358
2359
0
      if (qn->upper != 0) {
2360
0
  r = get_max_match_length(qn->target, max, env);
2361
0
  if (r == 0 && *max != 0) {
2362
0
    if (! IS_REPEAT_INFINITE(qn->upper))
2363
0
      *max = distance_multiply(*max, qn->upper);
2364
0
    else
2365
0
      *max = ONIG_INFINITE_DISTANCE;
2366
0
  }
2367
0
      }
2368
0
    }
2369
0
    break;
2370
2371
0
  case NT_ENCLOSE:
2372
0
    {
2373
0
      EncloseNode* en = NENCLOSE(node);
2374
0
      switch (en->type) {
2375
0
      case ENCLOSE_MEMORY:
2376
0
  if (IS_ENCLOSE_MAX_FIXED(en))
2377
0
    *max = en->max_len;
2378
0
  else {
2379
0
    if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2380
0
      *max = ONIG_INFINITE_DISTANCE;
2381
0
    else {
2382
0
      SET_ENCLOSE_STATUS(node, NST_MARK1);
2383
0
      r = get_max_match_length(en->target, max, env);
2384
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2385
0
      if (r == 0) {
2386
0
        en->max_len = *max;
2387
0
        SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2388
0
      }
2389
0
    }
2390
0
  }
2391
0
  break;
2392
2393
0
      case ENCLOSE_OPTION:
2394
0
      case ENCLOSE_STOP_BACKTRACK:
2395
0
      case ENCLOSE_CONDITION:
2396
0
  r = get_max_match_length(en->target, max, env);
2397
0
  break;
2398
2399
0
      case ENCLOSE_ABSENT:
2400
0
  break;
2401
0
      }
2402
0
    }
2403
0
    break;
2404
2405
0
  case NT_ANCHOR:
2406
0
  default:
2407
0
    break;
2408
0
  }
2409
2410
0
  return r;
2411
0
}
2412
2413
0
#define GET_CHAR_LEN_VARLEN           -1
2414
0
#define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
2415
2416
/* fixed size pattern node only */
2417
static int
2418
get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2419
0
{
2420
0
  int tlen;
2421
0
  int r = 0;
2422
2423
0
  level++;
2424
0
  *len = 0;
2425
0
  switch (NTYPE(node)) {
2426
0
  case NT_LIST:
2427
0
    do {
2428
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2429
0
      if (r == 0)
2430
0
  *len = (int )distance_add(*len, tlen);
2431
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2432
0
    break;
2433
2434
0
  case NT_ALT:
2435
0
    {
2436
0
      int tlen2;
2437
0
      int varlen = 0;
2438
2439
0
      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2440
0
      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2441
0
  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2442
0
  if (r == 0) {
2443
0
    if (tlen != tlen2)
2444
0
      varlen = 1;
2445
0
  }
2446
0
      }
2447
0
      if (r == 0) {
2448
0
  if (varlen != 0) {
2449
0
    if (level == 1)
2450
0
      r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2451
0
    else
2452
0
      r = GET_CHAR_LEN_VARLEN;
2453
0
  }
2454
0
  else
2455
0
    *len = tlen;
2456
0
      }
2457
0
    }
2458
0
    break;
2459
2460
0
  case NT_STR:
2461
0
    {
2462
0
      StrNode* sn = NSTR(node);
2463
0
      UChar *s = sn->s;
2464
0
      while (s < sn->end) {
2465
0
  s += enclen(reg->enc, s, sn->end);
2466
0
  (*len)++;
2467
0
      }
2468
0
    }
2469
0
    break;
2470
2471
0
  case NT_QTFR:
2472
0
    {
2473
0
      QtfrNode* qn = NQTFR(node);
2474
0
      if (qn->lower == qn->upper) {
2475
0
  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2476
0
  if (r == 0)
2477
0
    *len = (int )distance_multiply(tlen, qn->lower);
2478
0
      }
2479
0
      else
2480
0
  r = GET_CHAR_LEN_VARLEN;
2481
0
    }
2482
0
    break;
2483
2484
0
#ifdef USE_SUBEXP_CALL
2485
0
  case NT_CALL:
2486
0
    if (! IS_CALL_RECURSION(NCALL(node)))
2487
0
      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2488
0
    else
2489
0
      r = GET_CHAR_LEN_VARLEN;
2490
0
    break;
2491
0
#endif
2492
2493
0
  case NT_CTYPE:
2494
0
    *len = 1;
2495
0
    break;
2496
2497
0
  case NT_CCLASS:
2498
0
  case NT_CANY:
2499
0
    *len = 1;
2500
0
    break;
2501
2502
0
  case NT_ENCLOSE:
2503
0
    {
2504
0
      EncloseNode* en = NENCLOSE(node);
2505
0
      switch (en->type) {
2506
0
      case ENCLOSE_MEMORY:
2507
0
#ifdef USE_SUBEXP_CALL
2508
0
  if (IS_ENCLOSE_CLEN_FIXED(en))
2509
0
    *len = en->char_len;
2510
0
  else {
2511
0
    r = get_char_length_tree1(en->target, reg, len, level);
2512
0
    if (r == 0) {
2513
0
      en->char_len = *len;
2514
0
      SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2515
0
    }
2516
0
  }
2517
0
  break;
2518
0
#endif
2519
0
      case ENCLOSE_OPTION:
2520
0
      case ENCLOSE_STOP_BACKTRACK:
2521
0
      case ENCLOSE_CONDITION:
2522
0
  r = get_char_length_tree1(en->target, reg, len, level);
2523
0
  break;
2524
0
      case ENCLOSE_ABSENT:
2525
0
      default:
2526
0
  break;
2527
0
      }
2528
0
    }
2529
0
    break;
2530
2531
0
  case NT_ANCHOR:
2532
0
    break;
2533
2534
0
  default:
2535
0
    r = GET_CHAR_LEN_VARLEN;
2536
0
    break;
2537
0
  }
2538
2539
0
  return r;
2540
0
}
2541
2542
static int
2543
get_char_length_tree(Node* node, regex_t* reg, int* len)
2544
0
{
2545
0
  return get_char_length_tree1(node, reg, len, 0);
2546
0
}
2547
2548
/* x is not included y ==>  1 : 0 */
2549
static int
2550
is_not_included(Node* x, Node* y, regex_t* reg)
2551
6.63k
{
2552
6.63k
  int i;
2553
6.63k
  OnigDistance len;
2554
6.63k
  OnigCodePoint code;
2555
6.63k
  UChar *p;
2556
6.63k
  int ytype;
2557
2558
13.2k
 retry:
2559
13.2k
  ytype = NTYPE(y);
2560
13.2k
  switch (NTYPE(x)) {
2561
0
  case NT_CTYPE:
2562
0
    {
2563
0
      switch (ytype) {
2564
0
      case NT_CTYPE:
2565
0
  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2566
0
      NCTYPE(y)->not   != NCTYPE(x)->not &&
2567
0
      NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2568
0
    return 1;
2569
0
  else
2570
0
    return 0;
2571
0
  break;
2572
2573
0
      case NT_CCLASS:
2574
6.63k
      swap:
2575
6.63k
  {
2576
6.63k
    Node* tmp;
2577
6.63k
    tmp = x; x = y; y = tmp;
2578
6.63k
    goto retry;
2579
0
  }
2580
0
  break;
2581
2582
0
      case NT_STR:
2583
0
  goto swap;
2584
0
  break;
2585
2586
0
      default:
2587
0
  break;
2588
0
      }
2589
0
    }
2590
0
    break;
2591
2592
6.63k
  case NT_CCLASS:
2593
6.63k
    {
2594
6.63k
      CClassNode* xc = NCCLASS(x);
2595
6.63k
      switch (ytype) {
2596
0
      case NT_CTYPE:
2597
0
  switch (NCTYPE(y)->ctype) {
2598
0
  case ONIGENC_CTYPE_WORD:
2599
0
    if (NCTYPE(y)->not == 0) {
2600
0
      if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2601
0
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2602
0
    if (BITSET_AT(xc->bs, i)) {
2603
0
      if (NCTYPE(y)->ascii_range) {
2604
0
        if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2605
0
      }
2606
0
      else {
2607
0
        if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2608
0
      }
2609
0
    }
2610
0
        }
2611
0
        return 1;
2612
0
      }
2613
0
      return 0;
2614
0
    }
2615
0
    else {
2616
0
      if (IS_NOT_NULL(xc->mbuf)) return 0;
2617
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2618
0
        int is_word;
2619
0
        if (NCTYPE(y)->ascii_range)
2620
0
    is_word = IS_CODE_SB_WORD(reg->enc, i);
2621
0
        else
2622
0
    is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2623
0
        if (! is_word) {
2624
0
    if (!IS_NCCLASS_NOT(xc)) {
2625
0
      if (BITSET_AT(xc->bs, i))
2626
0
        return 0;
2627
0
    }
2628
0
    else {
2629
0
      if (! BITSET_AT(xc->bs, i))
2630
0
        return 0;
2631
0
    }
2632
0
        }
2633
0
      }
2634
0
      return 1;
2635
0
    }
2636
0
    break;
2637
2638
0
  default:
2639
0
    break;
2640
0
  }
2641
0
  break;
2642
2643
0
      case NT_CCLASS:
2644
0
  {
2645
0
    int v;
2646
0
    CClassNode* yc = NCCLASS(y);
2647
2648
0
    for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2649
0
      v = BITSET_AT(xc->bs, i);
2650
0
      if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2651
0
    (v == 0 && IS_NCCLASS_NOT(xc))) {
2652
0
        v = BITSET_AT(yc->bs, i);
2653
0
        if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2654
0
      (v == 0 && IS_NCCLASS_NOT(yc)))
2655
0
    return 0;
2656
0
      }
2657
0
    }
2658
0
    if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2659
0
        (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2660
0
      return 1;
2661
0
    return 0;
2662
0
  }
2663
0
  break;
2664
2665
6.63k
      case NT_STR:
2666
6.63k
  goto swap;
2667
0
  break;
2668
2669
0
      default:
2670
0
  break;
2671
6.63k
      }
2672
6.63k
    }
2673
0
    break;
2674
2675
6.63k
  case NT_STR:
2676
6.63k
    {
2677
6.63k
      StrNode* xs = NSTR(x);
2678
6.63k
      if (NSTRING_LEN(x) == 0)
2679
0
  break;
2680
2681
6.63k
      switch (ytype) {
2682
0
      case NT_CTYPE:
2683
0
  switch (NCTYPE(y)->ctype) {
2684
0
  case ONIGENC_CTYPE_WORD:
2685
0
    if (NCTYPE(y)->ascii_range) {
2686
0
      if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2687
0
        return NCTYPE(y)->not;
2688
0
      else
2689
0
        return !(NCTYPE(y)->not);
2690
0
    }
2691
0
    else {
2692
0
      if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2693
0
        return NCTYPE(y)->not;
2694
0
      else
2695
0
        return !(NCTYPE(y)->not);
2696
0
    }
2697
0
    break;
2698
0
  default:
2699
0
    break;
2700
0
  }
2701
0
  break;
2702
2703
6.63k
      case NT_CCLASS:
2704
6.63k
  {
2705
6.63k
    CClassNode* cc = NCCLASS(y);
2706
2707
6.63k
    code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2708
6.63k
             xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2709
6.63k
    return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2710
0
  }
2711
0
  break;
2712
2713
0
      case NT_STR:
2714
0
  {
2715
0
    UChar *q;
2716
0
    StrNode* ys = NSTR(y);
2717
0
    len = NSTRING_LEN(x);
2718
0
    if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2719
0
    if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2720
      /* tiny version */
2721
0
      return 0;
2722
0
    }
2723
0
    else {
2724
0
      for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2725
0
        if (*p != *q) return 1;
2726
0
      }
2727
0
    }
2728
0
  }
2729
0
  break;
2730
2731
0
      default:
2732
0
  break;
2733
6.63k
      }
2734
6.63k
    }
2735
0
    break;
2736
2737
0
  default:
2738
0
    break;
2739
13.2k
  }
2740
2741
0
  return 0;
2742
13.2k
}
2743
2744
static Node*
2745
get_head_value_node(Node* node, int exact, regex_t* reg)
2746
35.9k
{
2747
35.9k
  Node* n = NULL_NODE;
2748
2749
35.9k
  switch (NTYPE(node)) {
2750
0
  case NT_BREF:
2751
1.06k
  case NT_ALT:
2752
3.67k
  case NT_CANY:
2753
3.67k
#ifdef USE_SUBEXP_CALL
2754
3.67k
  case NT_CALL:
2755
3.67k
#endif
2756
3.67k
    break;
2757
2758
0
  case NT_CTYPE:
2759
11.2k
  case NT_CCLASS:
2760
11.2k
    if (exact == 0) {
2761
10.2k
      n = node;
2762
10.2k
    }
2763
11.2k
    break;
2764
2765
0
  case NT_LIST:
2766
0
    n = get_head_value_node(NCAR(node), exact, reg);
2767
0
    break;
2768
2769
14.2k
  case NT_STR:
2770
14.2k
    {
2771
14.2k
      StrNode* sn = NSTR(node);
2772
2773
14.2k
      if (sn->end <= sn->s)
2774
0
  break;
2775
2776
14.2k
      if (exact == 0 ||
2777
14.2k
    NSTRING_IS_RAW(node) || !IS_IGNORECASE(reg->options)) {
2778
14.2k
  n = node;
2779
14.2k
      }
2780
14.2k
    }
2781
0
    break;
2782
2783
5.09k
  case NT_QTFR:
2784
5.09k
    {
2785
5.09k
      QtfrNode* qn = NQTFR(node);
2786
5.09k
      if (qn->lower > 0) {
2787
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2788
  if (IS_NOT_NULL(qn->head_exact))
2789
    n = qn->head_exact;
2790
  else
2791
#endif
2792
1.00k
    n = get_head_value_node(qn->target, exact, reg);
2793
1.00k
      }
2794
5.09k
    }
2795
5.09k
    break;
2796
2797
0
  case NT_ENCLOSE:
2798
0
    {
2799
0
      EncloseNode* en = NENCLOSE(node);
2800
0
      switch (en->type) {
2801
0
      case ENCLOSE_OPTION:
2802
0
  {
2803
0
    OnigOptionType options = reg->options;
2804
2805
0
    reg->options = NENCLOSE(node)->option;
2806
0
    n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2807
0
    reg->options = options;
2808
0
  }
2809
0
  break;
2810
2811
0
      case ENCLOSE_MEMORY:
2812
0
      case ENCLOSE_STOP_BACKTRACK:
2813
0
      case ENCLOSE_CONDITION:
2814
0
  n = get_head_value_node(en->target, exact, reg);
2815
0
  break;
2816
2817
0
      case ENCLOSE_ABSENT:
2818
0
  break;
2819
0
      }
2820
0
    }
2821
0
    break;
2822
2823
1.62k
  case NT_ANCHOR:
2824
1.62k
    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2825
0
      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2826
1.62k
    break;
2827
2828
0
  default:
2829
0
    break;
2830
35.9k
  }
2831
2832
35.9k
  return n;
2833
35.9k
}
2834
2835
static int
2836
check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2837
0
{
2838
0
  int type, r = 0;
2839
2840
0
  type = NTYPE(node);
2841
0
  if ((NTYPE2BIT(type) & type_mask) == 0)
2842
0
    return 1;
2843
2844
0
  switch (type) {
2845
0
  case NT_LIST:
2846
0
  case NT_ALT:
2847
0
    do {
2848
0
      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2849
0
        anchor_mask);
2850
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2851
0
    break;
2852
2853
0
  case NT_QTFR:
2854
0
    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2855
0
      anchor_mask);
2856
0
    break;
2857
2858
0
  case NT_ENCLOSE:
2859
0
    {
2860
0
      EncloseNode* en = NENCLOSE(node);
2861
0
      if ((en->type & enclose_mask) == 0)
2862
0
  return 1;
2863
2864
0
      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2865
0
    }
2866
0
    break;
2867
2868
0
  case NT_ANCHOR:
2869
0
    type = NANCHOR(node)->type;
2870
0
    if ((type & anchor_mask) == 0)
2871
0
      return 1;
2872
2873
0
    if (NANCHOR(node)->target)
2874
0
      r = check_type_tree(NANCHOR(node)->target,
2875
0
        type_mask, enclose_mask, anchor_mask);
2876
0
    break;
2877
2878
0
  default:
2879
0
    break;
2880
0
  }
2881
0
  return r;
2882
0
}
2883
2884
#ifdef USE_SUBEXP_CALL
2885
2886
0
# define RECURSION_EXIST       1
2887
0
# define RECURSION_INFINITE    2
2888
2889
static int
2890
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2891
0
{
2892
0
  int type;
2893
0
  int r = 0;
2894
2895
0
  type = NTYPE(node);
2896
0
  switch (type) {
2897
0
  case NT_LIST:
2898
0
    {
2899
0
      Node *x;
2900
0
      OnigDistance min;
2901
0
      int ret;
2902
2903
0
      x = node;
2904
0
      do {
2905
0
  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2906
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2907
0
  r |= ret;
2908
0
  if (head) {
2909
0
    ret = get_min_match_length(NCAR(x), &min, env);
2910
0
    if (ret != 0) return ret;
2911
0
    if (min != 0) head = 0;
2912
0
  }
2913
0
      } while (IS_NOT_NULL(x = NCDR(x)));
2914
0
    }
2915
0
    break;
2916
2917
0
  case NT_ALT:
2918
0
    {
2919
0
      int ret;
2920
0
      r = RECURSION_EXIST;
2921
0
      do {
2922
0
  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2923
0
  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2924
0
  r &= ret;
2925
0
      } while (IS_NOT_NULL(node = NCDR(node)));
2926
0
    }
2927
0
    break;
2928
2929
0
  case NT_QTFR:
2930
0
    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2931
0
    if (r == RECURSION_EXIST) {
2932
0
      if (NQTFR(node)->lower == 0) r = 0;
2933
0
    }
2934
0
    break;
2935
2936
0
  case NT_ANCHOR:
2937
0
    {
2938
0
      AnchorNode* an = NANCHOR(node);
2939
0
      switch (an->type) {
2940
0
      case ANCHOR_PREC_READ:
2941
0
      case ANCHOR_PREC_READ_NOT:
2942
0
      case ANCHOR_LOOK_BEHIND:
2943
0
      case ANCHOR_LOOK_BEHIND_NOT:
2944
0
  r = subexp_inf_recursive_check(an->target, env, head);
2945
0
  break;
2946
0
      }
2947
0
    }
2948
0
    break;
2949
2950
0
  case NT_CALL:
2951
0
    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2952
0
    break;
2953
2954
0
  case NT_ENCLOSE:
2955
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2956
0
      return 0;
2957
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2958
0
      return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2959
0
    else {
2960
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
2961
0
      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2962
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2963
0
    }
2964
0
    break;
2965
2966
0
  default:
2967
0
    break;
2968
0
  }
2969
2970
0
  return r;
2971
0
}
2972
2973
static int
2974
subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2975
0
{
2976
0
  int type;
2977
0
  int r = 0;
2978
2979
0
  type = NTYPE(node);
2980
0
  switch (type) {
2981
0
  case NT_LIST:
2982
0
  case NT_ALT:
2983
0
    do {
2984
0
      r = subexp_inf_recursive_check_trav(NCAR(node), env);
2985
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2986
0
    break;
2987
2988
0
  case NT_QTFR:
2989
0
    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2990
0
    break;
2991
2992
0
  case NT_ANCHOR:
2993
0
    {
2994
0
      AnchorNode* an = NANCHOR(node);
2995
0
      switch (an->type) {
2996
0
      case ANCHOR_PREC_READ:
2997
0
      case ANCHOR_PREC_READ_NOT:
2998
0
      case ANCHOR_LOOK_BEHIND:
2999
0
      case ANCHOR_LOOK_BEHIND_NOT:
3000
0
  r = subexp_inf_recursive_check_trav(an->target, env);
3001
0
  break;
3002
0
      }
3003
0
    }
3004
0
    break;
3005
3006
0
  case NT_ENCLOSE:
3007
0
    {
3008
0
      EncloseNode* en = NENCLOSE(node);
3009
3010
0
      if (IS_ENCLOSE_RECURSION(en)) {
3011
0
  SET_ENCLOSE_STATUS(node, NST_MARK1);
3012
0
  r = subexp_inf_recursive_check(en->target, env, 1);
3013
0
  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3014
0
  CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3015
0
      }
3016
0
      r = subexp_inf_recursive_check_trav(en->target, env);
3017
0
    }
3018
3019
0
    break;
3020
3021
0
  default:
3022
0
    break;
3023
0
  }
3024
3025
0
  return r;
3026
0
}
3027
3028
static int
3029
subexp_recursive_check(Node* node)
3030
0
{
3031
0
  int r = 0;
3032
3033
0
  switch (NTYPE(node)) {
3034
0
  case NT_LIST:
3035
0
  case NT_ALT:
3036
0
    do {
3037
0
      r |= subexp_recursive_check(NCAR(node));
3038
0
    } while (IS_NOT_NULL(node = NCDR(node)));
3039
0
    break;
3040
3041
0
  case NT_QTFR:
3042
0
    r = subexp_recursive_check(NQTFR(node)->target);
3043
0
    break;
3044
3045
0
  case NT_ANCHOR:
3046
0
    {
3047
0
      AnchorNode* an = NANCHOR(node);
3048
0
      switch (an->type) {
3049
0
      case ANCHOR_PREC_READ:
3050
0
      case ANCHOR_PREC_READ_NOT:
3051
0
      case ANCHOR_LOOK_BEHIND:
3052
0
      case ANCHOR_LOOK_BEHIND_NOT:
3053
0
  r = subexp_recursive_check(an->target);
3054
0
  break;
3055
0
      }
3056
0
    }
3057
0
    break;
3058
3059
0
  case NT_CALL:
3060
0
    r = subexp_recursive_check(NCALL(node)->target);
3061
0
    if (r != 0) SET_CALL_RECURSION(node);
3062
0
    break;
3063
3064
0
  case NT_ENCLOSE:
3065
0
    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3066
0
      return 0;
3067
0
    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3068
0
      return 1; /* recursion */
3069
0
    else {
3070
0
      SET_ENCLOSE_STATUS(node, NST_MARK2);
3071
0
      r = subexp_recursive_check(NENCLOSE(node)->target);
3072
0
      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3073
0
    }
3074
0
    break;
3075
3076
0
  default:
3077
0
    break;
3078
0
  }
3079
3080
0
  return r;
3081
0
}
3082
3083
3084
static int
3085
subexp_recursive_check_trav(Node* node, ScanEnv* env)
3086
0
{
3087
0
# define FOUND_CALLED_NODE    1
3088
3089
0
  int type;
3090
0
  int r = 0;
3091
3092
0
  type = NTYPE(node);
3093
0
  switch (type) {
3094
0
  case NT_LIST:
3095
0
  case NT_ALT:
3096
0
    {
3097
0
      int ret;
3098
0
      do {
3099
0
  ret = subexp_recursive_check_trav(NCAR(node), env);
3100
0
  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3101
0
  else if (ret < 0) return ret;
3102
0
      } while (IS_NOT_NULL(node = NCDR(node)));
3103
0
    }
3104
0
    break;
3105
3106
0
  case NT_QTFR:
3107
0
    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3108
0
    if (NQTFR(node)->upper == 0) {
3109
0
      if (r == FOUND_CALLED_NODE)
3110
0
  NQTFR(node)->is_referred = 1;
3111
0
    }
3112
0
    break;
3113
3114
0
  case NT_ANCHOR:
3115
0
    {
3116
0
      AnchorNode* an = NANCHOR(node);
3117
0
      switch (an->type) {
3118
0
      case ANCHOR_PREC_READ:
3119
0
      case ANCHOR_PREC_READ_NOT:
3120
0
      case ANCHOR_LOOK_BEHIND:
3121
0
      case ANCHOR_LOOK_BEHIND_NOT:
3122
0
  r = subexp_recursive_check_trav(an->target, env);
3123
0
  break;
3124
0
      }
3125
0
    }
3126
0
    break;
3127
3128
0
  case NT_ENCLOSE:
3129
0
    {
3130
0
      EncloseNode* en = NENCLOSE(node);
3131
3132
0
      if (! IS_ENCLOSE_RECURSION(en)) {
3133
0
  if (IS_ENCLOSE_CALLED(en)) {
3134
0
    SET_ENCLOSE_STATUS(node, NST_MARK1);
3135
0
    r = subexp_recursive_check(en->target);
3136
0
    if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3137
0
    CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3138
0
  }
3139
0
      }
3140
0
      r = subexp_recursive_check_trav(en->target, env);
3141
0
      if (IS_ENCLOSE_CALLED(en))
3142
0
  r |= FOUND_CALLED_NODE;
3143
0
    }
3144
0
    break;
3145
3146
0
  default:
3147
0
    break;
3148
0
  }
3149
3150
0
  return r;
3151
0
}
3152
3153
static int
3154
setup_subexp_call(Node* node, ScanEnv* env)
3155
0
{
3156
0
  int type;
3157
0
  int r = 0;
3158
3159
0
  type = NTYPE(node);
3160
0
  switch (type) {
3161
0
  case NT_LIST:
3162
0
    do {
3163
0
      r = setup_subexp_call(NCAR(node), env);
3164
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3165
0
    break;
3166
3167
0
  case NT_ALT:
3168
0
    do {
3169
0
      r = setup_subexp_call(NCAR(node), env);
3170
0
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3171
0
    break;
3172
3173
0
  case NT_QTFR:
3174
0
    r = setup_subexp_call(NQTFR(node)->target, env);
3175
0
    break;
3176
0
  case NT_ENCLOSE:
3177
0
    r = setup_subexp_call(NENCLOSE(node)->target, env);
3178
0
    break;
3179
3180
0
  case NT_CALL:
3181
0
    {
3182
0
      CallNode* cn = NCALL(node);
3183
0
      Node** nodes = SCANENV_MEM_NODES(env);
3184
3185
0
      if (cn->group_num != 0) {
3186
0
  int gnum = cn->group_num;
3187
3188
0
# ifdef USE_NAMED_GROUP
3189
0
  if (env->num_named > 0 &&
3190
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3191
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3192
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3193
0
  }
3194
0
# endif
3195
0
  if (gnum > env->num_mem) {
3196
0
    onig_scan_env_set_error_string(env,
3197
0
     ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3198
0
    return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3199
0
  }
3200
3201
0
# ifdef USE_NAMED_GROUP
3202
0
      set_call_attr:
3203
0
# endif
3204
0
  cn->target = nodes[cn->group_num];
3205
0
  if (IS_NULL(cn->target)) {
3206
0
    onig_scan_env_set_error_string(env,
3207
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3208
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3209
0
  }
3210
0
  SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3211
0
  BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3212
0
  cn->unset_addr_list = env->unset_addr_list;
3213
0
      }
3214
0
# ifdef USE_NAMED_GROUP
3215
0
#  ifdef USE_PERL_SUBEXP_CALL
3216
0
      else if (cn->name == cn->name_end) {
3217
0
  goto set_call_attr;
3218
0
      }
3219
0
#  endif
3220
0
      else {
3221
0
  int *refs;
3222
3223
0
  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3224
0
             &refs);
3225
0
  if (n <= 0) {
3226
0
    onig_scan_env_set_error_string(env,
3227
0
     ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3228
0
    return ONIGERR_UNDEFINED_NAME_REFERENCE;
3229
0
  }
3230
0
  else if (n > 1 &&
3231
0
      ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3232
0
    onig_scan_env_set_error_string(env,
3233
0
      ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3234
0
    return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3235
0
  }
3236
0
  else {
3237
0
    cn->group_num = refs[0];
3238
0
    goto set_call_attr;
3239
0
  }
3240
0
      }
3241
0
# endif
3242
0
    }
3243
0
    break;
3244
3245
0
  case NT_ANCHOR:
3246
0
    {
3247
0
      AnchorNode* an = NANCHOR(node);
3248
3249
0
      switch (an->type) {
3250
0
      case ANCHOR_PREC_READ:
3251
0
      case ANCHOR_PREC_READ_NOT:
3252
0
      case ANCHOR_LOOK_BEHIND:
3253
0
      case ANCHOR_LOOK_BEHIND_NOT:
3254
0
  r = setup_subexp_call(an->target, env);
3255
0
  break;
3256
0
      }
3257
0
    }
3258
0
    break;
3259
3260
0
  default:
3261
0
    break;
3262
0
  }
3263
3264
0
  return r;
3265
0
}
3266
#endif
3267
3268
10.2k
#define IN_ALT          (1<<0)
3269
2.74k
#define IN_NOT          (1<<1)
3270
30.9k
#define IN_REPEAT       (1<<2)
3271
18.2k
#define IN_VAR_REPEAT   (1<<3)
3272
2.74k
#define IN_CALL         (1<<4)
3273
2.74k
#define IN_RECCALL      (1<<5)
3274
0
#define IN_LOOK_BEHIND  (1<<6)
3275
3276
/* divide different length alternatives in look-behind.
3277
  (?<=A|B) ==> (?<=A)|(?<=B)
3278
  (?<!A|B) ==> (?<!A)(?<!B)
3279
*/
3280
static int
3281
divide_look_behind_alternatives(Node* node)
3282
0
{
3283
0
  Node *head, *np, *insert_node;
3284
0
  AnchorNode* an = NANCHOR(node);
3285
0
  int anc_type = an->type;
3286
3287
0
  head = an->target;
3288
0
  np = NCAR(head);
3289
0
  swap_node(node, head);
3290
0
  NCAR(node) = head;
3291
0
  NANCHOR(head)->target = np;
3292
3293
0
  np = node;
3294
0
  while ((np = NCDR(np)) != NULL_NODE) {
3295
0
    insert_node = onig_node_new_anchor(anc_type);
3296
0
    CHECK_NULL_RETURN_MEMERR(insert_node);
3297
0
    NANCHOR(insert_node)->target = NCAR(np);
3298
0
    NCAR(np) = insert_node;
3299
0
  }
3300
3301
0
  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3302
0
    np = node;
3303
0
    do {
3304
0
      SET_NTYPE(np, NT_LIST);  /* alt -> list */
3305
0
    } while ((np = NCDR(np)) != NULL_NODE);
3306
0
  }
3307
0
  return 0;
3308
0
}
3309
3310
static int
3311
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3312
0
{
3313
0
  int r, len;
3314
0
  AnchorNode* an = NANCHOR(node);
3315
3316
0
  r = get_char_length_tree(an->target, reg, &len);
3317
0
  if (r == 0)
3318
0
    an->char_len = len;
3319
0
  else if (r == GET_CHAR_LEN_VARLEN)
3320
0
    r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3321
0
  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3322
0
    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3323
0
      r = divide_look_behind_alternatives(node);
3324
0
    else
3325
0
      r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3326
0
  }
3327
3328
0
  return r;
3329
0
}
3330
3331
static int
3332
next_setup(Node* node, Node* next_node, regex_t* reg)
3333
39.0k
{
3334
39.0k
  int type;
3335
3336
41.7k
 retry:
3337
41.7k
  type = NTYPE(node);
3338
41.7k
  if (type == NT_QTFR) {
3339
13.9k
    QtfrNode* qn = NQTFR(node);
3340
13.9k
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3341
12.8k
#ifdef USE_QTFR_PEEK_NEXT
3342
12.8k
      Node* n = get_head_value_node(next_node, 1, reg);
3343
      /* '\0': for UTF-16BE etc... */
3344
12.8k
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3345
7.64k
  qn->next_head_exact = n;
3346
7.64k
      }
3347
12.8k
#endif
3348
      /* automatic possessification a*b ==> (?>a*)b */
3349
12.8k
      if (qn->lower <= 1) {
3350
12.8k
  int ttype = NTYPE(qn->target);
3351
12.8k
  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3352
11.8k
    Node *x, *y;
3353
11.8k
    x = get_head_value_node(qn->target, 0, reg);
3354
11.8k
    if (IS_NOT_NULL(x)) {
3355
10.2k
      y = get_head_value_node(next_node,  0, reg);
3356
10.2k
      if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3357
6.63k
        Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3358
6.63k
        CHECK_NULL_RETURN_MEMERR(en);
3359
6.63k
        SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3360
6.63k
        swap_node(node, en);
3361
6.63k
        NENCLOSE(node)->target = en;
3362
6.63k
      }
3363
10.2k
    }
3364
11.8k
  }
3365
12.8k
      }
3366
12.8k
    }
3367
13.9k
  }
3368
27.8k
  else if (type == NT_ENCLOSE) {
3369
3.27k
    EncloseNode* en = NENCLOSE(node);
3370
3.27k
    if (en->type == ENCLOSE_MEMORY) {
3371
2.74k
      node = en->target;
3372
2.74k
      goto retry;
3373
2.74k
    }
3374
3.27k
  }
3375
39.0k
  return 0;
3376
41.7k
}
3377
3378
3379
static int
3380
update_string_node_case_fold(regex_t* reg, Node *node)
3381
0
{
3382
0
  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3383
0
  UChar *sbuf, *ebuf, *sp;
3384
0
  int r, i, len;
3385
0
  OnigDistance sbuf_size;
3386
0
  StrNode* sn = NSTR(node);
3387
3388
0
  end = sn->end;
3389
0
  sbuf_size = (end - sn->s) * 2;
3390
0
  sbuf = (UChar* )xmalloc(sbuf_size);
3391
0
  CHECK_NULL_RETURN_MEMERR(sbuf);
3392
0
  ebuf = sbuf + sbuf_size;
3393
3394
0
  sp = sbuf;
3395
0
  p = sn->s;
3396
0
  while (p < end) {
3397
0
    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3398
0
    for (i = 0; i < len; i++) {
3399
0
      if (sp >= ebuf) {
3400
0
  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3401
0
  if (IS_NULL(p)) {
3402
0
    xfree(sbuf);
3403
0
    return ONIGERR_MEMORY;
3404
0
  }
3405
0
  sbuf = p;
3406
0
  sp = sbuf + sbuf_size;
3407
0
  sbuf_size *= 2;
3408
0
  ebuf = sbuf + sbuf_size;
3409
0
      }
3410
3411
0
      *sp++ = buf[i];
3412
0
    }
3413
0
  }
3414
3415
0
  r = onig_node_str_set(node, sbuf, sp);
3416
3417
0
  xfree(sbuf);
3418
0
  return r;
3419
0
}
3420
3421
static int
3422
expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3423
         regex_t* reg)
3424
0
{
3425
0
  int r;
3426
0
  Node *node;
3427
3428
0
  node = onig_node_new_str(s, end);
3429
0
  if (IS_NULL(node)) return ONIGERR_MEMORY;
3430
3431
0
  r = update_string_node_case_fold(reg, node);
3432
0
  if (r != 0) {
3433
0
    onig_node_free(node);
3434
0
    return r;
3435
0
  }
3436
3437
0
  NSTRING_SET_AMBIG(node);
3438
0
  NSTRING_SET_DONT_GET_OPT_INFO(node);
3439
0
  *rnode = node;
3440
0
  return 0;
3441
0
}
3442
3443
static int
3444
is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3445
        int slen)
3446
0
{
3447
0
  int i;
3448
3449
0
  for (i = 0; i < item_num; i++) {
3450
0
    if (items[i].byte_len != slen) {
3451
0
      return 1;
3452
0
    }
3453
0
    if (items[i].code_len != 1) {
3454
0
      return 1;
3455
0
    }
3456
0
  }
3457
0
  return 0;
3458
0
}
3459
3460
static int
3461
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3462
          UChar *p, int slen, UChar *end,
3463
          regex_t* reg, Node **rnode)
3464
0
{
3465
0
  int r, i, j, len, varlen;
3466
0
  Node *anode, *var_anode, *snode, *xnode, *an;
3467
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3468
3469
0
  *rnode = var_anode = NULL_NODE;
3470
3471
0
  varlen = 0;
3472
0
  for (i = 0; i < item_num; i++) {
3473
0
    if (items[i].byte_len != slen) {
3474
0
      varlen = 1;
3475
0
      break;
3476
0
    }
3477
0
  }
3478
3479
0
  if (varlen != 0) {
3480
0
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3481
0
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3482
3483
0
    xnode = onig_node_new_list(NULL, NULL);
3484
0
    if (IS_NULL(xnode)) goto mem_err;
3485
0
    NCAR(var_anode) = xnode;
3486
3487
0
    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3488
0
    if (IS_NULL(anode)) goto mem_err;
3489
0
    NCAR(xnode) = anode;
3490
0
  }
3491
0
  else {
3492
0
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3493
0
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
3494
0
  }
3495
3496
0
  snode = onig_node_new_str(p, p + slen);
3497
0
  if (IS_NULL(snode)) goto mem_err;
3498
3499
0
  NCAR(anode) = snode;
3500
3501
0
  for (i = 0; i < item_num; i++) {
3502
0
    snode = onig_node_new_str(NULL, NULL);
3503
0
    if (IS_NULL(snode)) goto mem_err;
3504
3505
0
    for (j = 0; j < items[i].code_len; j++) {
3506
0
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3507
0
      if (len < 0) {
3508
0
  r = len;
3509
0
  goto mem_err2;
3510
0
      }
3511
3512
0
      r = onig_node_str_cat(snode, buf, buf + len);
3513
0
      if (r != 0) goto mem_err2;
3514
0
    }
3515
3516
0
    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3517
0
    if (IS_NULL(an)) {
3518
0
      goto mem_err2;
3519
0
    }
3520
3521
0
    if (items[i].byte_len != slen) {
3522
0
      Node *rem;
3523
0
      UChar *q = p + items[i].byte_len;
3524
3525
0
      if (q < end) {
3526
0
  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3527
0
  if (r != 0) {
3528
0
    onig_node_free(an);
3529
0
    goto mem_err2;
3530
0
  }
3531
3532
0
  xnode = onig_node_list_add(NULL_NODE, snode);
3533
0
  if (IS_NULL(xnode)) {
3534
0
    onig_node_free(an);
3535
0
    onig_node_free(rem);
3536
0
    goto mem_err2;
3537
0
  }
3538
0
  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3539
0
    onig_node_free(an);
3540
0
    onig_node_free(xnode);
3541
0
    onig_node_free(rem);
3542
0
    goto mem_err;
3543
0
  }
3544
3545
0
  NCAR(an) = xnode;
3546
0
      }
3547
0
      else {
3548
0
  NCAR(an) = snode;
3549
0
      }
3550
3551
0
      NCDR(var_anode) = an;
3552
0
      var_anode = an;
3553
0
    }
3554
0
    else {
3555
0
      NCAR(an)     = snode;
3556
0
      NCDR(anode) = an;
3557
0
      anode = an;
3558
0
    }
3559
0
  }
3560
3561
0
  return varlen;
3562
3563
0
 mem_err2:
3564
0
  onig_node_free(snode);
3565
3566
0
 mem_err:
3567
0
  onig_node_free(*rnode);
3568
3569
0
  return ONIGERR_MEMORY;
3570
0
}
3571
3572
0
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
3573
3574
static int
3575
expand_case_fold_string(Node* node, regex_t* reg, int state)
3576
0
{
3577
0
  int r, n, len, alt_num;
3578
0
  int varlen = 0;
3579
0
  int is_in_look_behind;
3580
0
  UChar *start, *end, *p;
3581
0
  Node *top_root, *root, *snode, *prev_node;
3582
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3583
0
  StrNode* sn;
3584
3585
0
  if (NSTRING_IS_AMBIG(node)) return 0;
3586
3587
0
  sn = NSTR(node);
3588
3589
0
  start = sn->s;
3590
0
  end   = sn->end;
3591
0
  if (start >= end) return 0;
3592
3593
0
  is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
3594
3595
0
  r = 0;
3596
0
  top_root = root = prev_node = snode = NULL_NODE;
3597
0
  alt_num = 1;
3598
0
  p = start;
3599
0
  while (p < end) {
3600
0
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3601
0
             p, end, items);
3602
0
    if (n < 0) {
3603
0
      r = n;
3604
0
      goto err;
3605
0
    }
3606
3607
0
    len = enclen(reg->enc, p, end);
3608
3609
0
    varlen = is_case_fold_variable_len(n, items, len);
3610
0
    if (n == 0 || varlen == 0 || is_in_look_behind) {
3611
0
      if (IS_NULL(snode)) {
3612
0
  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3613
0
          onig_node_free(top_root);
3614
0
    top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3615
0
    if (IS_NULL(root)) {
3616
0
      onig_node_free(prev_node);
3617
0
      goto mem_err;
3618
0
    }
3619
0
  }
3620
3621
0
  prev_node = snode = onig_node_new_str(NULL, NULL);
3622
0
  if (IS_NULL(snode)) goto mem_err;
3623
0
  if (IS_NOT_NULL(root)) {
3624
0
    if (IS_NULL(onig_node_list_add(root, snode))) {
3625
0
      onig_node_free(snode);
3626
0
      goto mem_err;
3627
0
    }
3628
0
  }
3629
0
      }
3630
3631
0
      r = onig_node_str_cat(snode, p, p + len);
3632
0
      if (r != 0) goto err;
3633
0
    }
3634
0
    else {
3635
0
      alt_num *= (n + 1);
3636
0
      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3637
3638
0
      if (IS_NOT_NULL(snode)) {
3639
0
  r = update_string_node_case_fold(reg, snode);
3640
0
  if (r == 0) {
3641
0
    NSTRING_SET_AMBIG(snode);
3642
0
  }
3643
0
      }
3644
0
      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3645
0
        onig_node_free(top_root);
3646
0
  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3647
0
  if (IS_NULL(root)) {
3648
0
    onig_node_free(prev_node);
3649
0
    goto mem_err;
3650
0
  }
3651
0
      }
3652
3653
0
      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3654
0
      if (r < 0) goto mem_err;
3655
0
      if (r == 1) {
3656
0
  if (IS_NULL(root)) {
3657
0
    top_root = prev_node;
3658
0
  }
3659
0
  else {
3660
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3661
0
      onig_node_free(prev_node);
3662
0
      goto mem_err;
3663
0
    }
3664
0
  }
3665
3666
0
  root = NCAR(prev_node);
3667
0
      }
3668
0
      else { /* r == 0 */
3669
0
  if (IS_NOT_NULL(root)) {
3670
0
    if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671
0
      onig_node_free(prev_node);
3672
0
      goto mem_err;
3673
0
    }
3674
0
  }
3675
0
      }
3676
3677
0
      snode = NULL_NODE;
3678
0
    }
3679
3680
0
    p += len;
3681
0
  }
3682
0
  if (IS_NOT_NULL(snode)) {
3683
0
    r = update_string_node_case_fold(reg, snode);
3684
0
    if (r == 0) {
3685
0
      NSTRING_SET_AMBIG(snode);
3686
0
    }
3687
0
  }
3688
3689
0
  if (p < end) {
3690
0
    Node *srem;
3691
3692
0
    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3693
0
    if (r != 0) goto mem_err;
3694
3695
0
    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3696
0
      onig_node_free(top_root);
3697
0
      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3698
0
      if (IS_NULL(root)) {
3699
0
  onig_node_free(srem);
3700
0
  onig_node_free(prev_node);
3701
0
  goto mem_err;
3702
0
      }
3703
0
    }
3704
3705
0
    if (IS_NULL(root)) {
3706
0
      prev_node = srem;
3707
0
    }
3708
0
    else {
3709
0
      if (IS_NULL(onig_node_list_add(root, srem))) {
3710
0
  onig_node_free(srem);
3711
0
  goto mem_err;
3712
0
      }
3713
0
    }
3714
0
  }
3715
3716
  /* ending */
3717
0
  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3718
0
  swap_node(node, top_root);
3719
0
  onig_node_free(top_root);
3720
0
  return 0;
3721
3722
0
 mem_err:
3723
0
  r = ONIGERR_MEMORY;
3724
3725
0
 err:
3726
0
  onig_node_free(top_root);
3727
0
  return r;
3728
0
}
3729
3730
3731
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3732
3733
# define CEC_THRES_NUM_BIG_REPEAT         512
3734
# define CEC_INFINITE_NUM          0x7fffffff
3735
3736
# define CEC_IN_INFINITE_REPEAT    (1<<0)
3737
# define CEC_IN_FINITE_REPEAT      (1<<1)
3738
# define CEC_CONT_BIG_REPEAT       (1<<2)
3739
3740
static int
3741
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3742
{
3743
  int type;
3744
  int r = state;
3745
3746
  type = NTYPE(node);
3747
  switch (type) {
3748
  case NT_LIST:
3749
    {
3750
      do {
3751
  r = setup_comb_exp_check(NCAR(node), r, env);
3752
      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3753
    }
3754
    break;
3755
3756
  case NT_ALT:
3757
    {
3758
      int ret;
3759
      do {
3760
  ret = setup_comb_exp_check(NCAR(node), state, env);
3761
  r |= ret;
3762
      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3763
    }
3764
    break;
3765
3766
  case NT_QTFR:
3767
    {
3768
      int child_state = state;
3769
      int add_state = 0;
3770
      QtfrNode* qn = NQTFR(node);
3771
      Node* target = qn->target;
3772
      int var_num;
3773
3774
      if (! IS_REPEAT_INFINITE(qn->upper)) {
3775
  if (qn->upper > 1) {
3776
    /* {0,1}, {1,1} are allowed */
3777
    child_state |= CEC_IN_FINITE_REPEAT;
3778
3779
    /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3780
    if (env->backrefed_mem == 0) {
3781
      if (NTYPE(qn->target) == NT_ENCLOSE) {
3782
        EncloseNode* en = NENCLOSE(qn->target);
3783
        if (en->type == ENCLOSE_MEMORY) {
3784
    if (NTYPE(en->target) == NT_QTFR) {
3785
      QtfrNode* q = NQTFR(en->target);
3786
      if (IS_REPEAT_INFINITE(q->upper)
3787
          && q->greedy == qn->greedy) {
3788
        qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3789
        if (qn->upper == 1)
3790
          child_state = state;
3791
      }
3792
    }
3793
        }
3794
      }
3795
    }
3796
  }
3797
      }
3798
3799
      if (state & CEC_IN_FINITE_REPEAT) {
3800
  qn->comb_exp_check_num = -1;
3801
      }
3802
      else {
3803
  if (IS_REPEAT_INFINITE(qn->upper)) {
3804
    var_num = CEC_INFINITE_NUM;
3805
    child_state |= CEC_IN_INFINITE_REPEAT;
3806
  }
3807
  else {
3808
    var_num = qn->upper - qn->lower;
3809
  }
3810
3811
  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3812
    add_state |= CEC_CONT_BIG_REPEAT;
3813
3814
  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3815
      ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3816
       var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3817
    if (qn->comb_exp_check_num == 0) {
3818
      env->num_comb_exp_check++;
3819
      qn->comb_exp_check_num = env->num_comb_exp_check;
3820
      if (env->curr_max_regnum > env->comb_exp_max_regnum)
3821
        env->comb_exp_max_regnum = env->curr_max_regnum;
3822
    }
3823
  }
3824
      }
3825
3826
      r = setup_comb_exp_check(target, child_state, env);
3827
      r |= add_state;
3828
    }
3829
    break;
3830
3831
  case NT_ENCLOSE:
3832
    {
3833
      EncloseNode* en = NENCLOSE(node);
3834
3835
      switch (en->type) {
3836
      case ENCLOSE_MEMORY:
3837
  {
3838
    if (env->curr_max_regnum < en->regnum)
3839
      env->curr_max_regnum = en->regnum;
3840
3841
    r = setup_comb_exp_check(en->target, state, env);
3842
  }
3843
  break;
3844
3845
      default:
3846
  r = setup_comb_exp_check(en->target, state, env);
3847
  break;
3848
      }
3849
    }
3850
    break;
3851
3852
# ifdef USE_SUBEXP_CALL
3853
  case NT_CALL:
3854
    if (IS_CALL_RECURSION(NCALL(node)))
3855
      env->has_recursion = 1;
3856
    else
3857
      r = setup_comb_exp_check(NCALL(node)->target, state, env);
3858
    break;
3859
# endif
3860
3861
  default:
3862
    break;
3863
  }
3864
3865
  return r;
3866
}
3867
#endif
3868
3869
/* setup_tree does the following work.
3870
 1. check empty loop. (set qn->target_empty_info)
3871
 2. expand ignore-case in char class.
3872
 3. set memory status bit flags. (reg->mem_stats)
3873
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3874
 5. find invalid patterns in look-behind.
3875
 6. expand repeated string.
3876
 */
3877
static int
3878
setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3879
89.6k
{
3880
89.6k
  int type;
3881
89.6k
  int r = 0;
3882
3883
89.6k
restart:
3884
89.6k
  type = NTYPE(node);
3885
89.6k
  switch (type) {
3886
12.4k
  case NT_LIST:
3887
12.4k
    {
3888
12.4k
      Node* prev = NULL_NODE;
3889
51.4k
      do {
3890
51.4k
  r = setup_tree(NCAR(node), reg, state, env);
3891
51.4k
  if (IS_NOT_NULL(prev) && r == 0) {
3892
39.0k
    r = next_setup(prev, NCAR(node), reg);
3893
39.0k
  }
3894
51.4k
  prev = NCAR(node);
3895
51.4k
      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3896
12.4k
    }
3897
12.4k
    break;
3898
3899
3.21k
  case NT_ALT:
3900
7.50k
    do {
3901
7.50k
      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3902
7.50k
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3903
3.21k
    break;
3904
3905
12.3k
  case NT_CCLASS:
3906
12.3k
    break;
3907
3908
22.5k
  case NT_STR:
3909
22.5k
    if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3910
0
      r = expand_case_fold_string(node, reg, state);
3911
0
    }
3912
22.5k
    break;
3913
3914
0
  case NT_CTYPE:
3915
5.24k
  case NT_CANY:
3916
5.24k
    break;
3917
3918
0
#ifdef USE_SUBEXP_CALL
3919
0
  case NT_CALL:
3920
0
    break;
3921
0
#endif
3922
3923
0
  case NT_BREF:
3924
0
    {
3925
0
      int i;
3926
0
      int* p;
3927
0
      Node** nodes = SCANENV_MEM_NODES(env);
3928
0
      BRefNode* br = NBREF(node);
3929
0
      p = BACKREFS_P(br);
3930
0
      for (i = 0; i < br->back_num; i++) {
3931
0
  if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
3932
0
  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3933
0
  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3934
0
#ifdef USE_BACKREF_WITH_LEVEL
3935
0
  if (IS_BACKREF_NEST_LEVEL(br)) {
3936
0
    BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3937
0
  }
3938
0
#endif
3939
0
  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3940
0
      }
3941
0
    }
3942
0
    break;
3943
3944
15.4k
  case NT_QTFR:
3945
15.4k
    {
3946
15.4k
      OnigDistance d;
3947
15.4k
      QtfrNode* qn = NQTFR(node);
3948
15.4k
      Node* target = qn->target;
3949
3950
15.4k
      if ((state & IN_REPEAT) != 0) {
3951
1.00k
  qn->state |= NST_IN_REPEAT;
3952
1.00k
      }
3953
3954
15.4k
      if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3955
15.4k
  r = get_min_match_length(target, &d, env);
3956
15.4k
  if (r) break;
3957
15.4k
  if (d == 0) {
3958
0
    qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3959
0
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3960
0
    r = quantifiers_memory_node_info(target);
3961
0
    if (r < 0) break;
3962
0
    if (r > 0) {
3963
0
      qn->target_empty_info = r;
3964
0
    }
3965
0
#endif
3966
#if 0
3967
    r = get_max_match_length(target, &d, env);
3968
    if (r == 0 && d == 0) {
3969
      /*  ()* ==> ()?, ()+ ==> ()  */
3970
      qn->upper = 1;
3971
      if (qn->lower > 1) qn->lower = 1;
3972
      if (NTYPE(target) == NT_STR) {
3973
        qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
3974
      }
3975
    }
3976
#endif
3977
0
  }
3978
15.4k
      }
3979
3980
15.4k
      state |= IN_REPEAT;
3981
15.4k
      if (qn->lower != qn->upper)
3982
15.4k
  state |= IN_VAR_REPEAT;
3983
15.4k
      r = setup_tree(target, reg, state, env);
3984
15.4k
      if (r) break;
3985
3986
      /* expand string */
3987
15.4k
#define EXPAND_STRING_MAX_LENGTH  100
3988
15.4k
      if (NTYPE(target) == NT_STR) {
3989
537
  if (qn->lower > 1) {
3990
0
    int i, n = qn->lower;
3991
0
    OnigDistance len = NSTRING_LEN(target);
3992
0
    StrNode* sn = NSTR(target);
3993
0
    Node* np;
3994
3995
0
    np = onig_node_new_str(sn->s, sn->end);
3996
0
    if (IS_NULL(np)) return ONIGERR_MEMORY;
3997
0
    NSTR(np)->flag = sn->flag;
3998
3999
0
    for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4000
0
      r = onig_node_str_cat(np, sn->s, sn->end);
4001
0
      if (r) {
4002
0
        onig_node_free(np);
4003
0
        return r;
4004
0
      }
4005
0
    }
4006
0
    if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4007
0
      Node *np1, *np2;
4008
4009
0
      qn->lower -= i;
4010
0
      if (! IS_REPEAT_INFINITE(qn->upper))
4011
0
        qn->upper -= i;
4012
4013
0
      np1 = onig_node_new_list(np, NULL);
4014
0
      if (IS_NULL(np1)) {
4015
0
        onig_node_free(np);
4016
0
        return ONIGERR_MEMORY;
4017
0
      }
4018
0
      swap_node(np1, node);
4019
0
      np2 = onig_node_list_add(node, np1);
4020
0
      if (IS_NULL(np2)) {
4021
0
        onig_node_free(np1);
4022
0
        return ONIGERR_MEMORY;
4023
0
      }
4024
0
    }
4025
0
    else {
4026
0
      swap_node(np, node);
4027
0
      onig_node_free(np);
4028
0
    }
4029
0
    break; /* break case NT_QTFR: */
4030
0
  }
4031
537
      }
4032
4033
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4034
      if (qn->greedy && (qn->target_empty_info != 0)) {
4035
  if (NTYPE(target) == NT_QTFR) {
4036
    QtfrNode* tqn = NQTFR(target);
4037
    if (IS_NOT_NULL(tqn->head_exact)) {
4038
      qn->head_exact  = tqn->head_exact;
4039
      tqn->head_exact = NULL;
4040
    }
4041
  }
4042
  else {
4043
    qn->head_exact = get_head_value_node(qn->target, 1, reg);
4044
  }
4045
      }
4046
#endif
4047
15.4k
    }
4048
15.4k
    break;
4049
4050
15.4k
  case NT_ENCLOSE:
4051
3.27k
    {
4052
3.27k
      EncloseNode* en = NENCLOSE(node);
4053
4054
3.27k
      switch (en->type) {
4055
534
      case ENCLOSE_OPTION:
4056
534
  {
4057
534
    OnigOptionType options = reg->options;
4058
534
    reg->options = NENCLOSE(node)->option;
4059
534
    r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4060
534
    reg->options = options;
4061
534
  }
4062
534
  break;
4063
4064
2.74k
      case ENCLOSE_MEMORY:
4065
2.74k
  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4066
0
    BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4067
    /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4068
0
  }
4069
2.74k
  if (IS_ENCLOSE_CALLED(en))
4070
0
    state |= IN_CALL;
4071
2.74k
  if (IS_ENCLOSE_RECURSION(en))
4072
0
    state |= IN_RECCALL;
4073
2.74k
  else if ((state & IN_RECCALL) != 0)
4074
0
    SET_CALL_RECURSION(node);
4075
2.74k
  r = setup_tree(en->target, reg, state, env);
4076
2.74k
  break;
4077
4078
0
      case ENCLOSE_STOP_BACKTRACK:
4079
0
  {
4080
0
    Node* target = en->target;
4081
0
    r = setup_tree(target, reg, state, env);
4082
0
    if (NTYPE(target) == NT_QTFR) {
4083
0
      QtfrNode* tqn = NQTFR(target);
4084
0
      if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4085
0
    tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
4086
0
        int qtype = NTYPE(tqn->target);
4087
0
        if (IS_NODE_TYPE_SIMPLE(qtype))
4088
0
    SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4089
0
      }
4090
0
    }
4091
0
  }
4092
0
  break;
4093
4094
0
      case ENCLOSE_CONDITION:
4095
0
#ifdef USE_NAMED_GROUP
4096
0
  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4097
0
      env->num_named > 0 &&
4098
0
      IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4099
0
      !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4100
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4101
0
  }
4102
0
#endif
4103
0
  if (NENCLOSE(node)->regnum > env->num_mem)
4104
0
    return ONIGERR_INVALID_BACKREF;
4105
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4106
0
  break;
4107
4108
0
      case ENCLOSE_ABSENT:
4109
0
  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4110
0
  break;
4111
3.27k
      }
4112
3.27k
    }
4113
3.27k
    break;
4114
4115
15.0k
  case NT_ANCHOR:
4116
15.0k
    {
4117
15.0k
      AnchorNode* an = NANCHOR(node);
4118
4119
15.0k
      switch (an->type) {
4120
0
      case ANCHOR_PREC_READ:
4121
0
  r = setup_tree(an->target, reg, state, env);
4122
0
  break;
4123
0
      case ANCHOR_PREC_READ_NOT:
4124
0
  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4125
0
  break;
4126
4127
/* allowed node types in look-behind */
4128
0
#define ALLOWED_TYPE_IN_LB  \
4129
0
  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4130
0
    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4131
4132
0
#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4133
0
#define ALLOWED_ENCLOSE_IN_LB_NOT   ENCLOSE_OPTION
4134
4135
0
#define ALLOWED_ANCHOR_IN_LB \
4136
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4137
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4138
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4139
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4140
0
#define ALLOWED_ANCHOR_IN_LB_NOT \
4141
0
( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4142
0
  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4143
0
  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4144
0
  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4145
4146
0
      case ANCHOR_LOOK_BEHIND:
4147
0
  {
4148
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4149
0
            ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4150
0
    if (r < 0) return r;
4151
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4152
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4153
0
    r = setup_tree(an->target, reg, (state | IN_LOOK_BEHIND), env);
4154
0
    if (r != 0) return r;
4155
0
    r = setup_look_behind(node, reg, env);
4156
0
  }
4157
0
  break;
4158
4159
0
      case ANCHOR_LOOK_BEHIND_NOT:
4160
0
  {
4161
0
    r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4162
0
          ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4163
0
    if (r < 0) return r;
4164
0
    if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4165
0
    if (NTYPE(node) != NT_ANCHOR) goto restart;
4166
0
    r = setup_tree(an->target, reg, (state | IN_NOT | IN_LOOK_BEHIND),
4167
0
       env);
4168
0
    if (r != 0) return r;
4169
0
    r = setup_look_behind(node, reg, env);
4170
0
  }
4171
0
  break;
4172
15.0k
      }
4173
15.0k
    }
4174
15.0k
    break;
4175
4176
15.0k
  default:
4177
0
    break;
4178
89.6k
  }
4179
4180
89.6k
  return r;
4181
89.6k
}
4182
4183
/* set skip map for Sunday's quick search */
4184
static int
4185
set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4186
      UChar skip[], int ignore_case)
4187
4.17k
{
4188
4.17k
  OnigDistance i, len;
4189
4.17k
  int clen, flen, n, j, k;
4190
4.17k
  UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4191
4.17k
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4192
4.17k
  OnigEncoding enc = reg->enc;
4193
4194
4.17k
  len = end - s;
4195
4.17k
  if (len >= ONIG_CHAR_TABLE_SIZE) {
4196
    /* This should not happen. */
4197
0
    return ONIGERR_TYPE_BUG;
4198
0
  }
4199
4200
4.17k
  if (ignore_case) {
4201
0
    for (i = 0; i < len; i += clen) {
4202
0
      p = s + i;
4203
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4204
0
    p, end, items);
4205
0
      clen = enclen(enc, p, end);
4206
0
      if (p + clen > end)
4207
0
  clen = (int )(end - p);
4208
4209
0
      for (j = 0; j < n; j++) {
4210
0
  if ((items[j].code_len != 1) || (items[j].byte_len != clen)) {
4211
    /* Different length isn't supported. Stop optimization at here. */
4212
0
    end = p;
4213
0
    goto endcheck;
4214
0
  }
4215
0
  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf);
4216
0
  if (flen != clen) {
4217
    /* Different length isn't supported. Stop optimization at here. */
4218
0
    end = p;
4219
0
    goto endcheck;
4220
0
  }
4221
0
      }
4222
0
    }
4223
0
endcheck:
4224
0
    len = end - s;
4225
0
  }
4226
4227
1.07M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4228
1.06M
    skip[i] = (UChar )(len + 1);
4229
4.17k
  n = 0;
4230
64.5k
  for (i = 0; i < len; i += clen) {
4231
60.3k
    p = s + i;
4232
60.3k
    if (ignore_case)
4233
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4234
60.3k
               p, end, items);
4235
60.3k
    clen = enclen(enc, p, end);
4236
60.3k
    if (p + clen > end)
4237
0
      clen = (int )(end - p);
4238
4239
120k
    for (j = 0; j < clen; j++) {
4240
60.3k
      skip[s[i + j]] = (UChar )(len - i - j);
4241
60.3k
      for (k = 0; k < n; k++) {
4242
0
  ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
4243
0
  skip[buf[j]] = (UChar )(len - i - j);
4244
0
      }
4245
60.3k
    }
4246
60.3k
  }
4247
4248
4.17k
  return (int )len;
4249
4.17k
}
4250
4251
typedef struct {
4252
  OnigDistance min;  /* min byte length */
4253
  OnigDistance max;  /* max byte length */
4254
} MinMaxLen;
4255
4256
typedef struct {
4257
  MinMaxLen        mmd;
4258
  OnigEncoding     enc;
4259
  OnigOptionType   options;
4260
  OnigCaseFoldType case_fold_flag;
4261
  ScanEnv*         scan_env;
4262
} OptEnv;
4263
4264
typedef struct {
4265
  int left_anchor;
4266
  int right_anchor;
4267
} OptAncInfo;
4268
4269
typedef struct {
4270
  MinMaxLen  mmd; /* info position */
4271
  OptAncInfo anc;
4272
4273
  int   reach_end;
4274
  int   ignore_case;  /* -1: unset, 0: case sensitive, 1: ignore case */
4275
  int   len;
4276
  UChar s[OPT_EXACT_MAXLEN];
4277
} OptExactInfo;
4278
4279
typedef struct {
4280
  MinMaxLen mmd; /* info position */
4281
  OptAncInfo anc;
4282
4283
  int   value;      /* weighted value */
4284
  UChar map[ONIG_CHAR_TABLE_SIZE];
4285
} OptMapInfo;
4286
4287
typedef struct {
4288
  MinMaxLen    len;
4289
4290
  OptAncInfo   anc;
4291
  OptExactInfo exb;    /* boundary */
4292
  OptExactInfo exm;    /* middle */
4293
  OptExactInfo expr;   /* prec read (?=...) */
4294
4295
  OptMapInfo   map;   /* boundary */
4296
} NodeOptInfo;
4297
4298
4299
static int
4300
map_position_value(OnigEncoding enc, int i)
4301
76.8k
{
4302
76.8k
  static const short int ByteValTable[] = {
4303
76.8k
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
4304
76.8k
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
4305
76.8k
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
4306
76.8k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
4307
76.8k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4308
76.8k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
4309
76.8k
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
4310
76.8k
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
4311
76.8k
  };
4312
4313
76.8k
  if (i < numberof(ByteValTable)) {
4314
76.8k
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4315
0
      return 20;
4316
76.8k
    else
4317
76.8k
      return (int )ByteValTable[i];
4318
76.8k
  }
4319
0
  else
4320
0
    return 4;   /* Take it easy. */
4321
76.8k
}
4322
4323
static int
4324
distance_value(MinMaxLen* mm)
4325
52.0k
{
4326
  /* 1000 / (min-max-dist + 1) */
4327
52.0k
  static const short int dist_vals[] = {
4328
52.0k
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
4329
52.0k
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
4330
52.0k
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
4331
52.0k
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
4332
52.0k
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
4333
52.0k
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
4334
52.0k
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
4335
52.0k
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
4336
52.0k
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
4337
52.0k
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
4338
52.0k
  };
4339
4340
52.0k
  OnigDistance d;
4341
4342
52.0k
  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4343
4344
19.0k
  d = mm->max - mm->min;
4345
19.0k
  if (d < numberof(dist_vals))
4346
    /* return dist_vals[d] * 16 / (mm->min + 12); */
4347
19.0k
    return (int )dist_vals[d];
4348
0
  else
4349
0
    return 1;
4350
19.0k
}
4351
4352
static int
4353
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4354
26.0k
{
4355
26.0k
  if (v2 <= 0) return -1;
4356
26.0k
  if (v1 <= 0) return  1;
4357
4358
26.0k
  v1 *= distance_value(d1);
4359
26.0k
  v2 *= distance_value(d2);
4360
4361
26.0k
  if (v2 > v1) return  1;
4362
23.9k
  if (v2 < v1) return -1;
4363
4364
12.6k
  if (d2->min < d1->min) return  1;
4365
12.1k
  if (d2->min > d1->min) return -1;
4366
2.61k
  return 0;
4367
12.1k
}
4368
4369
static int
4370
is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4371
3.75k
{
4372
3.75k
  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4373
3.75k
}
4374
4375
4376
static void
4377
set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4378
55.6k
{
4379
55.6k
  mml->min = min;
4380
55.6k
  mml->max = max;
4381
55.6k
}
4382
4383
static void
4384
clear_mml(MinMaxLen* mml)
4385
407k
{
4386
407k
  mml->min = mml->max = 0;
4387
407k
}
4388
4389
static void
4390
copy_mml(MinMaxLen* to, MinMaxLen* from)
4391
288k
{
4392
288k
  to->min = from->min;
4393
288k
  to->max = from->max;
4394
288k
}
4395
4396
static void
4397
add_mml(MinMaxLen* to, MinMaxLen* from)
4398
102k
{
4399
102k
  to->min = distance_add(to->min, from->min);
4400
102k
  to->max = distance_add(to->max, from->max);
4401
102k
}
4402
4403
#if 0
4404
static void
4405
add_len_mml(MinMaxLen* to, OnigDistance len)
4406
{
4407
  to->min = distance_add(to->min, len);
4408
  to->max = distance_add(to->max, len);
4409
}
4410
#endif
4411
4412
static void
4413
alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4414
8.58k
{
4415
8.58k
  if (to->min > from->min) to->min = from->min;
4416
8.58k
  if (to->max < from->max) to->max = from->max;
4417
8.58k
}
4418
4419
static void
4420
copy_opt_env(OptEnv* to, OptEnv* from)
4421
12.4k
{
4422
12.4k
  *to = *from;
4423
12.4k
}
4424
4425
static void
4426
clear_opt_anc_info(OptAncInfo* anc)
4427
450k
{
4428
450k
  anc->left_anchor  = 0;
4429
450k
  anc->right_anchor = 0;
4430
450k
}
4431
4432
static void
4433
copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4434
55.0k
{
4435
55.0k
  *to = *from;
4436
55.0k
}
4437
4438
static void
4439
concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4440
        OnigDistance left_len, OnigDistance right_len)
4441
55.0k
{
4442
55.0k
  clear_opt_anc_info(to);
4443
4444
55.0k
  to->left_anchor = left->left_anchor;
4445
55.0k
  if (left_len == 0) {
4446
26.3k
    to->left_anchor |= right->left_anchor;
4447
26.3k
  }
4448
4449
55.0k
  to->right_anchor = right->right_anchor;
4450
55.0k
  if (right_len == 0) {
4451
15.6k
    to->right_anchor |= left->right_anchor;
4452
15.6k
  }
4453
39.4k
  else {
4454
39.4k
    to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4455
39.4k
  }
4456
55.0k
}
4457
4458
static int
4459
is_left_anchor(int anc)
4460
14.5k
{
4461
14.5k
  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4462
14.5k
      anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4463
14.5k
      anc == ANCHOR_PREC_READ_NOT)
4464
4.19k
    return 0;
4465
4466
10.3k
  return 1;
4467
14.5k
}
4468
4469
static int
4470
is_set_opt_anc_info(OptAncInfo* to, int anc)
4471
2.74k
{
4472
2.74k
  if ((to->left_anchor & anc) != 0) return 1;
4473
4474
2.74k
  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4475
2.74k
}
4476
4477
static void
4478
add_opt_anc_info(OptAncInfo* to, int anc)
4479
14.5k
{
4480
14.5k
  if (is_left_anchor(anc))
4481
10.3k
    to->left_anchor |= anc;
4482
4.19k
  else
4483
4.19k
    to->right_anchor |= anc;
4484
14.5k
}
4485
4486
static void
4487
remove_opt_anc_info(OptAncInfo* to, int anc)
4488
0
{
4489
0
  if (is_left_anchor(anc))
4490
0
    to->left_anchor &= ~anc;
4491
0
  else
4492
0
    to->right_anchor &= ~anc;
4493
0
}
4494
4495
static void
4496
alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4497
11.8k
{
4498
11.8k
  to->left_anchor  &= add->left_anchor;
4499
11.8k
  to->right_anchor &= add->right_anchor;
4500
11.8k
}
4501
4502
static int
4503
is_full_opt_exact_info(OptExactInfo* ex)
4504
0
{
4505
0
  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4506
0
}
4507
4508
static void
4509
clear_opt_exact_info(OptExactInfo* ex)
4510
298k
{
4511
298k
  clear_mml(&ex->mmd);
4512
298k
  clear_opt_anc_info(&ex->anc);
4513
298k
  ex->reach_end   = 0;
4514
298k
  ex->ignore_case = -1;   /* unset */
4515
298k
  ex->len         = 0;
4516
298k
  ex->s[0]        = '\0';
4517
298k
}
4518
4519
static void
4520
copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4521
17.5k
{
4522
17.5k
  *to = *from;
4523
17.5k
}
4524
4525
static void
4526
concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4527
550
{
4528
550
  int i, j, len;
4529
550
  UChar *p, *end;
4530
550
  OptAncInfo tanc;
4531
4532
550
  if (to->ignore_case < 0)
4533
0
    to->ignore_case = add->ignore_case;
4534
550
  else if (to->ignore_case != add->ignore_case)
4535
0
    return ;  /* avoid */
4536
4537
550
  p = add->s;
4538
550
  end = p + add->len;
4539
2.20k
  for (i = to->len; p < end; ) {
4540
1.65k
    len = enclen(enc, p, end);
4541
1.65k
    if (i + len > OPT_EXACT_MAXLEN) break;
4542
3.30k
    for (j = 0; j < len && p < end; j++)
4543
1.65k
      to->s[i++] = *p++;
4544
1.65k
  }
4545
4546
550
  to->len = i;
4547
550
  to->reach_end = (p == end ? add->reach_end : 0);
4548
4549
550
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4550
550
  if (! to->reach_end) tanc.right_anchor = 0;
4551
550
  copy_opt_anc_info(&to->anc, &tanc);
4552
550
}
4553
4554
static void
4555
concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4556
        int raw ARG_UNUSED, OnigEncoding enc)
4557
22.5k
{
4558
22.5k
  int i, j, len;
4559
22.5k
  UChar *p;
4560
4561
186k
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4562
164k
    len = enclen(enc, p, end);
4563
164k
    if (i + len > OPT_EXACT_MAXLEN) break;
4564
328k
    for (j = 0; j < len && p < end; j++)
4565
164k
      to->s[i++] = *p++;
4566
164k
  }
4567
4568
22.5k
  to->len = i;
4569
22.5k
}
4570
4571
static void
4572
alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4573
12.8k
{
4574
12.8k
  int i, j, len;
4575
4576
12.8k
  if (add->len == 0 || to->len == 0) {
4577
9.12k
    clear_opt_exact_info(to);
4578
9.12k
    return ;
4579
9.12k
  }
4580
4581
3.75k
  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4582
508
    clear_opt_exact_info(to);
4583
508
    return ;
4584
508
  }
4585
4586
5.43k
  for (i = 0; i < to->len && i < add->len; ) {
4587
5.43k
    if (to->s[i] != add->s[i]) break;
4588
2.19k
    len = enclen(env->enc, to->s + i, to->s + to->len);
4589
4590
2.19k
    for (j = 1; j < len; j++) {
4591
0
      if (to->s[i+j] != add->s[i+j]) break;
4592
0
    }
4593
2.19k
    if (j < len) break;
4594
2.19k
    i += len;
4595
2.19k
  }
4596
4597
3.24k
  if (! add->reach_end || i < add->len || i < to->len) {
4598
3.24k
    to->reach_end = 0;
4599
3.24k
  }
4600
3.24k
  to->len = i;
4601
3.24k
  if (to->ignore_case < 0)
4602
0
    to->ignore_case = add->ignore_case;
4603
3.24k
  else if (add->ignore_case >= 0)
4604
3.24k
    to->ignore_case |= add->ignore_case;
4605
4606
3.24k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4607
3.24k
  if (! to->reach_end) to->anc.right_anchor = 0;
4608
3.24k
}
4609
4610
static void
4611
select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4612
111k
{
4613
111k
  int v1, v2;
4614
4615
111k
  v1 = now->len;
4616
111k
  v2 = alt->len;
4617
4618
111k
  if (v2 == 0) {
4619
88.9k
    return ;
4620
88.9k
  }
4621
22.2k
  else if (v1 == 0) {
4622
17.5k
    copy_opt_exact_info(now, alt);
4623
17.5k
    return ;
4624
17.5k
  }
4625
4.69k
  else if (v1 <= 2 && v2 <= 2) {
4626
    /* ByteValTable[x] is big value --> low price */
4627
530
    v2 = map_position_value(enc, now->s[0]);
4628
530
    v1 = map_position_value(enc, alt->s[0]);
4629
4630
530
    if (now->len > 1) v1 += 5;
4631
530
    if (alt->len > 1) v2 += 5;
4632
530
  }
4633
4634
4.69k
  if (now->ignore_case <= 0) v1 *= 2;
4635
4.69k
  if (alt->ignore_case <= 0) v2 *= 2;
4636
4637
4.69k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4638
0
    copy_opt_exact_info(now, alt);
4639
4.69k
}
4640
4641
static void
4642
clear_opt_map_info(OptMapInfo* map)
4643
96.2k
{
4644
96.2k
  static const OptMapInfo clean_info = {
4645
96.2k
    {0, 0}, {0, 0}, 0,
4646
96.2k
    {
4647
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4648
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4649
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4650
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4651
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4652
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4653
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4654
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4655
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4656
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4657
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4658
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4659
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4660
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4661
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4662
96.2k
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4663
96.2k
    }
4664
96.2k
  };
4665
4666
96.2k
  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4667
96.2k
}
4668
4669
static void
4670
copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4671
10.8k
{
4672
10.8k
  *to = *from;
4673
10.8k
}
4674
4675
static void
4676
add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4677
67.7k
{
4678
67.7k
  if (map->map[c] == 0) {
4679
67.7k
    map->map[c] = 1;
4680
67.7k
    map->value += map_position_value(enc, c);
4681
67.7k
  }
4682
67.7k
}
4683
4684
static int
4685
add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4686
                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4687
0
{
4688
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4689
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4690
0
  int i, n;
4691
4692
0
  add_char_opt_map_info(map, p[0], enc);
4693
4694
0
  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4695
0
  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4696
0
  if (n < 0) return n;
4697
4698
0
  for (i = 0; i < n; i++) {
4699
0
    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4700
0
    add_char_opt_map_info(map, buf[0], enc);
4701
0
  }
4702
4703
0
  return 0;
4704
0
}
4705
4706
static void
4707
select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4708
51.4k
{
4709
51.4k
  const int z = 1<<15; /* 32768: something big value */
4710
4711
51.4k
  int v1, v2;
4712
4713
51.4k
  if (alt->value == 0) return ;
4714
23.8k
  if (now->value == 0) {
4715
10.8k
    copy_opt_map_info(now, alt);
4716
10.8k
    return ;
4717
10.8k
  }
4718
4719
13.0k
  v1 = z / now->value;
4720
13.0k
  v2 = z / alt->value;
4721
13.0k
  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4722
0
    copy_opt_map_info(now, alt);
4723
13.0k
}
4724
4725
static int
4726
comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4727
8.30k
{
4728
16.6k
#define COMP_EM_BASE  20
4729
8.30k
  int ve, vm;
4730
4731
8.30k
  if (m->value <= 0) return -1;
4732
4733
8.30k
  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4734
8.30k
  vm = COMP_EM_BASE * 5 * 2 / m->value;
4735
8.30k
  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4736
8.30k
}
4737
4738
static void
4739
alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4740
4.29k
{
4741
4.29k
  int i, val;
4742
4743
  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4744
4.29k
  if (to->value == 0) return ;
4745
4.29k
  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4746
0
    clear_opt_map_info(to);
4747
0
    return ;
4748
0
  }
4749
4750
4.29k
  alt_merge_mml(&to->mmd, &add->mmd);
4751
4752
4.29k
  val = 0;
4753
1.10M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4754
1.09M
    if (add->map[i])
4755
4.29k
      to->map[i] = 1;
4756
4757
1.09M
    if (to->map[i])
4758
8.03k
      val += map_position_value(enc, i);
4759
1.09M
  }
4760
4.29k
  to->value = val;
4761
4762
4.29k
  alt_merge_opt_anc_info(&to->anc, &add->anc);
4763
4.29k
}
4764
4765
static void
4766
set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4767
96.2k
{
4768
96.2k
  copy_mml(&(opt->exb.mmd),  mmd);
4769
96.2k
  copy_mml(&(opt->expr.mmd), mmd);
4770
96.2k
  copy_mml(&(opt->map.mmd),  mmd);
4771
96.2k
}
4772
4773
static void
4774
clear_node_opt_info(NodeOptInfo* opt)
4775
96.2k
{
4776
96.2k
  clear_mml(&opt->len);
4777
96.2k
  clear_opt_anc_info(&opt->anc);
4778
96.2k
  clear_opt_exact_info(&opt->exb);
4779
96.2k
  clear_opt_exact_info(&opt->exm);
4780
96.2k
  clear_opt_exact_info(&opt->expr);
4781
96.2k
  clear_opt_map_info(&opt->map);
4782
96.2k
}
4783
4784
static void
4785
copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4786
12.4k
{
4787
12.4k
  *to = *from;
4788
12.4k
}
4789
4790
static void
4791
concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4792
51.4k
{
4793
51.4k
  int exb_reach, exm_reach;
4794
51.4k
  OptAncInfo tanc;
4795
4796
51.4k
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4797
51.4k
  copy_opt_anc_info(&to->anc, &tanc);
4798
4799
51.4k
  if (add->exb.len > 0 && to->len.max == 0) {
4800
3.07k
    concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4801
3.07k
      to->len.max, add->len.max);
4802
3.07k
    copy_opt_anc_info(&add->exb.anc, &tanc);
4803
3.07k
  }
4804
4805
51.4k
  if (add->map.value > 0 && to->len.max == 0) {
4806
5.65k
    if (add->map.mmd.max == 0)
4807
5.65k
      add->map.anc.left_anchor |= to->anc.left_anchor;
4808
5.65k
  }
4809
4810
51.4k
  exb_reach = to->exb.reach_end;
4811
51.4k
  exm_reach = to->exm.reach_end;
4812
4813
51.4k
  if (add->len.max != 0)
4814
35.8k
    to->exb.reach_end = to->exm.reach_end = 0;
4815
4816
51.4k
  if (add->exb.len > 0) {
4817
15.0k
    if (exb_reach) {
4818
0
      concat_opt_exact_info(&to->exb, &add->exb, enc);
4819
0
      clear_opt_exact_info(&add->exb);
4820
0
    }
4821
15.0k
    else if (exm_reach) {
4822
550
      concat_opt_exact_info(&to->exm, &add->exb, enc);
4823
550
      clear_opt_exact_info(&add->exb);
4824
550
    }
4825
15.0k
  }
4826
51.4k
  select_opt_exact_info(enc, &to->exm, &add->exb);
4827
51.4k
  select_opt_exact_info(enc, &to->exm, &add->exm);
4828
4829
51.4k
  if (to->expr.len > 0) {
4830
0
    if (add->len.max > 0) {
4831
0
      if (to->expr.len > (int )add->len.max)
4832
0
  to->expr.len = (int )add->len.max;
4833
4834
0
      if (to->expr.mmd.max == 0)
4835
0
  select_opt_exact_info(enc, &to->exb, &to->expr);
4836
0
      else
4837
0
  select_opt_exact_info(enc, &to->exm, &to->expr);
4838
0
    }
4839
0
  }
4840
51.4k
  else if (add->expr.len > 0) {
4841
0
    copy_opt_exact_info(&to->expr, &add->expr);
4842
0
  }
4843
4844
51.4k
  select_opt_map_info(&to->map, &add->map);
4845
4846
51.4k
  add_mml(&to->len, &add->len);
4847
51.4k
}
4848
4849
static void
4850
alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4851
4.29k
{
4852
4.29k
  alt_merge_opt_anc_info  (&to->anc,  &add->anc);
4853
4.29k
  alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
4854
4.29k
  alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
4855
4.29k
  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4856
4.29k
  alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
4857
4858
4.29k
  alt_merge_mml(&to->len, &add->len);
4859
4.29k
}
4860
4861
4862
2.74k
#define MAX_NODE_OPT_INFO_REF_COUNT    5
4863
4864
static int
4865
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4866
96.2k
{
4867
96.2k
  int type;
4868
96.2k
  int r = 0;
4869
4870
96.2k
  clear_node_opt_info(opt);
4871
96.2k
  set_bound_node_opt_info(opt, &env->mmd);
4872
4873
96.2k
  type = NTYPE(node);
4874
96.2k
  switch (type) {
4875
12.4k
  case NT_LIST:
4876
12.4k
    {
4877
12.4k
      OptEnv nenv;
4878
12.4k
      NodeOptInfo nopt;
4879
12.4k
      Node* nd = node;
4880
4881
12.4k
      copy_opt_env(&nenv, env);
4882
51.4k
      do {
4883
51.4k
  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4884
51.4k
  if (r == 0) {
4885
51.4k
    add_mml(&nenv.mmd, &nopt.len);
4886
51.4k
    concat_left_node_opt_info(env->enc, opt, &nopt);
4887
51.4k
  }
4888
51.4k
      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4889
12.4k
    }
4890
12.4k
    break;
4891
4892
3.21k
  case NT_ALT:
4893
3.21k
    {
4894
3.21k
      NodeOptInfo nopt;
4895
3.21k
      Node* nd = node;
4896
4897
7.50k
      do {
4898
7.50k
  r = optimize_node_left(NCAR(nd), &nopt, env);
4899
7.50k
  if (r == 0) {
4900
7.50k
    if (nd == node) copy_node_opt_info(opt, &nopt);
4901
4.29k
    else            alt_merge_node_opt_info(opt, &nopt, env);
4902
7.50k
  }
4903
7.50k
      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4904
3.21k
    }
4905
3.21k
    break;
4906
4907
22.5k
  case NT_STR:
4908
22.5k
    {
4909
22.5k
      StrNode* sn = NSTR(node);
4910
22.5k
      OnigDistance slen = sn->end - sn->s;
4911
22.5k
      int is_raw = NSTRING_IS_RAW(node);
4912
4913
22.5k
      if (! NSTRING_IS_AMBIG(node)) {
4914
22.5k
  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4915
22.5k
          is_raw, env->enc);
4916
22.5k
  opt->exb.ignore_case = 0;
4917
22.5k
  if (slen > 0) {
4918
22.0k
    add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4919
22.0k
  }
4920
22.5k
  set_mml(&opt->len, slen, slen);
4921
22.5k
      }
4922
0
      else {
4923
0
  OnigDistance max;
4924
4925
0
  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4926
0
    int n = onigenc_strlen(env->enc, sn->s, sn->end);
4927
0
    max = (OnigDistance )ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4928
0
  }
4929
0
  else {
4930
0
    concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4931
0
            is_raw, env->enc);
4932
0
    opt->exb.ignore_case = 1;
4933
4934
0
    if (slen > 0) {
4935
0
      r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4936
0
            env->enc, env->case_fold_flag);
4937
0
      if (r != 0) break;
4938
0
    }
4939
4940
0
    max = slen;
4941
0
  }
4942
4943
0
  set_mml(&opt->len, slen, max);
4944
0
      }
4945
4946
22.5k
      if ((OnigDistance )opt->exb.len == slen)
4947
20.4k
  opt->exb.reach_end = 1;
4948
22.5k
    }
4949
0
    break;
4950
4951
12.3k
  case NT_CCLASS:
4952
12.3k
    {
4953
12.3k
      int i, z;
4954
12.3k
      CClassNode* cc = NCCLASS(node);
4955
4956
      /* no need to check ignore case. (set in setup_tree()) */
4957
4958
12.3k
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4959
3.00k
  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
4960
3.00k
  OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4961
4962
3.00k
  set_mml(&opt->len, min, max);
4963
3.00k
      }
4964
9.35k
      else {
4965
2.40M
  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4966
2.39M
    z = BITSET_AT(cc->bs, i);
4967
2.39M
    if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
4968
45.7k
      add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4969
45.7k
    }
4970
2.39M
  }
4971
9.35k
  set_mml(&opt->len, 1, 1);
4972
9.35k
      }
4973
12.3k
    }
4974
12.3k
    break;
4975
4976
0
  case NT_CTYPE:
4977
0
    {
4978
0
      int i, min, max;
4979
0
      int maxcode;
4980
4981
0
      max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
4982
4983
0
      if (max == 1) {
4984
0
  min = 1;
4985
4986
0
  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4987
0
  switch (NCTYPE(node)->ctype) {
4988
0
  case ONIGENC_CTYPE_WORD:
4989
0
    if (NCTYPE(node)->not != 0) {
4990
0
      for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
4991
0
        if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
4992
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
4993
0
        }
4994
0
      }
4995
0
    }
4996
0
    else {
4997
0
      for (i = 0; i < maxcode; i++) {
4998
0
        if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
4999
0
    add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5000
0
        }
5001
0
      }
5002
0
    }
5003
0
    break;
5004
0
  }
5005
0
      }
5006
0
      else {
5007
0
  min = ONIGENC_MBC_MINLEN(env->enc);
5008
0
      }
5009
0
      set_mml(&opt->len, min, max);
5010
0
    }
5011
0
    break;
5012
5013
5.24k
  case NT_CANY:
5014
5.24k
    {
5015
5.24k
      OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5016
5.24k
      OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5017
5.24k
      set_mml(&opt->len, min, max);
5018
5.24k
    }
5019
5.24k
    break;
5020
5021
15.0k
  case NT_ANCHOR:
5022
15.0k
    switch (NANCHOR(node)->type) {
5023
0
    case ANCHOR_BEGIN_BUF:
5024
0
    case ANCHOR_BEGIN_POSITION:
5025
10.3k
    case ANCHOR_BEGIN_LINE:
5026
10.3k
    case ANCHOR_END_BUF:
5027
10.3k
    case ANCHOR_SEMI_END_BUF:
5028
14.5k
    case ANCHOR_END_LINE:
5029
14.5k
    case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5030
14.5k
    case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5031
14.5k
      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5032
14.5k
      break;
5033
5034
0
    case ANCHOR_PREC_READ:
5035
0
      {
5036
0
  NodeOptInfo nopt;
5037
5038
0
  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5039
0
  if (r == 0) {
5040
0
    if (nopt.exb.len > 0)
5041
0
      copy_opt_exact_info(&opt->expr, &nopt.exb);
5042
0
    else if (nopt.exm.len > 0)
5043
0
      copy_opt_exact_info(&opt->expr, &nopt.exm);
5044
5045
0
    opt->expr.reach_end = 0;
5046
5047
0
    if (nopt.map.value > 0)
5048
0
      copy_opt_map_info(&opt->map, &nopt.map);
5049
0
  }
5050
0
      }
5051
0
      break;
5052
5053
0
    case ANCHOR_LOOK_BEHIND_NOT:
5054
0
      break;
5055
15.0k
    }
5056
15.0k
    break;
5057
5058
15.0k
  case NT_BREF:
5059
0
    {
5060
0
      int i;
5061
0
      int* backs;
5062
0
      OnigDistance min, max, tmin, tmax;
5063
0
      Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5064
0
      BRefNode* br = NBREF(node);
5065
5066
0
      if (br->state & NST_RECURSION) {
5067
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5068
0
  break;
5069
0
      }
5070
0
      backs = BACKREFS_P(br);
5071
0
      r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5072
0
      if (r != 0) break;
5073
0
      r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5074
0
      if (r != 0) break;
5075
0
      for (i = 1; i < br->back_num; i++) {
5076
0
  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5077
0
  if (r != 0) break;
5078
0
  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5079
0
  if (r != 0) break;
5080
0
  if (min > tmin) min = tmin;
5081
0
  if (max < tmax) max = tmax;
5082
0
      }
5083
0
      if (r == 0) set_mml(&opt->len, min, max);
5084
0
    }
5085
0
    break;
5086
5087
0
#ifdef USE_SUBEXP_CALL
5088
0
  case NT_CALL:
5089
0
    if (IS_CALL_RECURSION(NCALL(node)))
5090
0
      set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5091
0
    else {
5092
0
      OnigOptionType save = env->options;
5093
0
      env->options = NENCLOSE(NCALL(node)->target)->option;
5094
0
      r = optimize_node_left(NCALL(node)->target, opt, env);
5095
0
      env->options = save;
5096
0
    }
5097
0
    break;
5098
0
#endif
5099
5100
15.4k
  case NT_QTFR:
5101
15.4k
    {
5102
15.4k
      int i;
5103
15.4k
      OnigDistance min, max;
5104
15.4k
      NodeOptInfo nopt;
5105
15.4k
      QtfrNode* qn = NQTFR(node);
5106
5107
15.4k
      r = optimize_node_left(qn->target, &nopt, env);
5108
15.4k
      if (r) break;
5109
5110
15.4k
      if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
5111
5.74k
  if (env->mmd.max == 0 &&
5112
5.74k
      NTYPE(qn->target) == NT_CANY && qn->greedy) {
5113
0
    if (IS_MULTILINE(env->options))
5114
      /* implicit anchor: /.*a/ ==> /\A.*a/ */
5115
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5116
0
    else
5117
0
      add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5118
0
  }
5119
5.74k
      }
5120
9.73k
      else {
5121
9.73k
  if (qn->lower > 0) {
5122
9.19k
    copy_node_opt_info(opt, &nopt);
5123
9.19k
    if (nopt.exb.len > 0) {
5124
0
      if (nopt.exb.reach_end) {
5125
0
        for (i = 2; i <= qn->lower &&
5126
0
        ! is_full_opt_exact_info(&opt->exb); i++) {
5127
0
    concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5128
0
        }
5129
0
        if (i < qn->lower) {
5130
0
    opt->exb.reach_end = 0;
5131
0
        }
5132
0
      }
5133
0
    }
5134
5135
9.19k
    if (qn->lower != qn->upper) {
5136
9.19k
      opt->exb.reach_end = 0;
5137
9.19k
      opt->exm.reach_end = 0;
5138
9.19k
    }
5139
9.19k
    if (qn->lower > 1)
5140
0
      opt->exm.reach_end = 0;
5141
9.19k
  }
5142
9.73k
      }
5143
5144
15.4k
      min = distance_multiply(nopt.len.min, qn->lower);
5145
15.4k
      if (IS_REPEAT_INFINITE(qn->upper))
5146
14.9k
  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5147
537
      else
5148
537
  max = distance_multiply(nopt.len.max, qn->upper);
5149
5150
15.4k
      set_mml(&opt->len, min, max);
5151
15.4k
    }
5152
0
    break;
5153
5154
9.90k
  case NT_ENCLOSE:
5155
9.90k
    {
5156
9.90k
      EncloseNode* en = NENCLOSE(node);
5157
5158
9.90k
      switch (en->type) {
5159
534
      case ENCLOSE_OPTION:
5160
534
  {
5161
534
    OnigOptionType save = env->options;
5162
5163
534
    env->options = en->option;
5164
534
    r = optimize_node_left(en->target, opt, env);
5165
534
    env->options = save;
5166
534
  }
5167
534
  break;
5168
5169
2.74k
      case ENCLOSE_MEMORY:
5170
2.74k
#ifdef USE_SUBEXP_CALL
5171
2.74k
  en->opt_count++;
5172
2.74k
  if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5173
0
    OnigDistance min, max;
5174
5175
0
    min = 0;
5176
0
    max = ONIG_INFINITE_DISTANCE;
5177
0
    if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5178
0
    if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5179
0
    set_mml(&opt->len, min, max);
5180
0
  }
5181
2.74k
  else
5182
2.74k
#endif
5183
2.74k
  {
5184
2.74k
    r = optimize_node_left(en->target, opt, env);
5185
5186
2.74k
    if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5187
0
      if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5188
0
        remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5189
0
    }
5190
2.74k
  }
5191
2.74k
  break;
5192
5193
6.63k
      case ENCLOSE_STOP_BACKTRACK:
5194
6.63k
      case ENCLOSE_CONDITION:
5195
6.63k
  r = optimize_node_left(en->target, opt, env);
5196
6.63k
  break;
5197
5198
0
      case ENCLOSE_ABSENT:
5199
0
  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5200
0
  break;
5201
9.90k
      }
5202
9.90k
    }
5203
9.90k
    break;
5204
5205
9.90k
  default:
5206
#ifdef ONIG_DEBUG
5207
    fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5208
      NTYPE(node));
5209
#endif
5210
0
    r = ONIGERR_TYPE_BUG;
5211
0
    break;
5212
96.2k
  }
5213
5214
96.2k
  return r;
5215
96.2k
}
5216
5217
static int
5218
set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5219
5.70k
{
5220
5.70k
  int allow_reverse;
5221
5222
5.70k
  if (e->len == 0) return 0;
5223
5224
5.70k
  reg->exact = (UChar* )xmalloc(e->len);
5225
5.70k
  CHECK_NULL_RETURN_MEMERR(reg->exact);
5226
5.70k
  xmemcpy(reg->exact, e->s, e->len);
5227
5.70k
  reg->exact_end = reg->exact + e->len;
5228
5229
5.70k
  allow_reverse =
5230
5.70k
  ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5231
5232
5.70k
  if (e->ignore_case > 0) {
5233
0
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5234
0
      e->len = set_bm_skip(reg->exact, reg->exact_end, reg,
5235
0
          reg->map, 1);
5236
0
      reg->exact_end = reg->exact + e->len;
5237
0
      if (e->len >= 3) {
5238
0
  reg->optimize = (allow_reverse != 0
5239
0
       ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5240
0
      }
5241
0
      else if (e->len > 0) {
5242
0
  reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5243
0
      }
5244
0
      else
5245
0
  return 0;
5246
0
    }
5247
0
    else {
5248
0
      reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5249
0
    }
5250
0
  }
5251
5.70k
  else {
5252
5.70k
    if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5253
4.17k
      set_bm_skip(reg->exact, reg->exact_end, reg,
5254
4.17k
      reg->map, 0);
5255
4.17k
      reg->optimize = (allow_reverse != 0
5256
4.17k
         ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5257
4.17k
    }
5258
1.52k
    else {
5259
1.52k
      reg->optimize = ONIG_OPTIMIZE_EXACT;
5260
1.52k
    }
5261
5.70k
  }
5262
5263
5.70k
  reg->dmin = e->mmd.min;
5264
5.70k
  reg->dmax = e->mmd.max;
5265
5266
5.70k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5267
5.70k
    reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5268
5.70k
  }
5269
5270
5.70k
  return 0;
5271
5.70k
}
5272
5273
static void
5274
set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5275
4.15k
{
5276
4.15k
  int i;
5277
5278
1.06M
  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5279
1.06M
    reg->map[i] = m->map[i];
5280
5281
4.15k
  reg->optimize   = ONIG_OPTIMIZE_MAP;
5282
4.15k
  reg->dmin       = m->mmd.min;
5283
4.15k
  reg->dmax       = m->mmd.max;
5284
5285
4.15k
  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5286
4.15k
    reg->threshold_len = (int )(reg->dmin + 1);
5287
4.15k
  }
5288
4.15k
}
5289
5290
static void
5291
set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5292
9.86k
{
5293
9.86k
  reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
5294
9.86k
  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5295
9.86k
}
5296
5297
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5298
static void print_optimize_info(FILE* f, regex_t* reg);
5299
#endif
5300
5301
static int
5302
set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5303
11.9k
{
5304
5305
11.9k
  int r;
5306
11.9k
  NodeOptInfo opt;
5307
11.9k
  OptEnv env;
5308
5309
11.9k
  env.enc            = reg->enc;
5310
11.9k
  env.options        = reg->options;
5311
11.9k
  env.case_fold_flag = reg->case_fold_flag;
5312
11.9k
  env.scan_env   = scan_env;
5313
11.9k
  clear_mml(&env.mmd);
5314
5315
11.9k
  r = optimize_node_left(node, &opt, &env);
5316
11.9k
  if (r) return r;
5317
5318
11.9k
  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5319
11.9k
        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5320
11.9k
        ANCHOR_LOOK_BEHIND);
5321
5322
11.9k
  if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5323
0
    reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5324
5325
11.9k
  reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5326
11.9k
  ANCHOR_PREC_READ_NOT);
5327
5328
11.9k
  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5329
0
    reg->anchor_dmin = opt.len.min;
5330
0
    reg->anchor_dmax = opt.len.max;
5331
0
  }
5332
5333
11.9k
  if (opt.exb.len > 0 || opt.exm.len > 0) {
5334
8.30k
    select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5335
8.30k
    if (opt.map.value > 0 &&
5336
8.30k
  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5337
2.60k
      goto set_map;
5338
2.60k
    }
5339
5.70k
    else {
5340
5.70k
      r = set_optimize_exact_info(reg, &opt.exb);
5341
5.70k
      set_sub_anchor(reg, &opt.exb.anc);
5342
5.70k
    }
5343
8.30k
  }
5344
3.61k
  else if (opt.map.value > 0) {
5345
4.15k
  set_map:
5346
4.15k
    set_optimize_map_info(reg, &opt.map);
5347
4.15k
    set_sub_anchor(reg, &opt.map.anc);
5348
4.15k
  }
5349
2.06k
  else {
5350
2.06k
    reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5351
2.06k
    if (opt.len.max == 0)
5352
1.02k
      reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5353
2.06k
  }
5354
5355
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5356
  print_optimize_info(stderr, reg);
5357
#endif
5358
11.9k
  return r;
5359
11.9k
}
5360
5361
static void
5362
clear_optimize_info(regex_t* reg)
5363
11.9k
{
5364
11.9k
  reg->optimize      = ONIG_OPTIMIZE_NONE;
5365
11.9k
  reg->anchor        = 0;
5366
11.9k
  reg->anchor_dmin   = 0;
5367
11.9k
  reg->anchor_dmax   = 0;
5368
11.9k
  reg->sub_anchor    = 0;
5369
11.9k
  reg->exact_end     = (UChar* )NULL;
5370
11.9k
  reg->threshold_len = 0;
5371
11.9k
  if (IS_NOT_NULL(reg->exact)) {
5372
0
    xfree(reg->exact);
5373
0
    reg->exact = (UChar* )NULL;
5374
0
  }
5375
11.9k
}
5376
5377
#ifdef ONIG_DEBUG
5378
5379
static void print_enc_string(FILE* fp, OnigEncoding enc,
5380
           const UChar *s, const UChar *end)
5381
{
5382
  fprintf(fp, "\nPATTERN: /");
5383
5384
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5385
    const UChar *p;
5386
    OnigCodePoint code;
5387
5388
    p = s;
5389
    while (p < end) {
5390
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
5391
      if (code >= 0x80) {
5392
  fprintf(fp, " 0x%04x ", (int )code);
5393
      }
5394
      else {
5395
  fputc((int )code, fp);
5396
      }
5397
5398
      p += enclen(enc, p, end);
5399
    }
5400
  }
5401
  else {
5402
    while (s < end) {
5403
      fputc((int )*s, fp);
5404
      s++;
5405
    }
5406
  }
5407
5408
  fprintf(fp, "/ (%s)\n", enc->name);
5409
}
5410
#endif  /* ONIG_DEBUG */
5411
5412
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413
static void
5414
print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5415
{
5416
  if (a == ONIG_INFINITE_DISTANCE)
5417
    fputs("inf", f);
5418
  else
5419
    fprintf(f, "(%"PRIuPTR")", a);
5420
5421
  fputs("-", f);
5422
5423
  if (b == ONIG_INFINITE_DISTANCE)
5424
    fputs("inf", f);
5425
  else
5426
    fprintf(f, "(%"PRIuPTR")", b);
5427
}
5428
5429
static void
5430
print_anchor(FILE* f, int anchor)
5431
{
5432
  int q = 0;
5433
5434
  fprintf(f, "[");
5435
5436
  if (anchor & ANCHOR_BEGIN_BUF) {
5437
    fprintf(f, "begin-buf");
5438
    q = 1;
5439
  }
5440
  if (anchor & ANCHOR_BEGIN_LINE) {
5441
    if (q) fprintf(f, ", ");
5442
    q = 1;
5443
    fprintf(f, "begin-line");
5444
  }
5445
  if (anchor & ANCHOR_BEGIN_POSITION) {
5446
    if (q) fprintf(f, ", ");
5447
    q = 1;
5448
    fprintf(f, "begin-pos");
5449
  }
5450
  if (anchor & ANCHOR_END_BUF) {
5451
    if (q) fprintf(f, ", ");
5452
    q = 1;
5453
    fprintf(f, "end-buf");
5454
  }
5455
  if (anchor & ANCHOR_SEMI_END_BUF) {
5456
    if (q) fprintf(f, ", ");
5457
    q = 1;
5458
    fprintf(f, "semi-end-buf");
5459
  }
5460
  if (anchor & ANCHOR_END_LINE) {
5461
    if (q) fprintf(f, ", ");
5462
    q = 1;
5463
    fprintf(f, "end-line");
5464
  }
5465
  if (anchor & ANCHOR_ANYCHAR_STAR) {
5466
    if (q) fprintf(f, ", ");
5467
    q = 1;
5468
    fprintf(f, "anychar-star");
5469
  }
5470
  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5471
    if (q) fprintf(f, ", ");
5472
    fprintf(f, "anychar-star-ml");
5473
  }
5474
5475
  fprintf(f, "]");
5476
}
5477
5478
static void
5479
print_optimize_info(FILE* f, regex_t* reg)
5480
{
5481
  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5482
                              "EXACT_IC", "MAP",
5483
                              "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5484
5485
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5486
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
5487
  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5488
    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5489
  fprintf(f, "\n");
5490
5491
  if (reg->optimize) {
5492
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
5493
    fprintf(f, "\n");
5494
  }
5495
  fprintf(f, "\n");
5496
5497
  if (reg->exact) {
5498
    UChar *p;
5499
    fprintf(f, "exact: [");
5500
    for (p = reg->exact; p < reg->exact_end; p++) {
5501
      fputc(*p, f);
5502
    }
5503
    fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5504
  }
5505
  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5506
    int c, i, n = 0;
5507
5508
    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5509
      if (reg->map[i]) n++;
5510
5511
    fprintf(f, "map: n=%d\n", n);
5512
    if (n > 0) {
5513
      c = 0;
5514
      fputc('[', f);
5515
      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5516
  if (reg->map[i] != 0) {
5517
    if (c > 0)  fputs(", ", f);
5518
    c++;
5519
    if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5520
        ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5521
      fputc(i, f);
5522
    else
5523
      fprintf(f, "%d", i);
5524
  }
5525
      }
5526
      fprintf(f, "]\n");
5527
    }
5528
  }
5529
}
5530
#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5531
5532
5533
extern void
5534
onig_free_body(regex_t* reg)
5535
11.9k
{
5536
11.9k
  if (IS_NOT_NULL(reg)) {
5537
11.9k
    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
5538
11.9k
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
5539
11.9k
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
5540
11.9k
    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
5541
5542
11.9k
#ifdef USE_NAMED_GROUP
5543
11.9k
    onig_names_free(reg);
5544
11.9k
#endif
5545
11.9k
  }
5546
11.9k
}
5547
5548
extern void
5549
onig_free(regex_t* reg)
5550
11.9k
{
5551
11.9k
  if (IS_NOT_NULL(reg)) {
5552
11.9k
    onig_free_body(reg);
5553
11.9k
    xfree(reg);
5554
11.9k
  }
5555
11.9k
}
5556
5557
#ifdef RUBY
5558
size_t
5559
onig_memsize(const regex_t *reg)
5560
{
5561
    size_t size = sizeof(regex_t);
5562
    if (IS_NULL(reg)) return 0;
5563
    if (IS_NOT_NULL(reg->p))                size += reg->alloc;
5564
    if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
5565
    if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5566
    if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
5567
5568
    return size;
5569
}
5570
5571
size_t
5572
onig_region_memsize(const OnigRegion *regs)
5573
{
5574
    size_t size = sizeof(*regs);
5575
    if (IS_NULL(regs)) return 0;
5576
    size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5577
    return size;
5578
}
5579
#endif
5580
5581
#define REGEX_TRANSFER(to,from) do {\
5582
  onig_free_body(to);\
5583
  xmemcpy(to, from, sizeof(regex_t));\
5584
  xfree(from);\
5585
} while (0)
5586
5587
#if 0
5588
extern void
5589
onig_transfer(regex_t* to, regex_t* from)
5590
{
5591
  REGEX_TRANSFER(to, from);
5592
}
5593
#endif
5594
5595
#ifdef ONIG_DEBUG_COMPILE
5596
static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5597
#endif
5598
#ifdef ONIG_DEBUG_PARSE_TREE
5599
static void print_tree(FILE* f, Node* node);
5600
#endif
5601
5602
#ifdef RUBY
5603
extern int
5604
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5605
       OnigErrorInfo* einfo)
5606
{
5607
  return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5608
}
5609
#endif
5610
5611
#ifdef RUBY
5612
extern int
5613
onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5614
        OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5615
#else
5616
extern int
5617
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5618
       OnigErrorInfo* einfo)
5619
#endif
5620
11.9k
{
5621
11.9k
#define COMPILE_INIT_SIZE  20
5622
5623
11.9k
  int r;
5624
11.9k
  OnigDistance init_size;
5625
11.9k
  Node*  root;
5626
11.9k
  ScanEnv  scan_env = {0};
5627
11.9k
#ifdef USE_SUBEXP_CALL
5628
11.9k
  UnsetAddrList  uslist;
5629
11.9k
#endif
5630
5631
11.9k
  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5632
5633
#ifdef RUBY
5634
  scan_env.sourcefile = sourcefile;
5635
  scan_env.sourceline = sourceline;
5636
#endif
5637
5638
#ifdef ONIG_DEBUG
5639
  print_enc_string(stderr, reg->enc, pattern, pattern_end);
5640
#endif
5641
5642
11.9k
  if (reg->alloc == 0) {
5643
11.9k
    init_size = (pattern_end - pattern) * 2;
5644
11.9k
    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5645
11.9k
    r = BBUF_INIT(reg, init_size);
5646
11.9k
    if (r != 0) goto end;
5647
11.9k
  }
5648
0
  else
5649
0
    reg->used = 0;
5650
5651
11.9k
  reg->num_mem            = 0;
5652
11.9k
  reg->num_repeat         = 0;
5653
11.9k
  reg->num_null_check     = 0;
5654
11.9k
  reg->repeat_range_alloc = 0;
5655
11.9k
  reg->repeat_range       = (OnigRepeatRange* )NULL;
5656
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5657
  reg->num_comb_exp_check = 0;
5658
#endif
5659
5660
11.9k
  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5661
11.9k
  if (r != 0) goto err;
5662
5663
#ifdef ONIG_DEBUG_PARSE_TREE
5664
# if 0
5665
  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5666
  print_tree(stderr, root);
5667
# endif
5668
#endif
5669
5670
11.9k
#ifdef USE_NAMED_GROUP
5671
  /* mixed use named group and no-named group */
5672
11.9k
  if (scan_env.num_named > 0 &&
5673
11.9k
      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5674
11.9k
      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5675
550
    if (scan_env.num_named != scan_env.num_mem)
5676
0
      r = disable_noname_group_capture(&root, reg, &scan_env);
5677
550
    else
5678
550
      r = numbered_ref_check(root);
5679
5680
550
    if (r != 0) goto err;
5681
550
  }
5682
11.9k
#endif
5683
5684
11.9k
#ifdef USE_SUBEXP_CALL
5685
11.9k
  if (scan_env.num_call > 0) {
5686
0
    r = unset_addr_list_init(&uslist, scan_env.num_call);
5687
0
    if (r != 0) goto err;
5688
0
    scan_env.unset_addr_list = &uslist;
5689
0
    r = setup_subexp_call(root, &scan_env);
5690
0
    if (r != 0) goto err_unset;
5691
0
    r = subexp_recursive_check_trav(root, &scan_env);
5692
0
    if (r  < 0) goto err_unset;
5693
0
    r = subexp_inf_recursive_check_trav(root, &scan_env);
5694
0
    if (r != 0) goto err_unset;
5695
5696
0
    reg->num_call = scan_env.num_call;
5697
0
  }
5698
11.9k
  else
5699
11.9k
    reg->num_call = 0;
5700
11.9k
#endif
5701
5702
11.9k
  r = setup_tree(root, reg, 0, &scan_env);
5703
11.9k
  if (r != 0) goto err_unset;
5704
5705
#ifdef ONIG_DEBUG_PARSE_TREE
5706
  print_tree(stderr, root);
5707
#endif
5708
5709
11.9k
  reg->capture_history  = scan_env.capture_history;
5710
11.9k
  reg->bt_mem_start     = scan_env.bt_mem_start;
5711
11.9k
  reg->bt_mem_start    |= reg->capture_history;
5712
11.9k
  if (IS_FIND_CONDITION(reg->options))
5713
0
    BIT_STATUS_ON_ALL(reg->bt_mem_end);
5714
11.9k
  else {
5715
11.9k
    reg->bt_mem_end  = scan_env.bt_mem_end;
5716
11.9k
    reg->bt_mem_end |= reg->capture_history;
5717
11.9k
  }
5718
5719
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5720
  if (scan_env.backrefed_mem == 0
5721
# ifdef USE_SUBEXP_CALL
5722
      || scan_env.num_call == 0
5723
# endif
5724
      ) {
5725
    setup_comb_exp_check(root, 0, &scan_env);
5726
# ifdef USE_SUBEXP_CALL
5727
    if (scan_env.has_recursion != 0) {
5728
      scan_env.num_comb_exp_check = 0;
5729
    }
5730
    else
5731
# endif
5732
    if (scan_env.comb_exp_max_regnum > 0) {
5733
      int i;
5734
      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5735
  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5736
    scan_env.num_comb_exp_check = 0;
5737
    break;
5738
  }
5739
      }
5740
    }
5741
  }
5742
5743
  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5744
#endif
5745
5746
11.9k
  clear_optimize_info(reg);
5747
11.9k
#ifndef ONIG_DONT_OPTIMIZE
5748
11.9k
  r = set_optimize_info_from_tree(root, reg, &scan_env);
5749
11.9k
  if (r != 0) goto err_unset;
5750
11.9k
#endif
5751
5752
11.9k
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5753
0
    xfree(scan_env.mem_nodes_dynamic);
5754
0
    scan_env.mem_nodes_dynamic = (Node** )NULL;
5755
0
  }
5756
5757
11.9k
  r = compile_tree(root, reg);
5758
11.9k
  if (r == 0) {
5759
11.9k
    r = add_opcode(reg, OP_END);
5760
11.9k
#ifdef USE_SUBEXP_CALL
5761
11.9k
    if (scan_env.num_call > 0) {
5762
0
      r = unset_addr_list_fix(&uslist, reg);
5763
0
      unset_addr_list_end(&uslist);
5764
0
      if (r) goto err;
5765
0
    }
5766
11.9k
#endif
5767
5768
11.9k
    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5769
0
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5770
11.9k
    else {
5771
11.9k
      if (reg->bt_mem_start != 0)
5772
0
  reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5773
11.9k
      else
5774
11.9k
  reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5775
11.9k
    }
5776
11.9k
  }
5777
0
#ifdef USE_SUBEXP_CALL
5778
0
  else if (scan_env.num_call > 0) {
5779
0
    unset_addr_list_end(&uslist);
5780
0
  }
5781
11.9k
#endif
5782
11.9k
  onig_node_free(root);
5783
5784
#ifdef ONIG_DEBUG_COMPILE
5785
# ifdef USE_NAMED_GROUP
5786
  onig_print_names(stderr, reg);
5787
# endif
5788
  print_compiled_byte_code_list(stderr, reg);
5789
#endif
5790
5791
11.9k
 end:
5792
11.9k
  return r;
5793
5794
0
 err_unset:
5795
0
#ifdef USE_SUBEXP_CALL
5796
0
  if (scan_env.num_call > 0) {
5797
0
    unset_addr_list_end(&uslist);
5798
0
  }
5799
0
#endif
5800
0
 err:
5801
0
  if (IS_NOT_NULL(scan_env.error)) {
5802
0
    if (IS_NOT_NULL(einfo)) {
5803
0
      einfo->enc     = scan_env.enc;
5804
0
      einfo->par     = scan_env.error;
5805
0
      einfo->par_end = scan_env.error_end;
5806
0
    }
5807
0
  }
5808
5809
0
  onig_node_free(root);
5810
0
  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5811
0
      xfree(scan_env.mem_nodes_dynamic);
5812
0
  return r;
5813
0
}
5814
5815
static int onig_inited = 0;
5816
5817
extern int
5818
onig_reg_init(regex_t* reg, OnigOptionType option,
5819
        OnigCaseFoldType case_fold_flag,
5820
        OnigEncoding enc, const OnigSyntaxType* syntax)
5821
11.9k
{
5822
11.9k
  if (! onig_inited)
5823
1
    onig_init();
5824
5825
11.9k
  if (IS_NULL(reg))
5826
0
    return ONIGERR_INVALID_ARGUMENT;
5827
5828
11.9k
  (reg)->exact            = (UChar* )NULL;
5829
11.9k
  (reg)->chain            = (regex_t* )NULL;
5830
11.9k
  (reg)->p                = (UChar* )NULL;
5831
11.9k
  (reg)->name_table       = (void* )NULL;
5832
11.9k
  (reg)->repeat_range     = (OnigRepeatRange* )NULL;
5833
5834
11.9k
  if (ONIGENC_IS_UNDEF(enc))
5835
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5836
5837
11.9k
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5838
11.9k
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5839
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5840
0
  }
5841
5842
11.9k
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5843
0
    option |= syntax->options;
5844
0
    option &= ~ONIG_OPTION_SINGLELINE;
5845
0
  }
5846
11.9k
  else
5847
11.9k
    option |= syntax->options;
5848
5849
11.9k
  (reg)->enc              = enc;
5850
11.9k
  (reg)->options          = option;
5851
11.9k
  (reg)->syntax           = syntax;
5852
11.9k
  (reg)->optimize         = 0;
5853
5854
11.9k
  (reg)->alloc            = 0;
5855
11.9k
  (reg)->used             = 0;
5856
5857
11.9k
  (reg)->case_fold_flag   = case_fold_flag;
5858
11.9k
  return 0;
5859
11.9k
}
5860
5861
extern int
5862
onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5863
          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5864
          const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5865
0
{
5866
0
  int r;
5867
5868
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5869
0
  if (r) return r;
5870
5871
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
5872
0
  return r;
5873
0
}
5874
5875
extern int
5876
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5877
    OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5878
    OnigErrorInfo* einfo)
5879
11.9k
{
5880
11.9k
  int r;
5881
5882
11.9k
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5883
11.9k
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5884
5885
11.9k
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5886
11.9k
  if (r) goto err;
5887
5888
11.9k
  r = onig_compile(*reg, pattern, pattern_end, einfo);
5889
11.9k
  if (r) {
5890
0
  err:
5891
0
    onig_free(*reg);
5892
0
    *reg = NULL;
5893
0
  }
5894
11.9k
  return r;
5895
11.9k
}
5896
5897
extern int
5898
onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5899
0
{
5900
0
  return onig_init();
5901
0
}
5902
5903
extern int
5904
onig_init(void)
5905
418
{
5906
418
  if (onig_inited != 0)
5907
417
    return 0;
5908
5909
1
  onig_inited = 1;
5910
5911
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5912
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
5913
#endif
5914
5915
1
  onigenc_init();
5916
  /* onigenc_set_default_caseconv_table((UChar* )0); */
5917
5918
#ifdef ONIG_DEBUG_STATISTICS
5919
  onig_statistics_init();
5920
#endif
5921
5922
1
  return 0;
5923
418
}
5924
5925
5926
static OnigEndCallListItemType* EndCallTop;
5927
5928
extern void onig_add_end_call(void (*func)(void))
5929
0
{
5930
0
  OnigEndCallListItemType* item;
5931
5932
0
  item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
5933
0
  if (item == 0) return ;
5934
5935
0
  item->next = EndCallTop;
5936
0
  item->func = func;
5937
5938
0
  EndCallTop = item;
5939
0
}
5940
5941
static void
5942
exec_end_call_list(void)
5943
0
{
5944
0
  OnigEndCallListItemType* prev;
5945
0
  void (*func)(void);
5946
5947
0
  while (EndCallTop != 0) {
5948
0
    func = EndCallTop->func;
5949
0
    (*func)();
5950
5951
0
    prev = EndCallTop;
5952
0
    EndCallTop = EndCallTop->next;
5953
0
    xfree(prev);
5954
0
  }
5955
0
}
5956
5957
extern int
5958
onig_end(void)
5959
0
{
5960
0
  exec_end_call_list();
5961
5962
#ifdef ONIG_DEBUG_STATISTICS
5963
  onig_print_statistics(stderr);
5964
#endif
5965
5966
#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
5967
  _CrtDumpMemoryLeaks();
5968
#endif
5969
5970
0
  onig_inited = 0;
5971
5972
0
  return 0;
5973
0
}
5974
5975
extern int
5976
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
5977
0
{
5978
0
  OnigCodePoint n, *data;
5979
0
  OnigCodePoint low, high, x;
5980
5981
0
  GET_CODE_POINT(n, p);
5982
0
  data = (OnigCodePoint* )p;
5983
0
  data++;
5984
5985
0
  for (low = 0, high = n; low < high; ) {
5986
0
    x = (low + high) >> 1;
5987
0
    if (code > data[x * 2 + 1])
5988
0
      low = x + 1;
5989
0
    else
5990
0
      high = x;
5991
0
  }
5992
5993
0
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
5994
0
}
5995
5996
extern int
5997
onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
5998
6.63k
{
5999
6.63k
  int found;
6000
6001
6.63k
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6002
0
    if (IS_NULL(cc->mbuf)) {
6003
0
      found = 0;
6004
0
    }
6005
0
    else {
6006
0
      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6007
0
    }
6008
0
  }
6009
6.63k
  else {
6010
6.63k
    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6011
6.63k
  }
6012
6013
6.63k
  if (IS_NCCLASS_NOT(cc))
6014
2.51k
    return !found;
6015
4.12k
  else
6016
4.12k
    return found;
6017
6.63k
}
6018
6019
extern int
6020
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6021
6.63k
{
6022
6.63k
  int len;
6023
6024
6.63k
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6025
0
    len = 2;
6026
0
  }
6027
6.63k
  else {
6028
6.63k
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6029
6.63k
  }
6030
6.63k
  return onig_is_code_in_cc_len(len, code, cc);
6031
6.63k
}
6032
6033
6034
#ifdef ONIG_DEBUG
6035
6036
/* arguments type */
6037
# define ARG_SPECIAL     -1
6038
# define ARG_NON          0
6039
# define ARG_RELADDR      1
6040
# define ARG_ABSADDR      2
6041
# define ARG_LENGTH       3
6042
# define ARG_MEMNUM       4
6043
# define ARG_OPTION       5
6044
# define ARG_STATE_CHECK  6
6045
6046
OnigOpInfoType OnigOpInfo[] = {
6047
  { OP_FINISH,            "finish",          ARG_NON },
6048
  { OP_END,               "end",             ARG_NON },
6049
  { OP_EXACT1,            "exact1",          ARG_SPECIAL },
6050
  { OP_EXACT2,            "exact2",          ARG_SPECIAL },
6051
  { OP_EXACT3,            "exact3",          ARG_SPECIAL },
6052
  { OP_EXACT4,            "exact4",          ARG_SPECIAL },
6053
  { OP_EXACT5,            "exact5",          ARG_SPECIAL },
6054
  { OP_EXACTN,            "exactn",          ARG_SPECIAL },
6055
  { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
6056
  { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
6057
  { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
6058
  { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
6059
  { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
6060
  { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
6061
  { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
6062
  { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
6063
  { OP_CCLASS,            "cclass",          ARG_SPECIAL },
6064
  { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
6065
  { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
6066
  { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
6067
  { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
6068
  { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
6069
  { OP_ANYCHAR,           "anychar",         ARG_NON },
6070
  { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
6071
  { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
6072
  { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
6073
  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6074
  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6075
  { OP_WORD,                "word",            ARG_NON },
6076
  { OP_NOT_WORD,            "not-word",        ARG_NON },
6077
  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
6078
  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
6079
  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
6080
  { OP_WORD_END,            "word-end",        ARG_NON },
6081
  { OP_ASCII_WORD,          "ascii-word",           ARG_NON },
6082
  { OP_NOT_ASCII_WORD,      "not-ascii-word",       ARG_NON },
6083
  { OP_ASCII_WORD_BOUND,    "ascii-word-bound",     ARG_NON },
6084
  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6085
  { OP_ASCII_WORD_BEGIN,    "ascii-word-begin",     ARG_NON },
6086
  { OP_ASCII_WORD_END,      "ascii-word-end",       ARG_NON },
6087
  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
6088
  { OP_END_BUF,             "end-buf",         ARG_NON },
6089
  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
6090
  { OP_END_LINE,            "end-line",        ARG_NON },
6091
  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
6092
  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
6093
  { OP_BACKREF1,            "backref1",             ARG_NON },
6094
  { OP_BACKREF2,            "backref2",             ARG_NON },
6095
  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
6096
  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
6097
  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
6098
  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
6099
  { OP_BACKREF_WITH_LEVEL,  "backref_at_level",     ARG_SPECIAL },
6100
  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
6101
  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
6102
  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
6103
  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
6104
  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
6105
  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
6106
  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
6107
  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
6108
  { OP_KEEP,                "keep",                 ARG_NON },
6109
  { OP_FAIL,                "fail",                 ARG_NON },
6110
  { OP_JUMP,                "jump",                 ARG_RELADDR },
6111
  { OP_PUSH,                "push",                 ARG_RELADDR },
6112
  { OP_POP,                 "pop",                  ARG_NON },
6113
  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
6114
  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
6115
  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
6116
  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
6117
  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
6118
  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
6119
  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
6120
  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
6121
  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
6122
  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
6123
  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
6124
  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
6125
  { OP_PUSH_POS,             "push-pos",             ARG_NON },
6126
  { OP_POP_POS,              "pop-pos",              ARG_NON },
6127
  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
6128
  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
6129
  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
6130
  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
6131
  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
6132
  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6133
  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6134
  { OP_PUSH_ABSENT_POS,      "push-absent-pos",      ARG_NON },
6135
  { OP_ABSENT,               "absent",               ARG_RELADDR },
6136
  { OP_ABSENT_END,           "absent-end",           ARG_NON },
6137
  { OP_CALL,                 "call",                 ARG_ABSADDR },
6138
  { OP_RETURN,               "return",               ARG_NON },
6139
  { OP_CONDITION,            "condition",            ARG_SPECIAL },
6140
  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
6141
  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6142
  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
6143
  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
6144
  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6145
    "state-check-anychar-ml*", ARG_STATE_CHECK },
6146
  { -1, "", ARG_NON }
6147
};
6148
6149
static const char*
6150
op2name(int opcode)
6151
{
6152
  int i;
6153
6154
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6155
    if (opcode == OnigOpInfo[i].opcode)
6156
      return OnigOpInfo[i].name;
6157
  }
6158
  return "";
6159
}
6160
6161
static int
6162
op2arg_type(int opcode)
6163
{
6164
  int i;
6165
6166
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6167
    if (opcode == OnigOpInfo[i].opcode)
6168
      return OnigOpInfo[i].arg_type;
6169
  }
6170
  return ARG_SPECIAL;
6171
}
6172
6173
# ifdef ONIG_DEBUG_PARSE_TREE
6174
static void
6175
Indent(FILE* f, int indent)
6176
{
6177
  int i;
6178
  for (i = 0; i < indent; i++) putc(' ', f);
6179
}
6180
# endif /* ONIG_DEBUG_PARSE_TREE */
6181
6182
static void
6183
p_string(FILE* f, ptrdiff_t len, UChar* s)
6184
{
6185
  fputs(":", f);
6186
  while (len-- > 0) { fputc(*s++, f); }
6187
}
6188
6189
static void
6190
p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6191
{
6192
  int x = len * mb_len;
6193
6194
  fprintf(f, ":%d:", len);
6195
  while (x-- > 0) { fputc(*s++, f); }
6196
}
6197
6198
extern void
6199
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6200
                              OnigEncoding enc)
6201
{
6202
  int i, n, arg_type;
6203
  RelAddrType addr;
6204
  LengthType len;
6205
  MemNumType mem;
6206
  StateCheckNumType scn;
6207
  OnigCodePoint code;
6208
  UChar *q;
6209
6210
  fprintf(f, "[%s", op2name(*bp));
6211
  arg_type = op2arg_type(*bp);
6212
  if (arg_type != ARG_SPECIAL) {
6213
    bp++;
6214
    switch (arg_type) {
6215
    case ARG_NON:
6216
      break;
6217
    case ARG_RELADDR:
6218
      GET_RELADDR_INC(addr, bp);
6219
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6220
      break;
6221
    case ARG_ABSADDR:
6222
      GET_ABSADDR_INC(addr, bp);
6223
      fprintf(f, ":(%d)", addr);
6224
      break;
6225
    case ARG_LENGTH:
6226
      GET_LENGTH_INC(len, bp);
6227
      fprintf(f, ":%d", len);
6228
      break;
6229
    case ARG_MEMNUM:
6230
      mem = *((MemNumType* )bp);
6231
      bp += SIZE_MEMNUM;
6232
      fprintf(f, ":%d", mem);
6233
      break;
6234
    case ARG_OPTION:
6235
      {
6236
  OnigOptionType option = *((OnigOptionType* )bp);
6237
  bp += SIZE_OPTION;
6238
  fprintf(f, ":%d", option);
6239
      }
6240
      break;
6241
6242
    case ARG_STATE_CHECK:
6243
      scn = *((StateCheckNumType* )bp);
6244
      bp += SIZE_STATE_CHECK_NUM;
6245
      fprintf(f, ":%d", scn);
6246
      break;
6247
    }
6248
  }
6249
  else {
6250
    switch (*bp++) {
6251
    case OP_EXACT1:
6252
    case OP_ANYCHAR_STAR_PEEK_NEXT:
6253
    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6254
      p_string(f, 1, bp++); break;
6255
    case OP_EXACT2:
6256
      p_string(f, 2, bp); bp += 2; break;
6257
    case OP_EXACT3:
6258
      p_string(f, 3, bp); bp += 3; break;
6259
    case OP_EXACT4:
6260
      p_string(f, 4, bp); bp += 4; break;
6261
    case OP_EXACT5:
6262
      p_string(f, 5, bp); bp += 5; break;
6263
    case OP_EXACTN:
6264
      GET_LENGTH_INC(len, bp);
6265
      p_len_string(f, len, 1, bp);
6266
      bp += len;
6267
      break;
6268
6269
    case OP_EXACTMB2N1:
6270
      p_string(f, 2, bp); bp += 2; break;
6271
    case OP_EXACTMB2N2:
6272
      p_string(f, 4, bp); bp += 4; break;
6273
    case OP_EXACTMB2N3:
6274
      p_string(f, 6, bp); bp += 6; break;
6275
    case OP_EXACTMB2N:
6276
      GET_LENGTH_INC(len, bp);
6277
      p_len_string(f, len, 2, bp);
6278
      bp += len * 2;
6279
      break;
6280
    case OP_EXACTMB3N:
6281
      GET_LENGTH_INC(len, bp);
6282
      p_len_string(f, len, 3, bp);
6283
      bp += len * 3;
6284
      break;
6285
    case OP_EXACTMBN:
6286
      {
6287
  int mb_len;
6288
6289
  GET_LENGTH_INC(mb_len, bp);
6290
  GET_LENGTH_INC(len, bp);
6291
  fprintf(f, ":%d:%d:", mb_len, len);
6292
  n = len * mb_len;
6293
  while (n-- > 0) { fputc(*bp++, f); }
6294
      }
6295
      break;
6296
6297
    case OP_EXACT1_IC:
6298
      len = enclen(enc, bp, bpend);
6299
      p_string(f, len, bp);
6300
      bp += len;
6301
      break;
6302
    case OP_EXACTN_IC:
6303
      GET_LENGTH_INC(len, bp);
6304
      p_len_string(f, len, 1, bp);
6305
      bp += len;
6306
      break;
6307
6308
    case OP_CCLASS:
6309
      n = bitset_on_num((BitSetRef )bp);
6310
      bp += SIZE_BITSET;
6311
      fprintf(f, ":%d", n);
6312
      break;
6313
6314
    case OP_CCLASS_NOT:
6315
      n = bitset_on_num((BitSetRef )bp);
6316
      bp += SIZE_BITSET;
6317
      fprintf(f, ":%d", n);
6318
      break;
6319
6320
    case OP_CCLASS_MB:
6321
    case OP_CCLASS_MB_NOT:
6322
      GET_LENGTH_INC(len, bp);
6323
      q = bp;
6324
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6325
      ALIGNMENT_RIGHT(q);
6326
# endif
6327
      GET_CODE_POINT(code, q);
6328
      bp += len;
6329
      fprintf(f, ":%d:%d", (int )code, len);
6330
      break;
6331
6332
    case OP_CCLASS_MIX:
6333
    case OP_CCLASS_MIX_NOT:
6334
      n = bitset_on_num((BitSetRef )bp);
6335
      bp += SIZE_BITSET;
6336
      GET_LENGTH_INC(len, bp);
6337
      q = bp;
6338
# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6339
      ALIGNMENT_RIGHT(q);
6340
# endif
6341
      GET_CODE_POINT(code, q);
6342
      bp += len;
6343
      fprintf(f, ":%d:%d:%d", n, (int )code, len);
6344
      break;
6345
6346
    case OP_BACKREFN_IC:
6347
      mem = *((MemNumType* )bp);
6348
      bp += SIZE_MEMNUM;
6349
      fprintf(f, ":%d", mem);
6350
      break;
6351
6352
    case OP_BACKREF_MULTI_IC:
6353
    case OP_BACKREF_MULTI:
6354
      fputs(" ", f);
6355
      GET_LENGTH_INC(len, bp);
6356
      for (i = 0; i < len; i++) {
6357
  GET_MEMNUM_INC(mem, bp);
6358
  if (i > 0) fputs(", ", f);
6359
  fprintf(f, "%d", mem);
6360
      }
6361
      break;
6362
6363
    case OP_BACKREF_WITH_LEVEL:
6364
      {
6365
  OnigOptionType option;
6366
  LengthType level;
6367
6368
  GET_OPTION_INC(option, bp);
6369
  fprintf(f, ":%d", option);
6370
  GET_LENGTH_INC(level, bp);
6371
  fprintf(f, ":%d", level);
6372
6373
  fputs(" ", f);
6374
  GET_LENGTH_INC(len, bp);
6375
  for (i = 0; i < len; i++) {
6376
    GET_MEMNUM_INC(mem, bp);
6377
    if (i > 0) fputs(", ", f);
6378
    fprintf(f, "%d", mem);
6379
  }
6380
      }
6381
      break;
6382
6383
    case OP_REPEAT:
6384
    case OP_REPEAT_NG:
6385
      {
6386
  mem = *((MemNumType* )bp);
6387
  bp += SIZE_MEMNUM;
6388
  addr = *((RelAddrType* )bp);
6389
  bp += SIZE_RELADDR;
6390
  fprintf(f, ":%d:%d", mem, addr);
6391
      }
6392
      break;
6393
6394
    case OP_PUSH_OR_JUMP_EXACT1:
6395
    case OP_PUSH_IF_PEEK_NEXT:
6396
      addr = *((RelAddrType* )bp);
6397
      bp += SIZE_RELADDR;
6398
      fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6399
      p_string(f, 1, bp);
6400
      bp += 1;
6401
      break;
6402
6403
    case OP_LOOK_BEHIND:
6404
      GET_LENGTH_INC(len, bp);
6405
      fprintf(f, ":%d", len);
6406
      break;
6407
6408
    case OP_PUSH_LOOK_BEHIND_NOT:
6409
      GET_RELADDR_INC(addr, bp);
6410
      GET_LENGTH_INC(len, bp);
6411
      fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6412
      break;
6413
6414
    case OP_STATE_CHECK_PUSH:
6415
    case OP_STATE_CHECK_PUSH_OR_JUMP:
6416
      scn = *((StateCheckNumType* )bp);
6417
      bp += SIZE_STATE_CHECK_NUM;
6418
      addr = *((RelAddrType* )bp);
6419
      bp += SIZE_RELADDR;
6420
      fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6421
      break;
6422
6423
    case OP_CONDITION:
6424
      GET_MEMNUM_INC(mem, bp);
6425
      GET_RELADDR_INC(addr, bp);
6426
      fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6427
      break;
6428
6429
    default:
6430
      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6431
        bp[-1]);
6432
    }
6433
  }
6434
  fputs("]", f);
6435
  if (nextp) *nextp = bp;
6436
}
6437
6438
# ifdef ONIG_DEBUG_COMPILE
6439
static void
6440
print_compiled_byte_code_list(FILE* f, regex_t* reg)
6441
{
6442
  int ncode;
6443
  UChar* bp = reg->p;
6444
  UChar* end = reg->p + reg->used;
6445
6446
  fprintf(f, "code length: %d", reg->used);
6447
6448
  ncode = -1;
6449
  while (bp < end) {
6450
    ncode++;
6451
    if (ncode % 5 == 0)
6452
      fprintf(f, "\n%ld:", bp - reg->p);
6453
    else
6454
      fprintf(f, " %ld:", bp - reg->p);
6455
    onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6456
  }
6457
6458
  fprintf(f, "\n");
6459
}
6460
# endif /* ONIG_DEBUG_COMPILE */
6461
6462
# ifdef ONIG_DEBUG_PARSE_TREE
6463
static void
6464
print_indent_tree(FILE* f, Node* node, int indent)
6465
{
6466
  int i, type, container_p = 0;
6467
  int add = 3;
6468
  UChar* p;
6469
6470
  Indent(f, indent);
6471
  if (IS_NULL(node)) {
6472
    fprintf(f, "ERROR: null node!!!\n");
6473
    exit (0);
6474
  }
6475
6476
  type = NTYPE(node);
6477
  switch (type) {
6478
  case NT_LIST:
6479
  case NT_ALT:
6480
    if (NTYPE(node) == NT_LIST)
6481
      fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6482
    else
6483
      fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6484
6485
    print_indent_tree(f, NCAR(node), indent + add);
6486
    while (IS_NOT_NULL(node = NCDR(node))) {
6487
      if (NTYPE(node) != type) {
6488
  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6489
  exit(0);
6490
      }
6491
      print_indent_tree(f, NCAR(node), indent + add);
6492
    }
6493
    break;
6494
6495
  case NT_STR:
6496
    fprintf(f, "<string%s:%"PRIxPTR">",
6497
      (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6498
    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6499
      if (*p >= 0x20 && *p < 0x7f)
6500
  fputc(*p, f);
6501
      else {
6502
  fprintf(f, " 0x%02x", *p);
6503
      }
6504
    }
6505
    break;
6506
6507
  case NT_CCLASS:
6508
    fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6509
    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6510
    if (NCCLASS(node)->mbuf) {
6511
      BBuf* bbuf = NCCLASS(node)->mbuf;
6512
      OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6513
      OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6514
      fprintf(f, "%d", *data++);
6515
      for (; data < end; data+=2) {
6516
  fprintf(f, ",");
6517
  fprintf(f, "%04x-%04x", data[0], data[1]);
6518
      }
6519
    }
6520
    break;
6521
6522
  case NT_CTYPE:
6523
    fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6524
    switch (NCTYPE(node)->ctype) {
6525
    case ONIGENC_CTYPE_WORD:
6526
      if (NCTYPE(node)->not != 0)
6527
  fputs("not word",       f);
6528
      else
6529
  fputs("word",           f);
6530
      break;
6531
6532
    default:
6533
      fprintf(f, "ERROR: undefined ctype.\n");
6534
      exit(0);
6535
    }
6536
    break;
6537
6538
  case NT_CANY:
6539
    fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6540
    break;
6541
6542
  case NT_ANCHOR:
6543
    fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6544
    switch (NANCHOR(node)->type) {
6545
    case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
6546
    case ANCHOR_END_BUF:        fputs("end buf",        f); break;
6547
    case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
6548
    case ANCHOR_END_LINE:       fputs("end line",       f); break;
6549
    case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
6550
    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6551
6552
    case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
6553
    case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
6554
#  ifdef USE_WORD_BEGIN_END
6555
    case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
6556
    case ANCHOR_WORD_END:        fputs("word end", f);       break;
6557
#  endif
6558
    case ANCHOR_PREC_READ:       fputs("prec read",      f); container_p = TRUE; break;
6559
    case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); container_p = TRUE; break;
6560
    case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); container_p = TRUE; break;
6561
    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6562
    case ANCHOR_KEEP:            fputs("keep",f);            break;
6563
6564
    default:
6565
      fprintf(f, "ERROR: undefined anchor type.\n");
6566
      break;
6567
    }
6568
    break;
6569
6570
  case NT_BREF:
6571
    {
6572
      int* p;
6573
      BRefNode* br = NBREF(node);
6574
      p = BACKREFS_P(br);
6575
      fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6576
      for (i = 0; i < br->back_num; i++) {
6577
  if (i > 0) fputs(", ", f);
6578
  fprintf(f, "%d", p[i]);
6579
      }
6580
    }
6581
    break;
6582
6583
#  ifdef USE_SUBEXP_CALL
6584
  case NT_CALL:
6585
    {
6586
      CallNode* cn = NCALL(node);
6587
      fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6588
      p_string(f, cn->name_end - cn->name, cn->name);
6589
    }
6590
    break;
6591
#  endif
6592
6593
  case NT_QTFR:
6594
    fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6595
      NQTFR(node)->lower, NQTFR(node)->upper,
6596
      (NQTFR(node)->greedy ? "" : "?"));
6597
    print_indent_tree(f, NQTFR(node)->target, indent + add);
6598
    break;
6599
6600
  case NT_ENCLOSE:
6601
    fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6602
    switch (NENCLOSE(node)->type) {
6603
    case ENCLOSE_OPTION:
6604
      fprintf(f, "option:%d", NENCLOSE(node)->option);
6605
      break;
6606
    case ENCLOSE_MEMORY:
6607
      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6608
      break;
6609
    case ENCLOSE_STOP_BACKTRACK:
6610
      fprintf(f, "stop-bt");
6611
      break;
6612
    case ENCLOSE_CONDITION:
6613
      fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6614
      break;
6615
    case ENCLOSE_ABSENT:
6616
      fprintf(f, "absent");
6617
      break;
6618
6619
    default:
6620
      break;
6621
    }
6622
    fprintf(f, "\n");
6623
    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6624
    break;
6625
6626
  default:
6627
    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6628
    break;
6629
  }
6630
6631
  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6632
      type != NT_ENCLOSE)
6633
    fprintf(f, "\n");
6634
6635
  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6636
6637
  fflush(f);
6638
}
6639
6640
static void
6641
print_tree(FILE* f, Node* node)
6642
{
6643
  print_indent_tree(f, node, 0);
6644
}
6645
# endif /* ONIG_DEBUG_PARSE_TREE */
6646
#endif /* ONIG_DEBUG */