Coverage Report

Created: 2023-11-27 06:38

/src/jq/modules/oniguruma/src/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "regparse.h"
31
32
0
#define OPS_INIT_SIZE  8
33
34
#define ND_IS_REAL_IGNORECASE(node) \
35
0
  (ND_IS_IGNORECASE(node) && !ND_STRING_IS_CRUDE(node))
36
37
typedef struct {
38
  OnigLen min;
39
  OnigLen max;
40
} MinMaxLen;
41
42
typedef struct {
43
  OnigLen min;
44
  OnigLen max;
45
  int min_is_sure;
46
} MinMaxCharLen;
47
48
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
49
50
static OnigLen node_min_byte_len(Node* node, ParseEnv* env);
51
52
static int
53
ops_init(regex_t* reg, int init_alloc_size)
54
0
{
55
0
  Operation* p;
56
0
  size_t size;
57
58
0
  if (init_alloc_size <= 0)
59
0
    return ONIGERR_PARSER_BUG;
60
61
0
  size = sizeof(Operation) * init_alloc_size;
62
0
  p = (Operation* )xrealloc(reg->ops, size);
63
0
  CHECK_NULL_RETURN_MEMERR(p);
64
0
  reg->ops = p;
65
0
#ifdef USE_DIRECT_THREADED_CODE
66
0
  {
67
0
    enum OpCode* cp;
68
0
    size = sizeof(enum OpCode) * init_alloc_size;
69
0
    cp = (enum OpCode* )xrealloc(reg->ocs, size);
70
0
    CHECK_NULL_RETURN_MEMERR(cp);
71
0
    reg->ocs = cp;
72
0
  }
73
0
#endif
74
75
0
  reg->ops_curr  = 0; /* !!! not yet done ops_new() */
76
0
  reg->ops_alloc = init_alloc_size;
77
0
  reg->ops_used  = 0;
78
79
0
  return ONIG_NORMAL;
80
0
}
81
82
static int
83
ops_resize(regex_t* reg, int n)
84
0
{
85
0
#ifdef USE_DIRECT_THREADED_CODE
86
0
  enum OpCode* cp;
87
0
#endif
88
0
  Operation* p;
89
0
  size_t size;
90
91
0
  if (n == reg->ops_alloc) return ONIG_NORMAL;
92
0
  if (n <= 0) return ONIGERR_PARSER_BUG;
93
94
0
  size = sizeof(Operation) * n;
95
0
  p = (Operation* )xrealloc(reg->ops, size);
96
0
  CHECK_NULL_RETURN_MEMERR(p);
97
0
  reg->ops = p;
98
99
0
#ifdef USE_DIRECT_THREADED_CODE
100
0
  size = sizeof(enum OpCode) * n;
101
0
  cp = (enum OpCode* )xrealloc(reg->ocs, size);
102
0
  CHECK_NULL_RETURN_MEMERR(cp);
103
0
  reg->ocs = cp;
104
0
#endif
105
106
0
  reg->ops_alloc = n;
107
0
  if (reg->ops_used == 0)
108
0
    reg->ops_curr = 0;
109
0
  else
110
0
    reg->ops_curr = reg->ops + (reg->ops_used - 1);
111
112
0
  return ONIG_NORMAL;
113
0
}
114
115
static int
116
ops_new(regex_t* reg)
117
0
{
118
0
  if (reg->ops_used >= reg->ops_alloc) {
119
0
    int r = ops_resize(reg, reg->ops_alloc << 1);
120
0
    if (r != ONIG_NORMAL) return r;
121
0
  }
122
123
0
  reg->ops_curr = reg->ops + reg->ops_used;
124
0
  reg->ops_used++;
125
126
0
  xmemset(reg->ops_curr, 0, sizeof(Operation));
127
0
  return ONIG_NORMAL;
128
0
}
129
130
static int
131
is_in_string_pool(regex_t* reg, UChar* s)
132
0
{
133
0
  return (s >= reg->string_pool && s < reg->string_pool_end);
134
0
}
135
136
static void
137
ops_free(regex_t* reg)
138
0
{
139
0
  int i;
140
141
0
  if (IS_NULL(reg->ops)) return ;
142
143
0
  for (i = 0; i < (int )reg->ops_used; i++) {
144
0
    enum OpCode opcode;
145
0
    Operation* op;
146
147
0
    op = reg->ops + i;
148
149
0
#ifdef USE_DIRECT_THREADED_CODE
150
0
    opcode = *(reg->ocs + i);
151
#else
152
    opcode = op->opcode;
153
#endif
154
155
0
    switch (opcode) {
156
0
    case OP_STR_MBN:
157
0
      if (! is_in_string_pool(reg, op->exact_len_n.s))
158
0
        xfree(op->exact_len_n.s);
159
0
      break;
160
0
    case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N:
161
0
      if (! is_in_string_pool(reg, op->exact_n.s))
162
0
        xfree(op->exact_n.s);
163
0
      break;
164
0
    case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
165
0
    case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
166
0
    case OP_STR_MB2N3:
167
0
      break;
168
169
0
    case OP_CCLASS_NOT: case OP_CCLASS:
170
0
      xfree(op->cclass.bsp);
171
0
      break;
172
173
0
    case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
174
0
      xfree(op->cclass_mb.mb);
175
0
      break;
176
0
    case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
177
0
      xfree(op->cclass_mix.mb);
178
0
      xfree(op->cclass_mix.bsp);
179
0
      break;
180
181
0
    case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
182
0
      break;
183
0
    case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC:
184
0
    case OP_BACKREF_CHECK:
185
0
#ifdef USE_BACKREF_WITH_LEVEL
186
0
    case OP_BACKREF_WITH_LEVEL:
187
0
    case OP_BACKREF_WITH_LEVEL_IC:
188
0
    case OP_BACKREF_CHECK_WITH_LEVEL:
189
0
#endif
190
0
      if (op->backref_general.num != 1)
191
0
        xfree(op->backref_general.ns);
192
0
      break;
193
194
0
    default:
195
0
      break;
196
0
    }
197
0
  }
198
199
0
  xfree(reg->ops);
200
0
#ifdef USE_DIRECT_THREADED_CODE
201
0
  xfree(reg->ocs);
202
0
  reg->ocs = 0;
203
0
#endif
204
205
0
  reg->ops = 0;
206
0
  reg->ops_curr  = 0;
207
0
  reg->ops_alloc = 0;
208
0
  reg->ops_used  = 0;
209
0
}
210
211
static int
212
ops_calc_size_of_string_pool(regex_t* reg)
213
0
{
214
0
  int i;
215
0
  int total;
216
217
0
  if (IS_NULL(reg->ops)) return 0;
218
219
0
  total = 0;
220
0
  for (i = 0; i < (int )reg->ops_used; i++) {
221
0
    enum OpCode opcode;
222
0
    Operation* op;
223
224
0
    op = reg->ops + i;
225
0
#ifdef USE_DIRECT_THREADED_CODE
226
0
    opcode = *(reg->ocs + i);
227
#else
228
    opcode = op->opcode;
229
#endif
230
231
0
    switch (opcode) {
232
0
    case OP_STR_MBN:
233
0
      total += op->exact_len_n.len * op->exact_len_n.n;
234
0
      break;
235
0
    case OP_STR_N:
236
0
    case OP_STR_MB2N:
237
0
      total += op->exact_n.n * 2;
238
0
      break;
239
0
    case OP_STR_MB3N:
240
0
      total += op->exact_n.n * 3;
241
0
      break;
242
243
0
    default:
244
0
      break;
245
0
    }
246
0
  }
247
248
0
  return total;
249
0
}
250
251
static int
252
ops_make_string_pool(regex_t* reg)
253
0
{
254
0
  int i;
255
0
  int len;
256
0
  int size;
257
0
  UChar* pool;
258
0
  UChar* curr;
259
260
0
  size = ops_calc_size_of_string_pool(reg);
261
0
  if (size <= 0) {
262
0
    return 0;
263
0
  }
264
265
0
  curr = pool = (UChar* )xmalloc((size_t )size);
266
0
  CHECK_NULL_RETURN_MEMERR(pool);
267
268
0
  for (i = 0; i < (int )reg->ops_used; i++) {
269
0
    enum OpCode opcode;
270
0
    Operation* op;
271
272
0
    op = reg->ops + i;
273
0
#ifdef USE_DIRECT_THREADED_CODE
274
0
    opcode = *(reg->ocs + i);
275
#else
276
    opcode = op->opcode;
277
#endif
278
279
0
    switch (opcode) {
280
0
    case OP_STR_MBN:
281
0
      len = op->exact_len_n.len * op->exact_len_n.n;
282
0
      xmemcpy(curr, op->exact_len_n.s, len);
283
0
      xfree(op->exact_len_n.s);
284
0
      op->exact_len_n.s = curr;
285
0
      curr += len;
286
0
      break;
287
0
    case OP_STR_N:
288
0
      len = op->exact_n.n;
289
0
    copy:
290
0
      xmemcpy(curr, op->exact_n.s, len);
291
0
      xfree(op->exact_n.s);
292
0
      op->exact_n.s = curr;
293
0
      curr += len;
294
0
      break;
295
0
    case OP_STR_MB2N:
296
0
      len = op->exact_n.n * 2;
297
0
      goto copy;
298
0
      break;
299
0
    case OP_STR_MB3N:
300
0
      len = op->exact_n.n * 3;
301
0
      goto copy;
302
0
      break;
303
304
0
    default:
305
0
      break;
306
0
    }
307
0
  }
308
309
0
  reg->string_pool     = pool;
310
0
  reg->string_pool_end = pool + size;
311
0
  return 0;
312
0
}
313
314
extern OnigCaseFoldType
315
onig_get_default_case_fold_flag(void)
316
0
{
317
0
  return OnigDefaultCaseFoldFlag;
318
0
}
319
320
extern int
321
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
322
0
{
323
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
324
0
  return 0;
325
0
}
326
327
static int
328
len_multiply_cmp(OnigLen x, int y, OnigLen v)
329
0
{
330
0
  if (x == 0 || y == 0) return -1;
331
332
0
  if (x < INFINITE_LEN / y) {
333
0
    OnigLen xy = x * (OnigLen )y;
334
0
    if (xy > v) return 1;
335
0
    else {
336
0
      if (xy == v) return 0;
337
0
      else return -1;
338
0
    }
339
0
  }
340
0
  else
341
0
    return v == INFINITE_LEN ? 0 : 1;
342
0
}
343
344
extern int
345
onig_positive_int_multiply(int x, int y)
346
0
{
347
0
  if (x == 0 || y == 0) return 0;
348
349
0
  if (x < ONIG_INT_MAX / y)
350
0
    return x * y;
351
0
  else
352
0
    return -1;
353
0
}
354
355
356
static void
357
node_swap(Node* a, Node* b)
358
0
{
359
0
  Node c;
360
361
0
  c = *a; *a = *b; *b = c;
362
363
0
  if (ND_TYPE(a) == ND_STRING) {
364
0
    StrNode* sn = STR_(a);
365
0
    if (sn->capacity == 0) {
366
0
      int len = (int )(sn->end - sn->s);
367
0
      sn->s   = sn->buf;
368
0
      sn->end = sn->s + len;
369
0
    }
370
0
  }
371
372
0
  if (ND_TYPE(b) == ND_STRING) {
373
0
    StrNode* sn = STR_(b);
374
0
    if (sn->capacity == 0) {
375
0
      int len = (int )(sn->end - sn->s);
376
0
      sn->s   = sn->buf;
377
0
      sn->end = sn->s + len;
378
0
    }
379
0
  }
380
0
}
381
382
static int
383
node_list_len(Node* list)
384
0
{
385
0
  int len;
386
387
0
  len = 1;
388
0
  while (IS_NOT_NULL(ND_CDR(list))) {
389
0
    list = ND_CDR(list);
390
0
    len++;
391
0
  }
392
393
0
  return len;
394
0
}
395
396
static Node*
397
node_list_add(Node* list, Node* x)
398
0
{
399
0
  Node *n;
400
401
0
  n = onig_node_new_list(x, NULL);
402
0
  if (IS_NULL(n)) return NULL_NODE;
403
404
0
  if (IS_NOT_NULL(list)) {
405
0
    while (IS_NOT_NULL(ND_CDR(list)))
406
0
      list = ND_CDR(list);
407
408
0
    ND_CDR(list) = n;
409
0
  }
410
411
0
  return n;
412
0
}
413
414
static int
415
node_str_node_cat(Node* node, Node* add)
416
0
{
417
0
  int r;
418
419
0
  if (ND_STATUS(node) != ND_STATUS(add))
420
0
    return ONIGERR_TYPE_BUG;
421
422
0
  if (STR_(node)->flag != STR_(add)->flag)
423
0
    return ONIGERR_TYPE_BUG;
424
425
0
  r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
426
0
  if (r != 0) return r;
427
428
0
  return 0;
429
0
}
430
431
static void
432
node_conv_to_str_node(Node* node, Node* ref_node)
433
0
{
434
0
  xmemset(node, 0, sizeof(*node));
435
0
  ND_SET_TYPE(node, ND_STRING);
436
0
  ND_STATUS(node) = ND_STATUS(ref_node);
437
438
0
  STR_(node)->flag     = STR_(ref_node)->flag;
439
0
  STR_(node)->s        = STR_(node)->buf;
440
0
  STR_(node)->end      = STR_(node)->buf;
441
0
  STR_(node)->capacity = 0;
442
0
}
443
444
static OnigLen
445
distance_add(OnigLen d1, OnigLen d2)
446
0
{
447
0
  if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
448
0
    return INFINITE_LEN;
449
0
  else {
450
0
    if (d1 <= INFINITE_LEN - d2) return d1 + d2;
451
0
    else return INFINITE_LEN;
452
0
  }
453
0
}
454
455
static OnigLen
456
distance_multiply(OnigLen d, int m)
457
0
{
458
0
  if (m == 0) return 0;
459
460
0
  if (d < INFINITE_LEN / m)
461
0
    return d * m;
462
0
  else
463
0
    return INFINITE_LEN;
464
0
}
465
466
static int
467
bitset_is_empty(BitSetRef bs)
468
0
{
469
0
  int i;
470
471
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {
472
0
    if (bs[i] != 0) return 0;
473
0
  }
474
0
  return 1;
475
0
}
476
477
#ifdef USE_CALL
478
479
static int
480
unset_addr_list_init(UnsetAddrList* list, int size)
481
0
{
482
0
  UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
483
0
  CHECK_NULL_RETURN_MEMERR(p);
484
485
0
  list->num   = 0;
486
0
  list->alloc = size;
487
0
  list->us    = p;
488
0
  return 0;
489
0
}
490
491
static void
492
unset_addr_list_end(UnsetAddrList* list)
493
0
{
494
0
  if (IS_NOT_NULL(list->us))
495
0
    xfree(list->us);
496
0
}
497
498
static int
499
unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
500
0
{
501
0
  UnsetAddr* p;
502
0
  int size;
503
504
0
  if (list->num >= list->alloc) {
505
0
    size = list->alloc * 2;
506
0
    p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size);
507
0
    CHECK_NULL_RETURN_MEMERR(p);
508
0
    list->alloc = size;
509
0
    list->us    = p;
510
0
  }
511
512
0
  list->us[list->num].offset = offset;
513
0
  list->us[list->num].target = node;
514
0
  list->num++;
515
0
  return 0;
516
0
}
517
#endif /* USE_CALL */
518
519
enum CharLenReturnType {
520
  CHAR_LEN_NORMAL = 0,       /* fixed or variable */
521
  CHAR_LEN_TOP_ALT_FIXED = 1
522
};
523
524
static int
525
mmcl_fixed(MinMaxCharLen* c)
526
0
{
527
0
  return (c->min == c->max && c->min != INFINITE_LEN);
528
0
}
529
530
static void
531
mmcl_set(MinMaxCharLen* l, OnigLen len)
532
0
{
533
0
  l->min = len;
534
0
  l->max = len;
535
0
  l->min_is_sure = TRUE;
536
0
}
537
538
static void
539
mmcl_set_min_max(MinMaxCharLen* l, OnigLen min, OnigLen max, int min_is_sure)
540
0
{
541
0
  l->min = min;
542
0
  l->max = max;
543
0
  l->min_is_sure = min_is_sure;
544
0
}
545
546
static void
547
mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)
548
0
{
549
0
  to->min = distance_add(to->min, add->min);
550
0
  to->max = distance_add(to->max, add->max);
551
552
0
  to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;
553
0
}
554
555
static void
556
mmcl_multiply(MinMaxCharLen* to, int m)
557
0
{
558
0
  to->min = distance_multiply(to->min, m);
559
0
  to->max = distance_multiply(to->max, m);
560
0
}
561
562
static void
563
mmcl_repeat_range_multiply(MinMaxCharLen* to, int mlow, int mhigh)
564
0
{
565
0
  to->min = distance_multiply(to->min, mlow);
566
567
0
  if (IS_INFINITE_REPEAT(mhigh))
568
0
    to->max = INFINITE_LEN;
569
0
  else
570
0
    to->max = distance_multiply(to->max, mhigh);
571
0
}
572
573
static void
574
mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)
575
0
{
576
0
  if (to->min > alt->min) {
577
0
    to->min         = alt->min;
578
0
    to->min_is_sure = alt->min_is_sure;
579
0
  }
580
0
  else if (to->min == alt->min) {
581
0
    if (alt->min_is_sure != FALSE)
582
0
      to->min_is_sure = TRUE;
583
0
  }
584
585
0
  if (to->max < alt->max) to->max = alt->max;
586
0
}
587
588
#ifndef ONIG_DONT_OPTIMIZE
589
590
static int
591
mml_is_equal(MinMaxLen* a, MinMaxLen* b)
592
0
{
593
0
  return a->min == b->min && a->max == b->max;
594
0
}
595
596
static void
597
mml_set_min_max(MinMaxLen* l, OnigLen min, OnigLen max)
598
0
{
599
0
  l->min = min;
600
0
  l->max = max;
601
0
}
602
603
static void
604
mml_clear(MinMaxLen* l)
605
0
{
606
0
  l->min = l->max = 0;
607
0
}
608
609
static void
610
mml_copy(MinMaxLen* to, MinMaxLen* from)
611
0
{
612
0
  to->min = from->min;
613
0
  to->max = from->max;
614
0
}
615
616
static void
617
mml_add(MinMaxLen* to, MinMaxLen* add)
618
0
{
619
0
  to->min = distance_add(to->min, add->min);
620
0
  to->max = distance_add(to->max, add->max);
621
0
}
622
623
static void
624
mml_alt_merge(MinMaxLen* to, MinMaxLen* alt)
625
0
{
626
0
  if (to->min > alt->min) to->min = alt->min;
627
0
  if (to->max < alt->max) to->max = alt->max;
628
0
}
629
630
#endif
631
632
/* fixed size pattern node only */
633
static int
634
node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env,
635
               int level)
636
0
{
637
0
  MinMaxCharLen tci;
638
0
  int r = CHAR_LEN_NORMAL;
639
640
0
  level++;
641
642
0
  switch (ND_TYPE(node)) {
643
0
  case ND_LIST:
644
0
    {
645
0
      int first = TRUE;
646
0
      do {
647
0
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
648
0
        if (r < 0) break;
649
0
        if (first == TRUE) {
650
0
          *ci = tci;
651
0
          first = FALSE;
652
0
        }
653
0
        else
654
0
          mmcl_add(ci, &tci);
655
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
656
0
    }
657
0
    break;
658
659
0
  case ND_ALT:
660
0
    {
661
0
      int fixed;
662
663
0
      r = node_char_len1(ND_CAR(node), reg, ci, env, level);
664
0
      if (r < 0) break;
665
666
0
      fixed = TRUE;
667
0
      while (IS_NOT_NULL(node = ND_CDR(node))) {
668
0
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
669
0
        if (r < 0) break;
670
0
        if (! mmcl_fixed(&tci))
671
0
          fixed = FALSE;
672
0
        mmcl_alt_merge(ci, &tci);
673
0
      }
674
0
      if (r < 0) break;
675
676
0
      r = CHAR_LEN_NORMAL;
677
0
      if (mmcl_fixed(ci)) break;
678
679
0
      if (fixed == TRUE && level == 1) {
680
0
        r = CHAR_LEN_TOP_ALT_FIXED;
681
0
      }
682
0
    }
683
0
    break;
684
685
0
  case ND_STRING:
686
0
    {
687
0
      OnigLen clen;
688
0
      StrNode* sn = STR_(node);
689
0
      UChar *s = sn->s;
690
691
0
      if (ND_IS_REAL_IGNORECASE(node) &&
692
0
          CASE_FOLD_IS_NOT_ASCII_ONLY(env->case_fold_flag)) {
693
        /* Such a case is possible.
694
           ex. /(?i)(?<=\1)(a)/
695
           Backref node refer to capture group, but it doesn't tune yet.
696
         */
697
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
698
0
        break;
699
0
      }
700
701
0
      clen = 0;
702
0
      while (s < sn->end) {
703
0
        s += enclen(reg->enc, s);
704
0
        clen = distance_add(clen, 1);
705
0
      }
706
0
      mmcl_set(ci, clen);
707
0
    }
708
0
    break;
709
710
0
  case ND_QUANT:
711
0
    {
712
0
      QuantNode* qn = QUANT_(node);
713
714
0
      if (qn->lower == qn->upper) {
715
0
        if (qn->upper == 0) {
716
0
          mmcl_set(ci, 0);
717
0
        }
718
0
        else {
719
0
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
720
0
          if (r < 0) break;
721
0
          mmcl_multiply(ci, qn->lower);
722
0
        }
723
0
      }
724
0
      else {
725
0
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
726
0
        if (r < 0) break;
727
0
        mmcl_repeat_range_multiply(ci, qn->lower, qn->upper);
728
0
      }
729
0
    }
730
0
    break;
731
732
0
#ifdef USE_CALL
733
0
  case ND_CALL:
734
0
    if (ND_IS_RECURSION(node))
735
0
      mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
736
0
    else
737
0
      r = node_char_len1(ND_BODY(node), reg, ci, env, level);
738
0
    break;
739
0
#endif
740
741
0
  case ND_CTYPE:
742
0
  case ND_CCLASS:
743
0
    mmcl_set(ci, 1);
744
0
    break;
745
746
0
  case ND_BAG:
747
0
    {
748
0
      BagNode* en = BAG_(node);
749
750
0
      switch (en->type) {
751
0
      case BAG_MEMORY:
752
0
        if (ND_IS_FIXED_CLEN(node)) {
753
0
          mmcl_set_min_max(ci, en->min_char_len, en->max_char_len,
754
0
                           ND_IS_FIXED_CLEN_MIN_SURE(node));
755
0
        }
756
0
        else {
757
0
          if (ND_IS_MARK1(node)) {
758
0
            mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
759
0
          }
760
0
          else {
761
0
            ND_STATUS_ADD(node, MARK1);
762
0
            r = node_char_len1(ND_BODY(node), reg, ci, env, level);
763
0
            ND_STATUS_REMOVE(node, MARK1);
764
0
            if (r < 0) break;
765
766
0
            en->min_char_len = ci->min;
767
0
            en->max_char_len = ci->max;
768
0
            ND_STATUS_ADD(node, FIXED_CLEN);
769
0
            if (ci->min_is_sure != FALSE)
770
0
              ND_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);
771
0
          }
772
0
        }
773
        /* can't optimize look-behind if capture exists. */
774
0
        ci->min_is_sure = FALSE;
775
0
        break;
776
0
      case BAG_OPTION:
777
0
      case BAG_STOP_BACKTRACK:
778
0
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
779
0
        break;
780
0
      case BAG_IF_ELSE:
781
0
        {
782
0
          MinMaxCharLen eci;
783
784
0
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
785
0
          if (r < 0) break;
786
787
0
          if (IS_NOT_NULL(en->te.Then)) {
788
0
            r = node_char_len1(en->te.Then, reg, &tci, env, level);
789
0
            if (r < 0) break;
790
0
            mmcl_add(ci, &tci);
791
0
          }
792
793
0
          if (IS_NOT_NULL(en->te.Else)) {
794
0
            r = node_char_len1(en->te.Else, reg, &eci, env, level);
795
0
            if (r < 0) break;
796
0
          }
797
0
          else {
798
0
            mmcl_set(&eci, 0);
799
0
          }
800
801
0
          mmcl_alt_merge(ci, &eci);
802
0
        }
803
0
        break;
804
0
      default: /* never come here */
805
0
        r = ONIGERR_PARSER_BUG;
806
0
        break;
807
0
      }
808
0
    }
809
0
    break;
810
811
0
  case ND_GIMMICK:
812
0
    mmcl_set(ci, 0);
813
0
    break;
814
815
0
  case ND_ANCHOR:
816
0
  zero:
817
0
    mmcl_set(ci, 0);
818
    /* can't optimize look-behind if anchor exists. */
819
0
    ci->min_is_sure = FALSE;
820
0
    break;
821
822
0
  case ND_BACKREF:
823
0
    if (ND_IS_CHECKER(node))
824
0
      goto zero;
825
826
0
    if (ND_IS_RECURSION(node)) {
827
0
#ifdef USE_BACKREF_WITH_LEVEL
828
0
      if (ND_IS_NEST_LEVEL(node)) {
829
0
        mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
830
0
        break;
831
0
      }
832
0
#endif
833
834
0
      mmcl_set_min_max(ci, 0, 0, FALSE);
835
0
      break;
836
0
    }
837
838
0
    {
839
0
      int i;
840
0
      int* backs;
841
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
842
0
      BackRefNode* br = BACKREF_(node);
843
844
0
      backs = BACKREFS_P(br);
845
0
      r = node_char_len1(mem_env[backs[0]].mem_node, reg, ci, env, level);
846
0
      if (r < 0) break;
847
0
      if (! mmcl_fixed(ci)) ci->min_is_sure = FALSE;
848
849
0
      for (i = 1; i < br->back_num; i++) {
850
0
        r = node_char_len1(mem_env[backs[i]].mem_node, reg, &tci, env, level);
851
0
        if (r < 0) break;
852
0
        if (! mmcl_fixed(&tci)) tci.min_is_sure = FALSE;
853
0
        mmcl_alt_merge(ci, &tci);
854
0
      }
855
0
    }
856
0
    break;
857
858
0
  default: /* never come here */
859
0
    r = ONIGERR_PARSER_BUG;
860
0
    break;
861
0
  }
862
863
0
  return r;
864
0
}
865
866
static int
867
node_char_len(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env)
868
0
{
869
0
  return node_char_len1(node, reg, ci, env, 0);
870
0
}
871
872
873
static int
874
add_op(regex_t* reg, int opcode)
875
0
{
876
0
  int r;
877
878
0
  r = ops_new(reg);
879
0
  if (r != ONIG_NORMAL) return r;
880
881
0
#ifdef USE_DIRECT_THREADED_CODE
882
0
  *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
883
#else
884
  reg->ops_curr->opcode = opcode;
885
#endif
886
887
0
  return 0;
888
0
}
889
890
static int compile_length_tree(Node* node, regex_t* reg, ParseEnv* env);
891
static int compile_tree(Node* node, regex_t* reg, ParseEnv* env);
892
893
894
#define IS_NEED_STR_LEN_OP(op) \
895
0
   ((op) == OP_STR_N    || (op) == OP_STR_MB2N ||\
896
0
    (op) == OP_STR_MB3N || (op) == OP_STR_MBN)
897
898
static int
899
select_str_opcode(int mb_len, int str_len)
900
0
{
901
0
  int op;
902
903
0
  switch (mb_len) {
904
0
  case 1:
905
0
    switch (str_len) {
906
0
    case 1:  op = OP_STR_1; break;
907
0
    case 2:  op = OP_STR_2; break;
908
0
    case 3:  op = OP_STR_3; break;
909
0
    case 4:  op = OP_STR_4; break;
910
0
    case 5:  op = OP_STR_5; break;
911
0
    default: op = OP_STR_N; break;
912
0
    }
913
0
    break;
914
915
0
  case 2:
916
0
    switch (str_len) {
917
0
    case 1:  op = OP_STR_MB2N1; break;
918
0
    case 2:  op = OP_STR_MB2N2; break;
919
0
    case 3:  op = OP_STR_MB2N3; break;
920
0
    default: op = OP_STR_MB2N;  break;
921
0
    }
922
0
    break;
923
924
0
  case 3:
925
0
    op = OP_STR_MB3N;
926
0
    break;
927
928
0
  default:
929
0
    op = OP_STR_MBN;
930
0
    break;
931
0
  }
932
933
0
  return op;
934
0
}
935
936
static int
937
is_strict_real_node(Node* node)
938
0
{
939
0
  switch (ND_TYPE(node)) {
940
0
  case ND_STRING:
941
0
    {
942
0
      StrNode* sn = STR_(node);
943
0
      return (sn->end != sn->s);
944
0
    }
945
0
    break;
946
947
0
  case ND_CCLASS:
948
0
  case ND_CTYPE:
949
0
    return 1;
950
0
    break;
951
952
0
  default:
953
0
    return 0;
954
0
    break;
955
0
  }
956
0
}
957
958
static int
959
compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ParseEnv* env)
960
0
{
961
0
  int r;
962
0
  int saved_num_empty_check;
963
0
  int emptiness;
964
0
  Node* body;
965
966
0
  body = ND_BODY((Node* )qn);
967
0
  emptiness = qn->emptiness;
968
0
  saved_num_empty_check = reg->num_empty_check;
969
970
0
  if (emptiness != BODY_IS_NOT_EMPTY) {
971
0
    r = add_op(reg, OP_EMPTY_CHECK_START);
972
0
    if (r != 0) return r;
973
0
    COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
974
0
    reg->num_empty_check++;
975
0
  }
976
977
0
  r = compile_tree(body, reg, env);
978
0
  if (r != 0) return r;
979
980
0
  if (emptiness != BODY_IS_NOT_EMPTY) {
981
0
    if (emptiness == BODY_MAY_BE_EMPTY)
982
0
      r = add_op(reg, OP_EMPTY_CHECK_END);
983
0
    else if (emptiness == BODY_MAY_BE_EMPTY_MEM) {
984
0
      if (ND_IS_EMPTY_STATUS_CHECK(qn) != 0 && qn->empty_status_mem != 0) {
985
0
        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
986
0
        if (r != 0) return r;
987
0
        COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
988
0
      }
989
0
      else
990
0
        r = add_op(reg, OP_EMPTY_CHECK_END);
991
0
    }
992
0
#ifdef USE_CALL
993
0
    else if (emptiness == BODY_MAY_BE_EMPTY_REC) {
994
0
      r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
995
0
      if (r != 0) return r;
996
0
      COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
997
0
    }
998
0
#endif
999
1000
0
    if (r != 0) return r;
1001
0
    COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
1002
0
  }
1003
0
  return r;
1004
0
}
1005
1006
#ifdef USE_CALL
1007
static int
1008
compile_call(CallNode* node, regex_t* reg, ParseEnv* env)
1009
0
{
1010
0
  int r;
1011
0
  int offset;
1012
1013
0
  r = add_op(reg, OP_CALL);
1014
0
  if (r != 0) return r;
1015
1016
0
  COP(reg)->call.addr = 0; /* dummy addr. */
1017
#ifdef ONIG_DEBUG_MATCH_COUNTER
1018
  COP(reg)->call.called_mem = node->called_gnum;
1019
#endif
1020
1021
0
  offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
1022
0
  r = unset_addr_list_add(env->unset_addr_list, offset, ND_CALL_BODY(node));
1023
0
  return r;
1024
0
}
1025
#endif
1026
1027
static int
1028
compile_tree_n_times(Node* node, int n, regex_t* reg, ParseEnv* env)
1029
0
{
1030
0
  int i, r;
1031
1032
0
  for (i = 0; i < n; i++) {
1033
0
    r = compile_tree(node, reg, env);
1034
0
    if (r != 0) return r;
1035
0
  }
1036
0
  return 0;
1037
0
}
1038
1039
static int
1040
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
1041
                          regex_t* reg ARG_UNUSED)
1042
0
{
1043
0
  return 1;
1044
0
}
1045
1046
static int
1047
add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
1048
0
{
1049
0
  int op;
1050
0
  int r;
1051
0
  int byte_len;
1052
0
  UChar* p;
1053
0
  UChar* end;
1054
1055
0
  op = select_str_opcode(mb_len, str_len);
1056
0
  r = add_op(reg, op);
1057
0
  if (r != 0) return r;
1058
1059
0
  byte_len = mb_len * str_len;
1060
0
  end = s + byte_len;
1061
1062
0
  if (op == OP_STR_MBN) {
1063
0
    p = onigenc_strdup(reg->enc, s, end);
1064
0
    CHECK_NULL_RETURN_MEMERR(p);
1065
1066
0
    COP(reg)->exact_len_n.len = mb_len;
1067
0
    COP(reg)->exact_len_n.n   = str_len;
1068
0
    COP(reg)->exact_len_n.s   = p;
1069
0
  }
1070
0
  else if (IS_NEED_STR_LEN_OP(op)) {
1071
0
    p = onigenc_strdup(reg->enc, s, end);
1072
0
    CHECK_NULL_RETURN_MEMERR(p);
1073
0
    COP(reg)->exact_n.n = str_len;
1074
0
    COP(reg)->exact_n.s = p;
1075
0
  }
1076
0
  else {
1077
0
    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
1078
0
    xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
1079
0
  }
1080
1081
0
  return 0;
1082
0
}
1083
1084
static int
1085
compile_length_string_node(Node* node, regex_t* reg)
1086
0
{
1087
0
  int rlen, r, len, prev_len, slen;
1088
0
  UChar *p, *prev;
1089
0
  StrNode* sn;
1090
0
  OnigEncoding enc = reg->enc;
1091
1092
0
  sn = STR_(node);
1093
0
  if (sn->end <= sn->s)
1094
0
    return 0;
1095
1096
0
  p = prev = sn->s;
1097
0
  prev_len = enclen(enc, p);
1098
0
  p += prev_len;
1099
0
  slen = 1;
1100
0
  rlen = 0;
1101
1102
0
  for (; p < sn->end; ) {
1103
0
    len = enclen(enc, p);
1104
0
    if (len == prev_len) {
1105
0
      slen++;
1106
0
    }
1107
0
    else {
1108
0
      r = add_compile_string_length(prev, prev_len, slen, reg);
1109
0
      rlen += r;
1110
0
      prev = p;
1111
0
      slen = 1;
1112
0
      prev_len = len;
1113
0
    }
1114
0
    p += len;
1115
0
  }
1116
1117
0
  r = add_compile_string_length(prev, prev_len, slen, reg);
1118
0
  rlen += r;
1119
0
  return rlen;
1120
0
}
1121
1122
static int
1123
compile_length_string_crude_node(StrNode* sn, regex_t* reg)
1124
0
{
1125
0
  if (sn->end <= sn->s)
1126
0
    return 0;
1127
1128
0
  return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
1129
0
                                   reg);
1130
0
}
1131
1132
static int
1133
compile_string_node(Node* node, regex_t* reg)
1134
0
{
1135
0
  int r, len, prev_len, slen;
1136
0
  UChar *p, *prev, *end;
1137
0
  StrNode* sn;
1138
0
  OnigEncoding enc = reg->enc;
1139
1140
0
  sn = STR_(node);
1141
0
  if (sn->end <= sn->s)
1142
0
    return 0;
1143
1144
0
  end = sn->end;
1145
1146
0
  p = prev = sn->s;
1147
0
  prev_len = enclen(enc, p);
1148
0
  p += prev_len;
1149
0
  slen = 1;
1150
1151
0
  for (; p < end; ) {
1152
0
    len = enclen(enc, p);
1153
0
    if (len == prev_len) {
1154
0
      slen++;
1155
0
    }
1156
0
    else {
1157
0
      r = add_compile_string(prev, prev_len, slen, reg);
1158
0
      if (r != 0) return r;
1159
1160
0
      prev  = p;
1161
0
      slen  = 1;
1162
0
      prev_len = len;
1163
0
    }
1164
1165
0
    p += len;
1166
0
  }
1167
1168
0
  return add_compile_string(prev, prev_len, slen, reg);
1169
0
}
1170
1171
static int
1172
compile_string_crude_node(StrNode* sn, regex_t* reg)
1173
0
{
1174
0
  if (sn->end <= sn->s)
1175
0
    return 0;
1176
1177
0
  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
1178
0
}
1179
1180
static void*
1181
set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
1182
0
{
1183
0
  size_t len;
1184
0
  void* p;
1185
1186
0
  len = (size_t )mbuf->used;
1187
0
  p = xmalloc(len);
1188
0
  if (IS_NULL(p)) return NULL;
1189
1190
0
  xmemcpy(p, mbuf->p, len);
1191
0
  return p;
1192
0
}
1193
1194
static int
1195
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
1196
0
{
1197
0
  return 1;
1198
0
}
1199
1200
static int
1201
compile_cclass_node(CClassNode* cc, regex_t* reg)
1202
0
{
1203
0
  int r;
1204
1205
0
  if (IS_NULL(cc->mbuf)) {
1206
0
    r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
1207
0
    if (r != 0) return r;
1208
1209
0
    COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
1210
0
    CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
1211
0
    xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
1212
0
  }
1213
0
  else {
1214
0
    void* p;
1215
1216
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
1217
0
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
1218
0
      if (r != 0) return r;
1219
1220
0
      p = set_multi_byte_cclass(cc->mbuf, reg);
1221
0
      CHECK_NULL_RETURN_MEMERR(p);
1222
0
      COP(reg)->cclass_mb.mb = p;
1223
0
    }
1224
0
    else {
1225
0
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
1226
0
      if (r != 0) return r;
1227
1228
0
      COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
1229
0
      CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
1230
0
      xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
1231
1232
0
      p = set_multi_byte_cclass(cc->mbuf, reg);
1233
0
      CHECK_NULL_RETURN_MEMERR(p);
1234
0
      COP(reg)->cclass_mix.mb = p;
1235
0
    }
1236
0
  }
1237
1238
0
  return 0;
1239
0
}
1240
1241
static void
1242
set_addr_in_repeat_range(regex_t* reg)
1243
0
{
1244
0
  int i;
1245
1246
0
  for (i = 0; i < reg->num_repeat; i++) {
1247
0
    RepeatRange* p = reg->repeat_range + i;
1248
0
    int offset = p->u.offset;
1249
0
    p->u.pcode = reg->ops + offset;
1250
0
  }
1251
0
}
1252
1253
static int
1254
entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
1255
0
{
1256
0
#define REPEAT_RANGE_ALLOC  4
1257
1258
0
  RepeatRange* p;
1259
1260
0
  if (reg->repeat_range_alloc == 0) {
1261
0
    p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
1262
0
    CHECK_NULL_RETURN_MEMERR(p);
1263
0
    reg->repeat_range = p;
1264
0
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
1265
0
  }
1266
0
  else if (reg->repeat_range_alloc <= id) {
1267
0
    int n;
1268
0
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
1269
0
    p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
1270
0
    CHECK_NULL_RETURN_MEMERR(p);
1271
0
    reg->repeat_range = p;
1272
0
    reg->repeat_range_alloc = n;
1273
0
  }
1274
0
  else {
1275
0
    p = reg->repeat_range;
1276
0
  }
1277
1278
0
  p[id].lower    = lower;
1279
0
  p[id].upper    = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
1280
0
  p[id].u.offset = ops_index;
1281
0
  return 0;
1282
0
}
1283
1284
static int
1285
compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
1286
                          regex_t* reg, ParseEnv* env)
1287
0
{
1288
0
  int r;
1289
0
  int num_repeat = reg->num_repeat++;
1290
1291
0
  r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
1292
0
  if (r != 0) return r;
1293
1294
0
  COP(reg)->repeat.id   = num_repeat;
1295
0
  COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
1296
1297
0
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
1298
0
                         COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
1299
0
  if (r != 0) return r;
1300
1301
0
  r = compile_quant_body_with_empty_check(qn, reg, env);
1302
0
  if (r != 0) return r;
1303
1304
0
  r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
1305
0
  if (r != 0) return r;
1306
1307
0
  COP(reg)->repeat_inc.id = num_repeat;
1308
0
  return r;
1309
0
}
1310
1311
static int
1312
is_anychar_infinite_greedy(QuantNode* qn)
1313
0
{
1314
0
  if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
1315
0
      ND_IS_ANYCHAR(ND_QUANT_BODY(qn)))
1316
0
    return 1;
1317
0
  else
1318
0
    return 0;
1319
0
}
1320
1321
0
#define QUANTIFIER_EXPAND_LIMIT_SIZE   10
1322
#define CKN_ON   (ckn > 0)
1323
1324
static int
1325
compile_length_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1326
0
{
1327
0
  int len, mod_tlen;
1328
0
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1329
0
  enum BodyEmptyType emptiness = qn->emptiness;
1330
0
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1331
1332
0
  if (tlen < 0) return tlen;
1333
0
  if (tlen == 0) return 0;
1334
1335
  /* anychar repeat */
1336
0
  if (is_anychar_infinite_greedy(qn)) {
1337
0
    if (qn->lower <= 1 ||
1338
0
        len_multiply_cmp((OnigLen )tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
1339
0
      if (IS_NOT_NULL(qn->next_head_exact))
1340
0
        return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
1341
0
      else
1342
0
        return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
1343
0
    }
1344
0
  }
1345
1346
0
  mod_tlen = tlen;
1347
0
  if (emptiness != BODY_IS_NOT_EMPTY)
1348
0
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1349
1350
0
  if (infinite &&
1351
0
      (qn->lower <= 1 ||
1352
0
       len_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1353
0
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1354
0
      len = OPSIZE_JUMP;
1355
0
    }
1356
0
    else {
1357
0
      len = tlen * qn->lower;
1358
0
    }
1359
1360
0
    if (qn->greedy) {
1361
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1362
0
      if (IS_NOT_NULL(qn->head_exact))
1363
0
        len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
1364
0
      else
1365
0
#endif
1366
0
      if (IS_NOT_NULL(qn->next_head_exact))
1367
0
        len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
1368
0
      else
1369
0
        len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
1370
0
    }
1371
0
    else
1372
0
      len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
1373
0
  }
1374
0
  else if (qn->upper == 0) {
1375
0
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1376
0
      len = OPSIZE_JUMP + tlen;
1377
0
    }
1378
0
    else
1379
0
      len = 0;
1380
0
  }
1381
0
  else if (!infinite && qn->greedy &&
1382
0
           (qn->upper == 1 ||
1383
0
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1384
0
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1385
0
    len = tlen * qn->lower;
1386
0
    len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
1387
0
  }
1388
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1389
0
    len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
1390
0
  }
1391
0
  else {
1392
0
    len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
1393
0
  }
1394
1395
0
  return len;
1396
0
}
1397
1398
static int
1399
compile_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1400
0
{
1401
0
  int i, r, mod_tlen;
1402
0
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1403
0
  enum BodyEmptyType emptiness = qn->emptiness;
1404
0
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1405
1406
0
  if (tlen < 0) return tlen;
1407
0
  if (tlen == 0) return 0;
1408
1409
0
  if (is_anychar_infinite_greedy(qn) &&
1410
0
      (qn->lower <= 1 ||
1411
0
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1412
0
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1413
0
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1414
0
    if (r != 0) return r;
1415
0
    if (IS_NOT_NULL(qn->next_head_exact)) {
1416
0
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1417
0
                 OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
1418
0
      if (r != 0) return r;
1419
1420
0
      COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
1421
0
      return 0;
1422
0
    }
1423
0
    else {
1424
0
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1425
0
                 OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
1426
0
      return r;
1427
0
    }
1428
0
  }
1429
1430
0
  mod_tlen = tlen;
1431
0
  if (emptiness != BODY_IS_NOT_EMPTY)
1432
0
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1433
1434
0
  if (infinite &&
1435
0
      (qn->lower <= 1 ||
1436
0
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1437
0
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1438
0
    int addr;
1439
1440
0
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1441
0
      r = add_op(reg, OP_JUMP);
1442
0
      if (r != 0) return r;
1443
0
      if (qn->greedy) {
1444
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1445
0
        if (IS_NOT_NULL(qn->head_exact))
1446
0
          COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
1447
0
        else
1448
0
#endif
1449
0
        if (IS_NOT_NULL(qn->next_head_exact))
1450
0
          COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
1451
0
        else
1452
0
          COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
1453
0
      }
1454
0
      else {
1455
0
        COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
1456
0
      }
1457
0
    }
1458
0
    else {
1459
0
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1460
0
      if (r != 0) return r;
1461
0
    }
1462
1463
0
    if (qn->greedy) {
1464
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1465
0
      if (IS_NOT_NULL(qn->head_exact)) {
1466
0
        r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
1467
0
        if (r != 0) return r;
1468
0
        COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1469
0
        COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0];
1470
1471
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1472
0
        if (r != 0) return r;
1473
1474
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
1475
0
      }
1476
0
      else
1477
0
#endif
1478
0
      if (IS_NOT_NULL(qn->next_head_exact)) {
1479
0
        r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
1480
0
        if (r != 0) return r;
1481
0
        COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1482
0
        COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0];
1483
1484
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1485
0
        if (r != 0) return r;
1486
1487
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
1488
0
      }
1489
0
      else {
1490
0
        r = add_op(reg, OP_PUSH);
1491
0
        if (r != 0) return r;
1492
0
        COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1493
1494
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1495
0
        if (r != 0) return r;
1496
1497
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH);
1498
0
      }
1499
1500
0
      r = add_op(reg, OP_JUMP);
1501
0
      if (r != 0) return r;
1502
0
      COP(reg)->jump.addr = addr;
1503
0
    }
1504
0
    else {
1505
0
      r = add_op(reg, OP_JUMP);
1506
0
      if (r != 0) return r;
1507
0
      COP(reg)->jump.addr = mod_tlen + SIZE_INC;
1508
1509
0
      r = compile_quant_body_with_empty_check(qn, reg, env);
1510
0
      if (r != 0) return r;
1511
1512
0
      r = add_op(reg, OP_PUSH);
1513
0
      if (r != 0) return r;
1514
0
      COP(reg)->push.addr = -mod_tlen;
1515
0
    }
1516
0
  }
1517
0
  else if (qn->upper == 0) {
1518
0
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1519
0
      r = add_op(reg, OP_JUMP);
1520
0
      if (r != 0) return r;
1521
0
      COP(reg)->jump.addr = tlen + SIZE_INC;
1522
1523
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1524
0
    }
1525
0
    else {
1526
      /* Nothing output */
1527
0
      r = 0;
1528
0
    }
1529
0
  }
1530
0
  else if (! infinite && qn->greedy &&
1531
0
           (qn->upper == 1 ||
1532
0
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1533
0
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1534
0
    int n = qn->upper - qn->lower;
1535
1536
0
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1537
0
    if (r != 0) return r;
1538
1539
0
    for (i = 0; i < n; i++) {
1540
0
      int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
1541
0
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1542
1543
0
      r = add_op(reg, OP_PUSH);
1544
0
      if (r != 0) return r;
1545
0
      COP(reg)->push.addr = v;
1546
1547
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1548
0
      if (r != 0) return r;
1549
0
    }
1550
0
  }
1551
0
  else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1552
0
    r = add_op(reg, OP_PUSH);
1553
0
    if (r != 0) return r;
1554
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
1555
1556
0
    r = add_op(reg, OP_JUMP);
1557
0
    if (r != 0) return r;
1558
0
    COP(reg)->jump.addr = tlen + SIZE_INC;
1559
1560
0
    r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1561
0
  }
1562
0
  else {
1563
0
    r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
1564
0
  }
1565
0
  return r;
1566
0
}
1567
1568
static int
1569
compile_length_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1570
0
{
1571
0
  int tlen;
1572
1573
0
  tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1574
1575
0
  return tlen;
1576
0
}
1577
1578
static int
1579
compile_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1580
0
{
1581
0
  int r;
1582
1583
0
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1584
1585
0
  return r;
1586
0
}
1587
1588
static int
1589
compile_length_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1590
0
{
1591
0
  int len;
1592
0
  int tlen;
1593
1594
0
  if (node->type == BAG_OPTION)
1595
0
    return compile_length_option_node(node, reg, env);
1596
1597
0
  if (ND_BAG_BODY(node)) {
1598
0
    tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1599
0
    if (tlen < 0) return tlen;
1600
0
  }
1601
0
  else
1602
0
    tlen = 0;
1603
1604
0
  switch (node->type) {
1605
0
  case BAG_MEMORY:
1606
0
#ifdef USE_CALL
1607
1608
0
    if (node->m.regnum == 0 && ND_IS_CALLED(node)) {
1609
0
      len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1610
0
      return len;
1611
0
    }
1612
1613
0
    if (ND_IS_CALLED(node)) {
1614
0
      len = OPSIZE_MEM_START_PUSH + tlen
1615
0
        + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1616
0
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1617
0
        len += (ND_IS_RECURSION(node)
1618
0
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1619
0
      else
1620
0
        len += (ND_IS_RECURSION(node)
1621
0
                ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1622
0
    }
1623
0
    else if (ND_IS_RECURSION(node)) {
1624
0
      len = OPSIZE_MEM_START_PUSH;
1625
0
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1626
0
                     ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
1627
0
    }
1628
0
    else
1629
0
#endif
1630
0
    {
1631
0
      if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1632
0
        len = OPSIZE_MEM_START_PUSH;
1633
0
      else
1634
0
        len = OPSIZE_MEM_START;
1635
1636
0
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1637
0
                     ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
1638
0
    }
1639
0
    break;
1640
1641
0
  case BAG_STOP_BACKTRACK:
1642
0
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1643
0
      int v;
1644
0
      QuantNode* qn;
1645
1646
0
      qn = QUANT_(ND_BAG_BODY(node));
1647
0
      tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1648
0
      if (tlen < 0) return tlen;
1649
1650
0
      v = onig_positive_int_multiply(qn->lower, tlen);
1651
0
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1652
0
      len = v + OPSIZE_PUSH + tlen + OPSIZE_POP + OPSIZE_JUMP;
1653
0
    }
1654
0
    else {
1655
0
      len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1656
0
    }
1657
0
    break;
1658
1659
0
  case BAG_IF_ELSE:
1660
0
    {
1661
0
      Node* cond = ND_BAG_BODY(node);
1662
0
      Node* Then = node->te.Then;
1663
0
      Node* Else = node->te.Else;
1664
1665
0
      len = compile_length_tree(cond, reg, env);
1666
0
      if (len < 0) return len;
1667
0
      len += OPSIZE_PUSH + OPSIZE_MARK + OPSIZE_CUT_TO_MARK;
1668
1669
0
      if (IS_NOT_NULL(Then)) {
1670
0
        tlen = compile_length_tree(Then, reg, env);
1671
0
        if (tlen < 0) return tlen;
1672
0
        len += tlen;
1673
0
      }
1674
1675
0
      len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK;
1676
1677
0
      if (IS_NOT_NULL(Else)) {
1678
0
        tlen = compile_length_tree(Else, reg, env);
1679
0
        if (tlen < 0) return tlen;
1680
0
        len += tlen;
1681
0
      }
1682
0
    }
1683
0
    break;
1684
1685
0
  case BAG_OPTION:
1686
    /* never come here, but set for escape warning */
1687
0
    len = 0;
1688
0
    break;
1689
1690
0
  default:
1691
0
    return ONIGERR_TYPE_BUG;
1692
0
    break;
1693
0
  }
1694
1695
0
  return len;
1696
0
}
1697
1698
static int
1699
compile_bag_memory_node(BagNode* node, regex_t* reg, ParseEnv* env)
1700
0
{
1701
0
  int r;
1702
1703
0
#ifdef USE_CALL
1704
0
  if (ND_IS_CALLED(node)) {
1705
0
    int len;
1706
1707
0
    r = add_op(reg, OP_CALL);
1708
0
    if (r != 0) return r;
1709
1710
0
    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
1711
0
    ND_STATUS_ADD(node, FIXED_ADDR);
1712
0
    COP(reg)->call.addr = (int )node->m.called_addr;
1713
1714
0
    if (node->m.regnum == 0) {
1715
0
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1716
0
      len += OPSIZE_RETURN;
1717
1718
0
      r = add_op(reg, OP_JUMP);
1719
0
      if (r != 0) return r;
1720
0
      COP(reg)->jump.addr = len + SIZE_INC;
1721
1722
0
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1723
0
      if (r != 0) return r;
1724
1725
0
      r = add_op(reg, OP_RETURN);
1726
0
      return r;
1727
0
    }
1728
0
    else {
1729
0
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1730
0
      len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
1731
0
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1732
0
        len += (ND_IS_RECURSION(node)
1733
0
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1734
0
      else
1735
0
        len += (ND_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1736
1737
0
      r = add_op(reg, OP_JUMP);
1738
0
      if (r != 0) return r;
1739
0
      COP(reg)->jump.addr = len + SIZE_INC;
1740
0
    }
1741
0
  }
1742
0
#endif
1743
1744
0
  if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1745
0
    r = add_op(reg, OP_MEM_START_PUSH);
1746
0
  else
1747
0
    r = add_op(reg, OP_MEM_START);
1748
0
  if (r != 0) return r;
1749
0
  COP(reg)->memory_start.num = node->m.regnum;
1750
1751
0
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1752
0
  if (r != 0) return r;
1753
1754
0
#ifdef USE_CALL
1755
0
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1756
0
    r = add_op(reg, (ND_IS_RECURSION(node)
1757
0
                     ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
1758
0
  else
1759
0
    r = add_op(reg, (ND_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
1760
0
  if (r != 0) return r;
1761
0
  COP(reg)->memory_end.num = node->m.regnum;
1762
1763
0
  if (ND_IS_CALLED(node)) {
1764
0
    r = add_op(reg, OP_RETURN);
1765
0
  }
1766
#else
1767
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1768
    r = add_op(reg, OP_MEM_END_PUSH);
1769
  else
1770
    r = add_op(reg, OP_MEM_END);
1771
  if (r != 0) return r;
1772
  COP(reg)->memory_end.num = node->m.regnum;
1773
#endif
1774
1775
0
  return r;
1776
0
}
1777
1778
static int
1779
compile_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1780
0
{
1781
0
  int r, len;
1782
1783
0
  switch (node->type) {
1784
0
  case BAG_MEMORY:
1785
0
    r = compile_bag_memory_node(node, reg, env);
1786
0
    break;
1787
1788
0
  case BAG_OPTION:
1789
0
    r = compile_option_node(node, reg, env);
1790
0
    break;
1791
1792
0
  case BAG_STOP_BACKTRACK:
1793
0
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1794
0
      QuantNode* qn = QUANT_(ND_BAG_BODY(node));
1795
0
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1796
0
      if (r != 0) return r;
1797
1798
0
      len = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1799
0
      if (len < 0) return len;
1800
1801
0
      r = add_op(reg, OP_PUSH);
1802
0
      if (r != 0) return r;
1803
0
      COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP + OPSIZE_JUMP;
1804
1805
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1806
0
      if (r != 0) return r;
1807
0
      r = add_op(reg, OP_POP);
1808
0
      if (r != 0) return r;
1809
1810
0
      r = add_op(reg, OP_JUMP);
1811
0
      if (r != 0) return r;
1812
0
      COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP);
1813
0
    }
1814
0
    else {
1815
0
      MemNumType mid;
1816
1817
0
      ID_ENTRY(env, mid);
1818
0
      r = add_op(reg, OP_MARK);
1819
0
      if (r != 0) return r;
1820
0
      COP(reg)->mark.id = mid;
1821
0
      COP(reg)->mark.save_pos = 0;
1822
1823
0
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1824
0
      if (r != 0) return r;
1825
0
      r = add_op(reg, OP_CUT_TO_MARK);
1826
0
      if (r != 0) return r;
1827
0
      COP(reg)->cut_to_mark.id = mid;
1828
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1829
0
    }
1830
0
    break;
1831
1832
0
  case BAG_IF_ELSE:
1833
0
    {
1834
0
      int cond_len, then_len, else_len, jump_len;
1835
0
      MemNumType mid;
1836
0
      Node* cond = ND_BAG_BODY(node);
1837
0
      Node* Then = node->te.Then;
1838
0
      Node* Else = node->te.Else;
1839
1840
0
      ID_ENTRY(env, mid);
1841
1842
0
      r = add_op(reg, OP_MARK);
1843
0
      if (r != 0) return r;
1844
0
      COP(reg)->mark.id = mid;
1845
0
      COP(reg)->mark.save_pos = 0;
1846
1847
0
      cond_len = compile_length_tree(cond, reg, env);
1848
0
      if (cond_len < 0) return cond_len;
1849
0
      if (IS_NOT_NULL(Then)) {
1850
0
        then_len = compile_length_tree(Then, reg, env);
1851
0
        if (then_len < 0) return then_len;
1852
0
      }
1853
0
      else
1854
0
        then_len = 0;
1855
1856
0
      jump_len = cond_len + then_len + OPSIZE_CUT_TO_MARK + OPSIZE_JUMP;
1857
1858
0
      r = add_op(reg, OP_PUSH);
1859
0
      if (r != 0) return r;
1860
0
      COP(reg)->push.addr = SIZE_INC + jump_len;
1861
1862
0
      r = compile_tree(cond, reg, env);
1863
0
      if (r != 0) return r;
1864
0
      r = add_op(reg, OP_CUT_TO_MARK);
1865
0
      if (r != 0) return r;
1866
0
      COP(reg)->cut_to_mark.id = mid;
1867
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1868
1869
0
      if (IS_NOT_NULL(Then)) {
1870
0
        r = compile_tree(Then, reg, env);
1871
0
        if (r != 0) return r;
1872
0
      }
1873
1874
0
      if (IS_NOT_NULL(Else)) {
1875
0
        else_len = compile_length_tree(Else, reg, env);
1876
0
        if (else_len < 0) return else_len;
1877
0
      }
1878
0
      else
1879
0
        else_len = 0;
1880
1881
0
      r = add_op(reg, OP_JUMP);
1882
0
      if (r != 0) return r;
1883
0
      COP(reg)->jump.addr = OPSIZE_CUT_TO_MARK + else_len + SIZE_INC;
1884
1885
0
      r = add_op(reg, OP_CUT_TO_MARK);
1886
0
      if (r != 0) return r;
1887
0
      COP(reg)->cut_to_mark.id = mid;
1888
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1889
1890
0
      if (IS_NOT_NULL(Else)) {
1891
0
        r = compile_tree(Else, reg, env);
1892
0
      }
1893
0
    }
1894
0
    break;
1895
1896
0
  default:
1897
0
    return ONIGERR_TYPE_BUG;
1898
0
    break;
1899
0
  }
1900
1901
0
  return r;
1902
0
}
1903
1904
static int
1905
compile_length_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1906
0
{
1907
0
  int len;
1908
0
  int tlen = 0;
1909
1910
0
  if (IS_NOT_NULL(ND_ANCHOR_BODY(node))) {
1911
0
    tlen = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
1912
0
    if (tlen < 0) return tlen;
1913
0
  }
1914
1915
0
  switch (node->type) {
1916
0
  case ANCR_PREC_READ:
1917
0
    len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1918
0
    break;
1919
0
  case ANCR_PREC_READ_NOT:
1920
0
    len = OPSIZE_PUSH + OPSIZE_MARK + tlen + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
1921
0
    break;
1922
0
  case ANCR_LOOK_BEHIND:
1923
0
    if (node->char_min_len == node->char_max_len)
1924
0
      len = OPSIZE_MARK + OPSIZE_STEP_BACK_START + tlen + OPSIZE_CUT_TO_MARK;
1925
0
    else {
1926
0
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_UPDATE_VAR + OPSIZE_FAIL + OPSIZE_JUMP + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_CUT_TO_MARK + OPSIZE_UPDATE_VAR;
1927
1928
0
      if (IS_NOT_NULL(node->lead_node)) {
1929
0
        int llen = compile_length_tree(node->lead_node, reg, env);
1930
0
        if (llen < 0) return llen;
1931
1932
0
        len += OPSIZE_MOVE + llen;
1933
0
      }
1934
1935
0
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1936
0
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1937
0
    }
1938
0
    break;
1939
0
  case ANCR_LOOK_BEHIND_NOT:
1940
0
    if (node->char_min_len == node->char_max_len)
1941
0
      len = OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + tlen + OPSIZE_POP_TO_MARK + OPSIZE_FAIL + OPSIZE_POP;
1942
0
    else {
1943
0
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_POP;
1944
1945
0
      if (IS_NOT_NULL(node->lead_node)) {
1946
0
        int llen = compile_length_tree(node->lead_node, reg, env);
1947
0
        if (llen < 0) return llen;
1948
1949
0
        len += OPSIZE_MOVE + llen;
1950
0
      }
1951
1952
0
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1953
0
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1954
0
    }
1955
0
    break;
1956
1957
0
  case ANCR_WORD_BOUNDARY:
1958
0
  case ANCR_NO_WORD_BOUNDARY:
1959
0
#ifdef USE_WORD_BEGIN_END
1960
0
  case ANCR_WORD_BEGIN:
1961
0
  case ANCR_WORD_END:
1962
0
#endif
1963
0
    len = OPSIZE_WORD_BOUNDARY;
1964
0
    break;
1965
1966
0
  case ANCR_TEXT_SEGMENT_BOUNDARY:
1967
0
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1968
0
    len = SIZE_OPCODE;
1969
0
    break;
1970
1971
0
  default:
1972
0
    len = SIZE_OPCODE;
1973
0
    break;
1974
0
  }
1975
1976
0
  return len;
1977
0
}
1978
1979
static int
1980
compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1981
0
{
1982
0
  int r;
1983
1984
0
  if (node->char_min_len == node->char_max_len) {
1985
0
    MemNumType mid;
1986
1987
0
    ID_ENTRY(env, mid);
1988
0
    r = add_op(reg, OP_MARK);
1989
0
    if (r != 0) return r;
1990
0
    COP(reg)->mark.id = mid;
1991
0
    COP(reg)->mark.save_pos = FALSE;
1992
1993
0
    r = add_op(reg, OP_STEP_BACK_START);
1994
0
    if (r != 0) return r;
1995
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
1996
0
    COP(reg)->step_back_start.remaining = 0;
1997
0
    COP(reg)->step_back_start.addr      = 1;
1998
1999
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2000
0
    if (r != 0) return r;
2001
2002
0
    r = add_op(reg, OP_CUT_TO_MARK);
2003
0
    if (r != 0) return r;
2004
0
    COP(reg)->cut_to_mark.id = mid;
2005
0
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2006
0
  }
2007
0
  else {
2008
0
    MemNumType mid1, mid2, mid3;
2009
0
    OnigLen diff;
2010
2011
0
    if (IS_NOT_NULL(node->lead_node)) {
2012
0
      MinMaxCharLen ci;
2013
2014
0
      r = node_char_len(node->lead_node, reg, &ci, env);
2015
0
      if (r < 0) return r;
2016
0
      r = add_op(reg, OP_MOVE);
2017
0
      if (r != 0) return r;
2018
0
      COP(reg)->move.n = -((RelPositionType )ci.min);
2019
0
      r = compile_tree(node->lead_node, reg, env);
2020
0
      if (r != 0) return r;
2021
0
    }
2022
2023
0
    ID_ENTRY(env, mid1);
2024
0
    r = add_op(reg, OP_SAVE_VAL);
2025
0
    if (r != 0) return r;
2026
0
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2027
0
    COP(reg)->save_val.id   = mid1;
2028
2029
0
    r = add_op(reg, OP_UPDATE_VAR);
2030
0
    if (r != 0) return r;
2031
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2032
2033
0
    ID_ENTRY(env, mid2);
2034
0
    r = add_op(reg, OP_MARK);
2035
0
    if (r != 0) return r;
2036
0
    COP(reg)->mark.id = mid2;
2037
0
    COP(reg)->mark.save_pos = FALSE;
2038
2039
0
    r = add_op(reg, OP_PUSH);
2040
0
    if (r != 0) return r;
2041
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
2042
2043
0
    r = add_op(reg, OP_JUMP);
2044
0
    if (r != 0) return r;
2045
0
    COP(reg)->jump.addr = SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL;
2046
2047
0
    r = add_op(reg, OP_UPDATE_VAR);
2048
0
    if (r != 0) return r;
2049
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2050
0
    COP(reg)->update_var.id    = mid1;
2051
0
    COP(reg)->update_var.clear = FALSE;
2052
0
    r = add_op(reg, OP_FAIL);
2053
0
    if (r != 0) return r;
2054
2055
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2056
0
      ID_ENTRY(env, mid3);
2057
0
      r = add_op(reg, OP_SAVE_VAL);
2058
0
      if (r != 0) return r;
2059
0
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2060
0
      COP(reg)->save_val.id   = mid3;
2061
0
    }
2062
2063
0
    r = add_op(reg, OP_STEP_BACK_START);
2064
0
    if (r != 0) return r;
2065
2066
0
    if (node->char_max_len != INFINITE_LEN)
2067
0
      diff = node->char_max_len - node->char_min_len;
2068
0
    else
2069
0
      diff = INFINITE_LEN;
2070
2071
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2072
0
    COP(reg)->step_back_start.remaining = diff;
2073
0
    COP(reg)->step_back_start.addr      = 2;
2074
2075
0
    r = add_op(reg, OP_STEP_BACK_NEXT);
2076
0
    if (r != 0) return r;
2077
2078
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2079
0
    if (r != 0) return r;
2080
2081
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2082
0
      r = add_op(reg, OP_UPDATE_VAR);
2083
0
      if (r != 0) return r;
2084
0
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2085
0
      COP(reg)->update_var.id    = mid3;
2086
0
      COP(reg)->update_var.clear = FALSE;
2087
0
    }
2088
2089
0
    r = add_op(reg, OP_CHECK_POSITION);
2090
0
    if (r != 0) return r;
2091
0
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2092
2093
0
    r = add_op(reg, OP_CUT_TO_MARK);
2094
0
    if (r != 0) return r;
2095
0
    COP(reg)->cut_to_mark.id = mid2;
2096
0
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2097
2098
0
    r = add_op(reg, OP_UPDATE_VAR);
2099
0
    if (r != 0) return r;
2100
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2101
0
    COP(reg)->update_var.id    = mid1;
2102
0
    COP(reg)->update_var.clear = TRUE;
2103
0
  }
2104
2105
0
  return r;
2106
0
}
2107
2108
static int
2109
compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg,
2110
                                    ParseEnv* env)
2111
0
{
2112
0
  int r;
2113
0
  int len;
2114
2115
0
  len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2116
2117
0
  if (node->char_min_len == node->char_max_len) {
2118
0
    MemNumType mid;
2119
2120
0
    ID_ENTRY(env, mid);
2121
0
    r = add_op(reg, OP_MARK);
2122
0
    if (r != 0) return r;
2123
0
    COP(reg)->mark.id = mid;
2124
0
    COP(reg)->mark.save_pos = FALSE;
2125
2126
0
    r = add_op(reg, OP_PUSH);
2127
0
    if (r != 0) return r;
2128
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL;
2129
2130
0
    r = add_op(reg, OP_STEP_BACK_START);
2131
0
    if (r != 0) return r;
2132
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2133
0
    COP(reg)->step_back_start.remaining = 0;
2134
0
    COP(reg)->step_back_start.addr      = 1;
2135
2136
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2137
0
    if (r != 0) return r;
2138
2139
0
    r = add_op(reg, OP_POP_TO_MARK);
2140
0
    if (r != 0) return r;
2141
0
    COP(reg)->pop_to_mark.id = mid;
2142
0
    r = add_op(reg, OP_FAIL);
2143
0
    if (r != 0) return r;
2144
0
    r = add_op(reg, OP_POP);
2145
0
  }
2146
0
  else {
2147
0
    MemNumType mid1, mid2, mid3;
2148
0
    OnigLen diff;
2149
2150
0
    ID_ENTRY(env, mid1);
2151
0
    r = add_op(reg, OP_SAVE_VAL);
2152
0
    if (r != 0) return r;
2153
0
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2154
0
    COP(reg)->save_val.id   = mid1;
2155
2156
0
    r = add_op(reg, OP_UPDATE_VAR);
2157
0
    if (r != 0) return r;
2158
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2159
2160
0
    ID_ENTRY(env, mid2);
2161
0
    r = add_op(reg, OP_MARK);
2162
0
    if (r != 0) return r;
2163
0
    COP(reg)->mark.id = mid2;
2164
0
    COP(reg)->mark.save_pos = FALSE;
2165
2166
0
    r = add_op(reg, OP_PUSH);
2167
0
    if (r != 0) return r;
2168
2169
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + len + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL;
2170
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
2171
0
      COP(reg)->push.addr += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
2172
2173
0
    if (IS_NOT_NULL(node->lead_node)) {
2174
0
      int clen;
2175
0
      MinMaxCharLen ci;
2176
2177
0
      clen = compile_length_tree(node->lead_node, reg, env);
2178
0
      COP(reg)->push.addr += OPSIZE_MOVE + clen;
2179
2180
0
      r = node_char_len(node->lead_node, reg, &ci, env);
2181
0
      if (r < 0) return r;
2182
0
      r = add_op(reg, OP_MOVE);
2183
0
      if (r != 0) return r;
2184
0
      COP(reg)->move.n = -((RelPositionType )ci.min);
2185
2186
0
      r = compile_tree(node->lead_node, reg, env);
2187
0
      if (r != 0) return r;
2188
0
    }
2189
2190
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2191
0
      ID_ENTRY(env, mid3);
2192
0
      r = add_op(reg, OP_SAVE_VAL);
2193
0
      if (r != 0) return r;
2194
0
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2195
0
      COP(reg)->save_val.id   = mid3;
2196
0
    }
2197
2198
0
    r = add_op(reg, OP_STEP_BACK_START);
2199
0
    if (r != 0) return r;
2200
2201
0
    if (node->char_max_len != INFINITE_LEN)
2202
0
      diff = node->char_max_len - node->char_min_len;
2203
0
    else
2204
0
      diff = INFINITE_LEN;
2205
2206
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2207
0
    COP(reg)->step_back_start.remaining = diff;
2208
0
    COP(reg)->step_back_start.addr      = 2;
2209
2210
0
    r = add_op(reg, OP_STEP_BACK_NEXT);
2211
0
    if (r != 0) return r;
2212
2213
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2214
0
    if (r != 0) return r;
2215
2216
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2217
0
      r = add_op(reg, OP_UPDATE_VAR);
2218
0
      if (r != 0) return r;
2219
0
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2220
0
      COP(reg)->update_var.id    = mid3;
2221
0
      COP(reg)->update_var.clear = FALSE;
2222
0
    }
2223
2224
0
    r = add_op(reg, OP_CHECK_POSITION);
2225
0
    if (r != 0) return r;
2226
0
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2227
2228
0
    r = add_op(reg, OP_POP_TO_MARK);
2229
0
    if (r != 0) return r;
2230
0
    COP(reg)->pop_to_mark.id = mid2;
2231
2232
0
    r = add_op(reg, OP_UPDATE_VAR);
2233
0
    if (r != 0) return r;
2234
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2235
0
    COP(reg)->update_var.id   = mid1;
2236
0
    COP(reg)->update_var.clear = FALSE;
2237
2238
0
    r = add_op(reg, OP_POP); /* pop save val */
2239
0
    if (r != 0) return r;
2240
0
    r = add_op(reg, OP_FAIL);
2241
0
    if (r != 0) return r;
2242
2243
0
    r = add_op(reg, OP_UPDATE_VAR);
2244
0
    if (r != 0) return r;
2245
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2246
0
    COP(reg)->update_var.id   = mid1;
2247
0
    COP(reg)->update_var.clear = FALSE;
2248
2249
0
    r = add_op(reg, OP_POP); /* pop mark */
2250
0
    if (r != 0) return r;
2251
0
    r = add_op(reg, OP_POP); /* pop save val */
2252
0
  }
2253
2254
0
  return r;
2255
0
}
2256
2257
static int
2258
compile_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
2259
0
{
2260
0
  int r, len;
2261
0
  enum OpCode op;
2262
0
  MemNumType mid;
2263
2264
0
  switch (node->type) {
2265
0
  case ANCR_BEGIN_BUF:      r = add_op(reg, OP_BEGIN_BUF);      break;
2266
0
  case ANCR_END_BUF:        r = add_op(reg, OP_END_BUF);        break;
2267
0
  case ANCR_BEGIN_LINE:     r = add_op(reg, OP_BEGIN_LINE);     break;
2268
0
  case ANCR_END_LINE:       r = add_op(reg, OP_END_LINE);       break;
2269
0
  case ANCR_SEMI_END_BUF:   r = add_op(reg, OP_SEMI_END_BUF);   break;
2270
0
  case ANCR_BEGIN_POSITION:
2271
0
    r = add_op(reg, OP_CHECK_POSITION);
2272
0
    if (r != 0) return r;
2273
0
    COP(reg)->check_position.type = CHECK_POSITION_SEARCH_START;
2274
0
    break;
2275
2276
0
  case ANCR_WORD_BOUNDARY:
2277
0
    op = OP_WORD_BOUNDARY;
2278
0
  word:
2279
0
    r = add_op(reg, op);
2280
0
    if (r != 0) return r;
2281
0
    COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
2282
0
    break;
2283
2284
0
  case ANCR_NO_WORD_BOUNDARY:
2285
0
    op = OP_NO_WORD_BOUNDARY; goto word;
2286
0
    break;
2287
0
#ifdef USE_WORD_BEGIN_END
2288
0
  case ANCR_WORD_BEGIN:
2289
0
    op = OP_WORD_BEGIN; goto word;
2290
0
    break;
2291
0
  case ANCR_WORD_END:
2292
0
    op = OP_WORD_END; goto word;
2293
0
    break;
2294
0
#endif
2295
2296
0
  case ANCR_TEXT_SEGMENT_BOUNDARY:
2297
0
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
2298
0
    {
2299
0
      enum TextSegmentBoundaryType type;
2300
2301
0
      r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
2302
0
      if (r != 0) return r;
2303
2304
0
      type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
2305
0
#ifdef USE_UNICODE_WORD_BREAK
2306
0
      if (ND_IS_TEXT_SEGMENT_WORD(node))
2307
0
        type = WORD_BOUNDARY;
2308
0
#endif
2309
2310
0
      COP(reg)->text_segment_boundary.type = type;
2311
0
      COP(reg)->text_segment_boundary.not =
2312
0
        (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
2313
0
    }
2314
0
    break;
2315
2316
0
  case ANCR_PREC_READ:
2317
0
    {
2318
0
      ID_ENTRY(env, mid);
2319
0
      r = add_op(reg, OP_MARK);
2320
0
      if (r != 0) return r;
2321
0
      COP(reg)->mark.id = mid;
2322
0
      COP(reg)->mark.save_pos = TRUE;
2323
2324
0
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2325
0
      if (r != 0) return r;
2326
2327
0
      r = add_op(reg, OP_CUT_TO_MARK);
2328
0
      if (r != 0) return r;
2329
0
      COP(reg)->cut_to_mark.id = mid;
2330
0
      COP(reg)->cut_to_mark.restore_pos = TRUE;
2331
0
    }
2332
0
    break;
2333
2334
0
  case ANCR_PREC_READ_NOT:
2335
0
    {
2336
0
      len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2337
0
      if (len < 0) return len;
2338
2339
0
      ID_ENTRY(env, mid);
2340
0
      r = add_op(reg, OP_PUSH);
2341
0
      if (r != 0) return r;
2342
0
      COP(reg)->push.addr = SIZE_INC + OPSIZE_MARK + len +
2343
0
                            OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
2344
2345
0
      r = add_op(reg, OP_MARK);
2346
0
      if (r != 0) return r;
2347
0
      COP(reg)->mark.id = mid;
2348
0
      COP(reg)->mark.save_pos = FALSE;
2349
2350
0
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2351
0
      if (r != 0) return r;
2352
2353
0
      r = add_op(reg, OP_POP_TO_MARK);
2354
0
      if (r != 0) return r;
2355
0
      COP(reg)->pop_to_mark.id = mid;
2356
2357
0
      r = add_op(reg, OP_POP);
2358
0
      if (r != 0) return r;
2359
0
      r = add_op(reg, OP_FAIL);
2360
0
    }
2361
0
    break;
2362
2363
0
  case ANCR_LOOK_BEHIND:
2364
0
    r = compile_anchor_look_behind_node(node, reg, env);
2365
0
    break;
2366
2367
0
  case ANCR_LOOK_BEHIND_NOT:
2368
0
    r = compile_anchor_look_behind_not_node(node, reg, env);
2369
0
    break;
2370
2371
0
  default:
2372
0
    return ONIGERR_TYPE_BUG;
2373
0
    break;
2374
0
  }
2375
2376
0
  return r;
2377
0
}
2378
2379
static int
2380
compile_gimmick_node(GimmickNode* node, regex_t* reg)
2381
0
{
2382
0
  int r = 0;
2383
2384
0
  switch (node->type) {
2385
0
  case GIMMICK_FAIL:
2386
0
    r = add_op(reg, OP_FAIL);
2387
0
    break;
2388
2389
0
  case GIMMICK_SAVE:
2390
0
    r = add_op(reg, OP_SAVE_VAL);
2391
0
    if (r != 0) return r;
2392
0
    COP(reg)->save_val.type = node->detail_type;
2393
0
    COP(reg)->save_val.id   = node->id;
2394
0
    break;
2395
2396
0
  case GIMMICK_UPDATE_VAR:
2397
0
    r = add_op(reg, OP_UPDATE_VAR);
2398
0
    if (r != 0) return r;
2399
0
    COP(reg)->update_var.type = node->detail_type;
2400
0
    COP(reg)->update_var.id   = node->id;
2401
0
    COP(reg)->update_var.clear = FALSE;
2402
0
    break;
2403
2404
0
#ifdef USE_CALLOUT
2405
0
  case GIMMICK_CALLOUT:
2406
0
    switch (node->detail_type) {
2407
0
    case ONIG_CALLOUT_OF_CONTENTS:
2408
0
    case ONIG_CALLOUT_OF_NAME:
2409
0
      {
2410
0
        if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
2411
0
          r = add_op(reg, OP_CALLOUT_NAME);
2412
0
          if (r != 0) return r;
2413
0
          COP(reg)->callout_name.id  = node->id;
2414
0
          COP(reg)->callout_name.num = node->num;
2415
0
        }
2416
0
        else {
2417
0
          r = add_op(reg, OP_CALLOUT_CONTENTS);
2418
0
          if (r != 0) return r;
2419
0
          COP(reg)->callout_contents.num = node->num;
2420
0
        }
2421
0
      }
2422
0
      break;
2423
2424
0
    default:
2425
0
      r = ONIGERR_TYPE_BUG;
2426
0
      break;
2427
0
    }
2428
0
#endif
2429
0
  }
2430
2431
0
  return r;
2432
0
}
2433
2434
static int
2435
compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
2436
0
{
2437
0
  int len;
2438
2439
0
  switch (node->type) {
2440
0
  case GIMMICK_FAIL:
2441
0
    len = OPSIZE_FAIL;
2442
0
    break;
2443
2444
0
  case GIMMICK_SAVE:
2445
0
    len = OPSIZE_SAVE_VAL;
2446
0
    break;
2447
2448
0
  case GIMMICK_UPDATE_VAR:
2449
0
    len = OPSIZE_UPDATE_VAR;
2450
0
    break;
2451
2452
0
#ifdef USE_CALLOUT
2453
0
  case GIMMICK_CALLOUT:
2454
0
    switch (node->detail_type) {
2455
0
    case ONIG_CALLOUT_OF_CONTENTS:
2456
0
      len = OPSIZE_CALLOUT_CONTENTS;
2457
0
      break;
2458
0
    case ONIG_CALLOUT_OF_NAME:
2459
0
      len = OPSIZE_CALLOUT_NAME;
2460
0
      break;
2461
2462
0
    default:
2463
0
      len = ONIGERR_TYPE_BUG;
2464
0
      break;
2465
0
    }
2466
0
    break;
2467
0
#endif
2468
2469
0
  default:
2470
0
    return ONIGERR_TYPE_BUG;
2471
0
    break;
2472
0
  }
2473
2474
0
  return len;
2475
0
}
2476
2477
static int
2478
compile_length_tree(Node* node, regex_t* reg, ParseEnv* env)
2479
0
{
2480
0
  int len, r;
2481
2482
0
  switch (ND_TYPE(node)) {
2483
0
  case ND_LIST:
2484
0
    len = 0;
2485
0
    do {
2486
0
      r = compile_length_tree(ND_CAR(node), reg, env);
2487
0
      if (r < 0) return r;
2488
0
      len += r;
2489
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
2490
0
    r = len;
2491
0
    break;
2492
2493
0
  case ND_ALT:
2494
0
    {
2495
0
      int n;
2496
2497
0
      n = r = 0;
2498
0
      do {
2499
0
        r += compile_length_tree(ND_CAR(node), reg, env);
2500
0
        n++;
2501
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2502
0
      r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
2503
0
    }
2504
0
    break;
2505
2506
0
  case ND_STRING:
2507
0
    if (ND_STRING_IS_CRUDE(node))
2508
0
      r = compile_length_string_crude_node(STR_(node), reg);
2509
0
    else
2510
0
      r = compile_length_string_node(node, reg);
2511
0
    break;
2512
2513
0
  case ND_CCLASS:
2514
0
    r = compile_length_cclass_node(CCLASS_(node), reg);
2515
0
    break;
2516
2517
0
  case ND_CTYPE:
2518
0
    r = SIZE_OPCODE;
2519
0
    break;
2520
2521
0
  case ND_BACKREF:
2522
0
    r = OPSIZE_BACKREF;
2523
0
    break;
2524
2525
0
#ifdef USE_CALL
2526
0
  case ND_CALL:
2527
0
    r = OPSIZE_CALL;
2528
0
    break;
2529
0
#endif
2530
2531
0
  case ND_QUANT:
2532
0
    r = compile_length_quantifier_node(QUANT_(node), reg, env);
2533
0
    break;
2534
2535
0
  case ND_BAG:
2536
0
    r = compile_length_bag_node(BAG_(node), reg, env);
2537
0
    break;
2538
2539
0
  case ND_ANCHOR:
2540
0
    r = compile_length_anchor_node(ANCHOR_(node), reg, env);
2541
0
    break;
2542
2543
0
  case ND_GIMMICK:
2544
0
    r = compile_length_gimmick_node(GIMMICK_(node), reg);
2545
0
    break;
2546
2547
0
  default:
2548
0
    return ONIGERR_TYPE_BUG;
2549
0
    break;
2550
0
  }
2551
2552
0
  return r;
2553
0
}
2554
2555
static int
2556
compile_tree(Node* node, regex_t* reg, ParseEnv* env)
2557
0
{
2558
0
  int n, len, pos, r = 0;
2559
2560
0
  switch (ND_TYPE(node)) {
2561
0
  case ND_LIST:
2562
0
    do {
2563
0
      r = compile_tree(ND_CAR(node), reg, env);
2564
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2565
0
    break;
2566
2567
0
  case ND_ALT:
2568
0
    {
2569
0
      Node* x = node;
2570
0
      len = 0;
2571
0
      do {
2572
0
        len += compile_length_tree(ND_CAR(x), reg, env);
2573
0
        if (IS_NOT_NULL(ND_CDR(x))) {
2574
0
          len += OPSIZE_PUSH + OPSIZE_JUMP;
2575
0
        }
2576
0
      } while (IS_NOT_NULL(x = ND_CDR(x)));
2577
0
      pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */
2578
2579
0
      do {
2580
0
        len = compile_length_tree(ND_CAR(node), reg, env);
2581
0
        if (IS_NOT_NULL(ND_CDR(node))) {
2582
0
          enum OpCode push = ND_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
2583
0
          r = add_op(reg, push);
2584
0
          if (r != 0) break;
2585
0
          COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
2586
0
        }
2587
0
        r = compile_tree(ND_CAR(node), reg, env);
2588
0
        if (r != 0) break;
2589
0
        if (IS_NOT_NULL(ND_CDR(node))) {
2590
0
          len = pos - (COP_CURR_OFFSET(reg) + 1);
2591
0
          r = add_op(reg, OP_JUMP);
2592
0
          if (r != 0) break;
2593
0
          COP(reg)->jump.addr = len;
2594
0
        }
2595
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2596
0
    }
2597
0
    break;
2598
2599
0
  case ND_STRING:
2600
0
    if (ND_STRING_IS_CRUDE(node))
2601
0
      r = compile_string_crude_node(STR_(node), reg);
2602
0
    else
2603
0
      r = compile_string_node(node, reg);
2604
0
    break;
2605
2606
0
  case ND_CCLASS:
2607
0
    r = compile_cclass_node(CCLASS_(node), reg);
2608
0
    break;
2609
2610
0
  case ND_CTYPE:
2611
0
    {
2612
0
      int op;
2613
2614
0
      switch (CTYPE_(node)->ctype) {
2615
0
      case CTYPE_ANYCHAR:
2616
0
        r = add_op(reg, ND_IS_MULTILINE(node) ? OP_ANYCHAR_ML : OP_ANYCHAR);
2617
0
        break;
2618
2619
0
      case ONIGENC_CTYPE_WORD:
2620
0
        if (CTYPE_(node)->ascii_mode == 0) {
2621
0
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
2622
0
        }
2623
0
        else {
2624
0
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
2625
0
        }
2626
0
        r = add_op(reg, op);
2627
0
        break;
2628
2629
0
      default:
2630
0
        return ONIGERR_TYPE_BUG;
2631
0
        break;
2632
0
      }
2633
0
    }
2634
0
    break;
2635
2636
0
  case ND_BACKREF:
2637
0
    {
2638
0
      BackRefNode* br = BACKREF_(node);
2639
2640
0
      if (ND_IS_CHECKER(node)) {
2641
0
#ifdef USE_BACKREF_WITH_LEVEL
2642
0
        if (ND_IS_NEST_LEVEL(node)) {
2643
0
          r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
2644
0
          if (r != 0) return r;
2645
0
          COP(reg)->backref_general.nest_level = br->nest_level;
2646
0
        }
2647
0
        else
2648
0
#endif
2649
0
          {
2650
0
            r = add_op(reg, OP_BACKREF_CHECK);
2651
0
            if (r != 0) return r;
2652
0
          }
2653
0
        goto add_bacref_mems;
2654
0
      }
2655
0
      else {
2656
0
#ifdef USE_BACKREF_WITH_LEVEL
2657
0
        if (ND_IS_NEST_LEVEL(node)) {
2658
0
          if (ND_IS_IGNORECASE(node))
2659
0
            r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
2660
0
          else
2661
0
            r = add_op(reg, OP_BACKREF_WITH_LEVEL);
2662
2663
0
          if (r != 0) return r;
2664
0
          COP(reg)->backref_general.nest_level = br->nest_level;
2665
0
          goto add_bacref_mems;
2666
0
        }
2667
0
        else
2668
0
#endif
2669
0
        if (br->back_num == 1) {
2670
0
          n = br->back_static[0];
2671
0
          if (ND_IS_IGNORECASE(node)) {
2672
0
            r = add_op(reg, OP_BACKREF_N_IC);
2673
0
            if (r != 0) return r;
2674
0
            COP(reg)->backref_n.n1 = n;
2675
0
          }
2676
0
          else {
2677
0
            switch (n) {
2678
0
            case 1:  r = add_op(reg, OP_BACKREF1); break;
2679
0
            case 2:  r = add_op(reg, OP_BACKREF2); break;
2680
0
            default:
2681
0
              r = add_op(reg, OP_BACKREF_N);
2682
0
              if (r != 0) return r;
2683
0
              COP(reg)->backref_n.n1 = n;
2684
0
              break;
2685
0
            }
2686
0
          }
2687
0
        }
2688
0
        else {
2689
0
          int num;
2690
0
          int* p;
2691
2692
0
          r = add_op(reg, ND_IS_IGNORECASE(node) ?
2693
0
                     OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
2694
0
          if (r != 0) return r;
2695
2696
0
        add_bacref_mems:
2697
0
          num = br->back_num;
2698
0
          COP(reg)->backref_general.num = num;
2699
0
          if (num == 1) {
2700
0
            COP(reg)->backref_general.n1 = br->back_static[0];
2701
0
          }
2702
0
          else {
2703
0
            int i, j;
2704
0
            MemNumType* ns;
2705
2706
0
            ns = xmalloc(sizeof(MemNumType) * num);
2707
0
            CHECK_NULL_RETURN_MEMERR(ns);
2708
0
            COP(reg)->backref_general.ns = ns;
2709
0
            p = BACKREFS_P(br);
2710
0
            for (i = num - 1, j = 0; i >= 0; i--, j++) {
2711
0
              ns[j] = p[i];
2712
0
            }
2713
0
          }
2714
0
        }
2715
0
      }
2716
0
    }
2717
0
    break;
2718
2719
0
#ifdef USE_CALL
2720
0
  case ND_CALL:
2721
0
    r = compile_call(CALL_(node), reg, env);
2722
0
    break;
2723
0
#endif
2724
2725
0
  case ND_QUANT:
2726
0
    r = compile_quantifier_node(QUANT_(node), reg, env);
2727
0
    break;
2728
2729
0
  case ND_BAG:
2730
0
    r = compile_bag_node(BAG_(node), reg, env);
2731
0
    break;
2732
2733
0
  case ND_ANCHOR:
2734
0
    r = compile_anchor_node(ANCHOR_(node), reg, env);
2735
0
    break;
2736
2737
0
  case ND_GIMMICK:
2738
0
    r = compile_gimmick_node(GIMMICK_(node), reg);
2739
0
    break;
2740
2741
0
  default:
2742
#ifdef ONIG_DEBUG
2743
    fprintf(DBGFP, "compile_tree: undefined node type %d\n", ND_TYPE(node));
2744
#endif
2745
0
    break;
2746
0
  }
2747
2748
0
  return r;
2749
0
}
2750
2751
static int
2752
make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
2753
0
{
2754
0
  int r;
2755
0
  Node* node = *plink;
2756
2757
0
  switch (ND_TYPE(node)) {
2758
0
  case ND_LIST:
2759
0
  case ND_ALT:
2760
0
    do {
2761
0
      r = make_named_capture_number_map(&(ND_CAR(node)), map, counter);
2762
0
    } while (r >= 0 && IS_NOT_NULL(node = ND_CDR(node)));
2763
0
    if (r < 0) return r;
2764
0
    break;
2765
2766
0
  case ND_QUANT:
2767
0
    {
2768
0
      Node** ptarget = &(ND_BODY(node));
2769
0
      r = make_named_capture_number_map(ptarget, map, counter);
2770
0
      if (r < 0) return r;
2771
0
      if (r == 1 && ND_TYPE(*ptarget) == ND_QUANT) {
2772
0
        return onig_reduce_nested_quantifier(node);
2773
0
      }
2774
0
    }
2775
0
    break;
2776
2777
0
  case ND_BAG:
2778
0
    {
2779
0
      BagNode* en = BAG_(node);
2780
0
      if (en->type == BAG_MEMORY) {
2781
0
        if (ND_IS_NAMED_GROUP(node)) {
2782
0
          (*counter)++;
2783
0
          map[en->m.regnum].new_val = *counter;
2784
0
          en->m.regnum = *counter;
2785
0
          r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2786
0
          if (r < 0) return r;
2787
0
        }
2788
0
        else {
2789
0
          *plink = ND_BODY(node);
2790
0
          ND_BODY(node) = NULL_NODE;
2791
0
          onig_node_free(node);
2792
0
          r = make_named_capture_number_map(plink, map, counter);
2793
0
          if (r < 0) return r;
2794
0
          return 1;
2795
0
        }
2796
0
      }
2797
0
      else if (en->type == BAG_IF_ELSE) {
2798
0
        r = make_named_capture_number_map(&(ND_BAG_BODY(en)), map, counter);
2799
0
        if (r < 0) return r;
2800
0
        if (IS_NOT_NULL(en->te.Then)) {
2801
0
          r = make_named_capture_number_map(&(en->te.Then), map, counter);
2802
0
          if (r < 0) return r;
2803
0
        }
2804
0
        if (IS_NOT_NULL(en->te.Else)) {
2805
0
          r = make_named_capture_number_map(&(en->te.Else), map, counter);
2806
0
          if (r < 0) return r;
2807
0
        }
2808
0
      }
2809
0
      else {
2810
0
        r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2811
0
        if (r < 0) return r;
2812
0
      }
2813
0
    }
2814
0
    break;
2815
2816
0
  case ND_ANCHOR:
2817
0
    if (IS_NOT_NULL(ND_BODY(node))) {
2818
0
      r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2819
0
      if (r < 0) return r;
2820
0
    }
2821
0
    break;
2822
2823
0
  default:
2824
0
    break;
2825
0
  }
2826
2827
0
  return 0;
2828
0
}
2829
2830
static int
2831
renumber_backref_node(Node* node, GroupNumMap* map)
2832
0
{
2833
0
  int i, pos, n, old_num;
2834
0
  int *backs;
2835
0
  BackRefNode* bn = BACKREF_(node);
2836
2837
0
  if (! ND_IS_BY_NAME(node))
2838
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2839
2840
0
  old_num = bn->back_num;
2841
0
  if (IS_NULL(bn->back_dynamic))
2842
0
    backs = bn->back_static;
2843
0
  else
2844
0
    backs = bn->back_dynamic;
2845
2846
0
  for (i = 0, pos = 0; i < old_num; i++) {
2847
0
    n = map[backs[i]].new_val;
2848
0
    if (n > 0) {
2849
0
      backs[pos] = n;
2850
0
      pos++;
2851
0
    }
2852
0
  }
2853
2854
0
  bn->back_num = pos;
2855
0
  return 0;
2856
0
}
2857
2858
static int
2859
renumber_backref_traverse(Node* node, GroupNumMap* map)
2860
0
{
2861
0
  int r = 0;
2862
2863
0
  switch (ND_TYPE(node)) {
2864
0
  case ND_LIST:
2865
0
  case ND_ALT:
2866
0
    do {
2867
0
      r = renumber_backref_traverse(ND_CAR(node), map);
2868
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2869
0
    break;
2870
2871
0
  case ND_QUANT:
2872
0
    r = renumber_backref_traverse(ND_BODY(node), map);
2873
0
    break;
2874
2875
0
  case ND_BAG:
2876
0
    {
2877
0
      BagNode* en = BAG_(node);
2878
2879
0
      r = renumber_backref_traverse(ND_BODY(node), map);
2880
0
      if (r != 0) return r;
2881
2882
0
      if (en->type == BAG_IF_ELSE) {
2883
0
        if (IS_NOT_NULL(en->te.Then)) {
2884
0
          r = renumber_backref_traverse(en->te.Then, map);
2885
0
          if (r != 0) return r;
2886
0
        }
2887
0
        if (IS_NOT_NULL(en->te.Else)) {
2888
0
          r = renumber_backref_traverse(en->te.Else, map);
2889
0
          if (r != 0) return r;
2890
0
        }
2891
0
      }
2892
0
    }
2893
0
    break;
2894
2895
0
  case ND_BACKREF:
2896
0
    r = renumber_backref_node(node, map);
2897
0
    break;
2898
2899
0
  case ND_ANCHOR:
2900
0
    if (IS_NOT_NULL(ND_BODY(node)))
2901
0
      r = renumber_backref_traverse(ND_BODY(node), map);
2902
0
    break;
2903
2904
0
  default:
2905
0
    break;
2906
0
  }
2907
2908
0
  return r;
2909
0
}
2910
2911
static int
2912
numbered_ref_check(Node* node)
2913
0
{
2914
0
  int r = 0;
2915
2916
0
  switch (ND_TYPE(node)) {
2917
0
  case ND_LIST:
2918
0
  case ND_ALT:
2919
0
    do {
2920
0
      r = numbered_ref_check(ND_CAR(node));
2921
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2922
0
    break;
2923
2924
0
  case ND_ANCHOR:
2925
0
    if (IS_NULL(ND_BODY(node)))
2926
0
      break;
2927
    /* fall */
2928
0
  case ND_QUANT:
2929
0
    r = numbered_ref_check(ND_BODY(node));
2930
0
    break;
2931
2932
0
  case ND_BAG:
2933
0
    {
2934
0
      BagNode* en = BAG_(node);
2935
2936
0
      r = numbered_ref_check(ND_BODY(node));
2937
0
      if (r != 0) return r;
2938
2939
0
      if (en->type == BAG_IF_ELSE) {
2940
0
        if (IS_NOT_NULL(en->te.Then)) {
2941
0
          r = numbered_ref_check(en->te.Then);
2942
0
          if (r != 0) return r;
2943
0
        }
2944
0
        if (IS_NOT_NULL(en->te.Else)) {
2945
0
          r = numbered_ref_check(en->te.Else);
2946
0
          if (r != 0) return r;
2947
0
        }
2948
0
      }
2949
0
    }
2950
2951
0
    break;
2952
2953
0
  case ND_BACKREF:
2954
0
    if (! ND_IS_BY_NAME(node))
2955
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2956
0
    break;
2957
2958
0
  default:
2959
0
    break;
2960
0
  }
2961
2962
0
  return r;
2963
0
}
2964
2965
static int
2966
disable_noname_group_capture(Node** root, regex_t* reg, ParseEnv* env)
2967
0
{
2968
0
  int r, i, pos, counter;
2969
0
  MemStatusType loc;
2970
0
  GroupNumMap* map;
2971
2972
0
  map = (GroupNumMap* )xalloca(sizeof(GroupNumMap) * (env->num_mem + 1));
2973
0
  CHECK_NULL_RETURN_MEMERR(map);
2974
0
  for (i = 1; i <= env->num_mem; i++) {
2975
0
    map[i].new_val = 0;
2976
0
  }
2977
0
  counter = 0;
2978
0
  r = make_named_capture_number_map(root, map, &counter);
2979
0
  if (r < 0) return r;
2980
2981
0
  r = renumber_backref_traverse(*root, map);
2982
0
  if (r != 0) return r;
2983
2984
0
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2985
0
    if (map[i].new_val > 0) {
2986
0
      PARSEENV_MEMENV(env)[pos] = PARSEENV_MEMENV(env)[i];
2987
0
      pos++;
2988
0
    }
2989
0
  }
2990
2991
0
  loc = env->cap_history;
2992
0
  MEM_STATUS_CLEAR(env->cap_history);
2993
0
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2994
0
    if (MEM_STATUS_AT(loc, i)) {
2995
0
      MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
2996
0
    }
2997
0
  }
2998
2999
0
  env->num_mem = env->num_named;
3000
0
  reg->num_mem = env->num_named;
3001
3002
0
  return onig_renumber_name_table(reg, map);
3003
0
}
3004
3005
#ifdef USE_CALL
3006
static int
3007
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
3008
0
{
3009
0
  int i, offset;
3010
0
  BagNode* en;
3011
0
  AbsAddrType addr;
3012
0
  AbsAddrType* paddr;
3013
3014
0
  for (i = 0; i < uslist->num; i++) {
3015
0
    if (! ND_IS_FIXED_ADDR(uslist->us[i].target)) {
3016
0
      if (ND_IS_CALLED(uslist->us[i].target))
3017
0
        return ONIGERR_PARSER_BUG;
3018
0
      else {
3019
        /* CASE: called node doesn't have called address.
3020
           ex. /((|a\g<1>)(.){0}){0}\g<3>/
3021
           group-1 doesn't called, but compiled into bytecodes,
3022
           because group-3 is referred from outside.
3023
        */
3024
0
        continue;
3025
0
      }
3026
0
    }
3027
3028
0
    en = BAG_(uslist->us[i].target);
3029
0
    addr   = en->m.called_addr;
3030
0
    offset = uslist->us[i].offset;
3031
3032
0
    paddr = (AbsAddrType* )((char* )reg->ops + offset);
3033
0
    *paddr = addr;
3034
0
  }
3035
0
  return 0;
3036
0
}
3037
#endif
3038
3039
/* x is not included y ==>  1 : 0 */
3040
static int
3041
is_exclusive(Node* x, Node* y, regex_t* reg)
3042
0
{
3043
0
  int i, len;
3044
0
  OnigCodePoint code;
3045
0
  UChar *p;
3046
0
  NodeType ytype;
3047
3048
0
 retry:
3049
0
  ytype = ND_TYPE(y);
3050
0
  switch (ND_TYPE(x)) {
3051
0
  case ND_CTYPE:
3052
0
    {
3053
0
      if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
3054
0
          CTYPE_(y)->ctype == CTYPE_ANYCHAR)
3055
0
        break;
3056
3057
0
      switch (ytype) {
3058
0
      case ND_CTYPE:
3059
0
        if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
3060
0
            CTYPE_(y)->not   != CTYPE_(x)->not &&
3061
0
            CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
3062
0
          return 1;
3063
0
        else
3064
0
          return 0;
3065
0
        break;
3066
3067
0
      case ND_CCLASS:
3068
0
      swap:
3069
0
        {
3070
0
          Node* tmp;
3071
0
          tmp = x; x = y; y = tmp;
3072
0
          goto retry;
3073
0
        }
3074
0
        break;
3075
3076
0
      case ND_STRING:
3077
0
        goto swap;
3078
0
        break;
3079
3080
0
      default:
3081
0
        break;
3082
0
      }
3083
0
    }
3084
0
    break;
3085
3086
0
  case ND_CCLASS:
3087
0
    {
3088
0
      int range;
3089
0
      CClassNode* xc = CCLASS_(x);
3090
3091
0
      switch (ytype) {
3092
0
      case ND_CTYPE:
3093
0
        switch (CTYPE_(y)->ctype) {
3094
0
        case CTYPE_ANYCHAR:
3095
0
          return 0;
3096
0
          break;
3097
3098
0
        case ONIGENC_CTYPE_WORD:
3099
0
          if (CTYPE_(y)->not == 0) {
3100
0
            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
3101
0
              range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3102
0
              for (i = 0; i < range; i++) {
3103
0
                if (BITSET_AT(xc->bs, i)) {
3104
0
                  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
3105
0
                }
3106
0
              }
3107
0
              return 1;
3108
0
            }
3109
0
            return 0;
3110
0
          }
3111
0
          else {
3112
0
            if (IS_NOT_NULL(xc->mbuf)) return 0;
3113
0
            if (IS_NCCLASS_NOT(xc)) return 0;
3114
3115
0
            range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3116
0
            for (i = 0; i < range; i++) {
3117
0
              if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
3118
0
                if (BITSET_AT(xc->bs, i))
3119
0
                  return 0;
3120
0
              }
3121
0
            }
3122
0
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
3123
0
              if (BITSET_AT(xc->bs, i)) return 0;
3124
0
            }
3125
0
            return 1;
3126
0
          }
3127
0
          break;
3128
3129
0
        default:
3130
0
          break;
3131
0
        }
3132
0
        break;
3133
3134
0
      case ND_CCLASS:
3135
0
        {
3136
0
          int v;
3137
0
          CClassNode* yc = CCLASS_(y);
3138
3139
0
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
3140
0
            v = BITSET_AT(xc->bs, i);
3141
0
            if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
3142
0
              v = BITSET_AT(yc->bs, i);
3143
0
              if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
3144
0
                  (v == 0 && IS_NCCLASS_NOT(yc)))
3145
0
                return 0;
3146
0
            }
3147
0
          }
3148
0
          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
3149
0
              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
3150
0
            return 1;
3151
0
          return 0;
3152
0
        }
3153
0
        break;
3154
3155
0
      case ND_STRING:
3156
0
        goto swap;
3157
0
        break;
3158
3159
0
      default:
3160
0
        break;
3161
0
      }
3162
0
    }
3163
0
    break;
3164
3165
0
  case ND_STRING:
3166
0
    {
3167
0
      StrNode* xs = STR_(x);
3168
3169
0
      if (ND_STRING_LEN(x) == 0)
3170
0
        break;
3171
3172
0
      switch (ytype) {
3173
0
      case ND_CTYPE:
3174
0
        switch (CTYPE_(y)->ctype) {
3175
0
        case CTYPE_ANYCHAR:
3176
0
          break;
3177
3178
0
        case ONIGENC_CTYPE_WORD:
3179
0
          if (CTYPE_(y)->ascii_mode == 0) {
3180
0
            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
3181
0
              return CTYPE_(y)->not;
3182
0
            else
3183
0
              return !(CTYPE_(y)->not);
3184
0
          }
3185
0
          else {
3186
0
            if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
3187
0
              return CTYPE_(y)->not;
3188
0
            else
3189
0
              return !(CTYPE_(y)->not);
3190
0
          }
3191
0
          break;
3192
0
        default:
3193
0
          break;
3194
0
        }
3195
0
        break;
3196
3197
0
      case ND_CCLASS:
3198
0
        {
3199
0
          CClassNode* cc = CCLASS_(y);
3200
3201
0
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
3202
0
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
3203
0
          return onig_is_code_in_cc(reg->enc, code, cc) == 0;
3204
0
        }
3205
0
        break;
3206
3207
0
      case ND_STRING:
3208
0
        {
3209
0
          UChar *q;
3210
0
          StrNode* ys = STR_(y);
3211
3212
0
          len = ND_STRING_LEN(x);
3213
0
          if (len > ND_STRING_LEN(y)) len = ND_STRING_LEN(y);
3214
3215
0
          for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
3216
0
            if (*p != *q) return 1;
3217
0
          }
3218
0
        }
3219
0
        break;
3220
3221
0
      default:
3222
0
        break;
3223
0
      }
3224
0
    }
3225
0
    break;
3226
3227
0
  default:
3228
0
    break;
3229
0
  }
3230
3231
0
  return 0;
3232
0
}
3233
3234
static Node*
3235
get_tree_head_literal(Node* node, int exact, regex_t* reg)
3236
0
{
3237
0
  Node* n = NULL_NODE;
3238
3239
0
  switch (ND_TYPE(node)) {
3240
0
  case ND_BACKREF:
3241
0
  case ND_ALT:
3242
0
#ifdef USE_CALL
3243
0
  case ND_CALL:
3244
0
#endif
3245
0
    break;
3246
3247
0
  case ND_CTYPE:
3248
0
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
3249
0
      break;
3250
    /* fall */
3251
0
  case ND_CCLASS:
3252
0
    if (exact == 0) {
3253
0
      n = node;
3254
0
    }
3255
0
    break;
3256
3257
0
  case ND_LIST:
3258
0
    n = get_tree_head_literal(ND_CAR(node), exact, reg);
3259
0
    break;
3260
3261
0
  case ND_STRING:
3262
0
    {
3263
0
      StrNode* sn = STR_(node);
3264
3265
0
      if (sn->end <= sn->s)
3266
0
        break;
3267
3268
0
      if (exact == 0 || !ND_IS_REAL_IGNORECASE(node)) {
3269
0
        n = node;
3270
0
      }
3271
0
    }
3272
0
    break;
3273
3274
0
  case ND_QUANT:
3275
0
    {
3276
0
      QuantNode* qn = QUANT_(node);
3277
0
      if (qn->lower > 0) {
3278
0
        if (IS_NOT_NULL(qn->head_exact))
3279
0
          n = qn->head_exact;
3280
0
        else
3281
0
          n = get_tree_head_literal(ND_BODY(node), exact, reg);
3282
0
      }
3283
0
    }
3284
0
    break;
3285
3286
0
  case ND_BAG:
3287
0
    {
3288
0
      BagNode* en = BAG_(node);
3289
0
      switch (en->type) {
3290
0
      case BAG_OPTION:
3291
0
      case BAG_MEMORY:
3292
0
      case BAG_STOP_BACKTRACK:
3293
0
        n = get_tree_head_literal(ND_BODY(node), exact, reg);
3294
0
        break;
3295
0
      default:
3296
0
        break;
3297
0
      }
3298
0
    }
3299
0
    break;
3300
3301
0
  case ND_ANCHOR:
3302
0
    if (ANCHOR_(node)->type == ANCR_PREC_READ)
3303
0
      n = get_tree_head_literal(ND_BODY(node), exact, reg);
3304
0
    break;
3305
3306
0
  case ND_GIMMICK:
3307
0
  default:
3308
0
    break;
3309
0
  }
3310
3311
0
  return n;
3312
0
}
3313
3314
enum GetValue {
3315
  GET_VALUE_NONE   = -1,
3316
  GET_VALUE_IGNORE =  0,
3317
  GET_VALUE_FOUND  =  1
3318
};
3319
3320
0
#define MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL  16
3321
3322
static int
3323
get_tree_tail_literal(Node* node, Node** rnode, regex_t* reg, int nest_level)
3324
0
{
3325
0
  int r;
3326
3327
0
  nest_level++;
3328
0
  if (nest_level >= MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL) {
3329
0
    return GET_VALUE_NONE;
3330
0
  }
3331
3332
0
  switch (ND_TYPE(node)) {
3333
0
  case ND_LIST:
3334
0
    if (IS_NULL(ND_CDR(node))) {
3335
0
      r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3336
0
    }
3337
0
    else {
3338
0
      r = get_tree_tail_literal(ND_CDR(node), rnode, reg, nest_level);
3339
0
      if (r == GET_VALUE_IGNORE) {
3340
0
        r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3341
0
      }
3342
0
    }
3343
0
    break;
3344
3345
0
#ifdef USE_CALL
3346
0
  case ND_CALL:
3347
0
    r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3348
0
    break;
3349
0
#endif
3350
3351
0
  case ND_CTYPE:
3352
0
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) {
3353
0
      r = GET_VALUE_NONE;
3354
0
      break;
3355
0
    }
3356
    /* fall */
3357
0
  case ND_CCLASS:
3358
0
    *rnode = node;
3359
0
    r = GET_VALUE_FOUND;
3360
0
    break;
3361
3362
0
  case ND_STRING:
3363
0
    {
3364
0
      StrNode* sn = STR_(node);
3365
3366
0
      if (sn->end <= sn->s) {
3367
0
        r = GET_VALUE_IGNORE;
3368
0
        break;
3369
0
      }
3370
3371
0
      if (ND_IS_REAL_IGNORECASE(node)) {
3372
0
        r = GET_VALUE_NONE;
3373
0
        break;
3374
0
      }
3375
3376
0
      *rnode = node;
3377
0
      r = GET_VALUE_FOUND;
3378
0
    }
3379
0
    break;
3380
3381
0
  case ND_QUANT:
3382
0
    {
3383
0
      QuantNode* qn = QUANT_(node);
3384
0
      if (qn->lower != 0) {
3385
0
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3386
0
      }
3387
0
      else
3388
0
        r = GET_VALUE_NONE;
3389
0
    }
3390
0
    break;
3391
3392
0
  case ND_BAG:
3393
0
    {
3394
0
      BagNode* en = BAG_(node);
3395
3396
0
      if (en->type == BAG_MEMORY) {
3397
0
        if (ND_IS_MARK1(node))
3398
0
          r = GET_VALUE_NONE;
3399
0
        else {
3400
0
          ND_STATUS_ADD(node, MARK1);
3401
0
          r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3402
0
          ND_STATUS_REMOVE(node, MARK1);
3403
0
        }
3404
0
      }
3405
0
      else {
3406
0
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3407
0
      }
3408
0
    }
3409
0
    break;
3410
3411
0
  case ND_ANCHOR:
3412
0
  case ND_GIMMICK:
3413
0
    r = GET_VALUE_IGNORE;
3414
0
    break;
3415
3416
0
  case ND_ALT:
3417
0
  case ND_BACKREF:
3418
0
  default:
3419
0
    r = GET_VALUE_NONE;
3420
0
    break;
3421
0
  }
3422
3423
0
  return r;
3424
0
}
3425
3426
static int
3427
check_called_node_in_look_behind(Node* node, int not)
3428
0
{
3429
0
  int r;
3430
3431
0
  r = 0;
3432
3433
0
  switch (ND_TYPE(node)) {
3434
0
  case ND_LIST:
3435
0
  case ND_ALT:
3436
0
    do {
3437
0
      r = check_called_node_in_look_behind(ND_CAR(node), not);
3438
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3439
0
    break;
3440
3441
0
  case ND_QUANT:
3442
0
    r = check_called_node_in_look_behind(ND_BODY(node), not);
3443
0
    break;
3444
3445
0
  case ND_BAG:
3446
0
    {
3447
0
      BagNode* en = BAG_(node);
3448
3449
0
      if (en->type == BAG_MEMORY) {
3450
0
        if (ND_IS_MARK1(node))
3451
0
          return 0;
3452
0
        else {
3453
0
          ND_STATUS_ADD(node, MARK1);
3454
0
          r = check_called_node_in_look_behind(ND_BODY(node), not);
3455
0
          ND_STATUS_REMOVE(node, MARK1);
3456
0
        }
3457
0
      }
3458
0
      else {
3459
0
        r = check_called_node_in_look_behind(ND_BODY(node), not);
3460
0
        if (r == 0 && en->type == BAG_IF_ELSE) {
3461
0
          if (IS_NOT_NULL(en->te.Then)) {
3462
0
            r = check_called_node_in_look_behind(en->te.Then, not);
3463
0
            if (r != 0) break;
3464
0
          }
3465
0
          if (IS_NOT_NULL(en->te.Else)) {
3466
0
            r = check_called_node_in_look_behind(en->te.Else, not);
3467
0
          }
3468
0
        }
3469
0
      }
3470
0
    }
3471
0
    break;
3472
3473
0
  case ND_ANCHOR:
3474
0
    if (IS_NOT_NULL(ND_BODY(node)))
3475
0
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3476
0
    break;
3477
3478
0
  case ND_GIMMICK:
3479
0
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3480
0
      return 1;
3481
0
    break;
3482
3483
0
  default:
3484
0
    break;
3485
0
  }
3486
3487
0
  return r;
3488
0
}
3489
3490
/* allowed node types in look-behind */
3491
#define ALLOWED_TYPE_IN_LB \
3492
0
  ( ND_BIT_LIST | ND_BIT_ALT | ND_BIT_STRING | ND_BIT_CCLASS \
3493
0
  | ND_BIT_CTYPE | ND_BIT_ANCHOR | ND_BIT_BAG | ND_BIT_QUANT \
3494
0
  | ND_BIT_CALL | ND_BIT_BACKREF | ND_BIT_GIMMICK)
3495
3496
0
#define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3497
0
#define ALLOWED_BAG_IN_LB_NOT   ( 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3498
3499
#define ALLOWED_ANCHOR_IN_LB \
3500
0
  ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
3501
0
  | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
3502
0
  | ANCR_WORD_BEGIN | ANCR_WORD_END \
3503
0
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3504
3505
#define ALLOWED_ANCHOR_IN_LB_NOT \
3506
0
  ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
3507
0
  | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
3508
0
  | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
3509
0
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3510
3511
3512
static int
3513
check_node_in_look_behind(Node* node, int not, int* used)
3514
0
{
3515
0
  static unsigned int
3516
0
    bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT };
3517
3518
0
  static unsigned int
3519
0
    anchor_mask[2] = { ALLOWED_ANCHOR_IN_LB, ALLOWED_ANCHOR_IN_LB_NOT };
3520
3521
0
  NodeType type;
3522
0
  int r = 0;
3523
3524
0
  type = ND_TYPE(node);
3525
0
  if ((ND_TYPE2BIT(type) & ALLOWED_TYPE_IN_LB) == 0)
3526
0
    return 1;
3527
3528
0
  switch (type) {
3529
0
  case ND_LIST:
3530
0
  case ND_ALT:
3531
0
    do {
3532
0
      r = check_node_in_look_behind(ND_CAR(node), not, used);
3533
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3534
0
    break;
3535
3536
0
  case ND_QUANT:
3537
0
    r = check_node_in_look_behind(ND_BODY(node), not, used);
3538
0
    break;
3539
3540
0
  case ND_BAG:
3541
0
    {
3542
0
      BagNode* en = BAG_(node);
3543
0
      if (((1<<en->type) & bag_mask[not]) == 0)
3544
0
        return 1;
3545
3546
0
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3547
0
      if (r != 0) break;
3548
3549
0
      if (en->type == BAG_MEMORY) {
3550
0
        if (ND_IS_BACKREF(node) || ND_IS_CALLED(node)
3551
0
         || ND_IS_REFERENCED(node))
3552
0
          *used = TRUE;
3553
0
      }
3554
0
      else if (en->type == BAG_IF_ELSE) {
3555
0
        if (IS_NOT_NULL(en->te.Then)) {
3556
0
          r = check_node_in_look_behind(en->te.Then, not, used);
3557
0
          if (r != 0) break;
3558
0
        }
3559
0
        if (IS_NOT_NULL(en->te.Else)) {
3560
0
          r = check_node_in_look_behind(en->te.Else, not, used);
3561
0
        }
3562
0
      }
3563
0
    }
3564
0
    break;
3565
3566
0
  case ND_ANCHOR:
3567
0
    type = ANCHOR_(node)->type;
3568
0
    if ((type & anchor_mask[not]) == 0)
3569
0
      return 1;
3570
3571
0
    if (IS_NOT_NULL(ND_BODY(node)))
3572
0
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3573
0
    break;
3574
3575
0
  case ND_GIMMICK:
3576
0
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3577
0
      return 1;
3578
3579
0
    {
3580
0
      GimmickNode* g = GIMMICK_(node);
3581
0
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
3582
0
        *used = TRUE;
3583
0
    }
3584
0
    break;
3585
3586
0
  case ND_CALL:
3587
0
    if (ND_IS_RECURSION(node)) {
3588
      /* fix: Issue 38040 in oss-fuzz */
3589
      /* This node should be removed before recursive call check. */
3590
0
      *used = TRUE;
3591
0
    }
3592
0
    else
3593
0
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3594
0
    break;
3595
3596
0
  default:
3597
0
    break;
3598
0
  }
3599
0
  return r;
3600
0
}
3601
3602
static OnigLen
3603
node_min_byte_len(Node* node, ParseEnv* env)
3604
0
{
3605
0
  OnigLen len;
3606
0
  OnigLen tmin;
3607
3608
0
  len = 0;
3609
0
  switch (ND_TYPE(node)) {
3610
0
  case ND_BACKREF:
3611
0
    if (! ND_IS_CHECKER(node)) {
3612
0
      int i;
3613
0
      int* backs;
3614
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3615
0
      BackRefNode* br = BACKREF_(node);
3616
0
      if (ND_IS_RECURSION(node)) break;
3617
3618
0
      backs = BACKREFS_P(br);
3619
0
      len = node_min_byte_len(mem_env[backs[0]].mem_node, env);
3620
0
      for (i = 1; i < br->back_num; i++) {
3621
0
        tmin = node_min_byte_len(mem_env[backs[i]].mem_node, env);
3622
0
        if (len > tmin) len = tmin;
3623
0
      }
3624
0
    }
3625
0
    break;
3626
3627
0
#ifdef USE_CALL
3628
0
  case ND_CALL:
3629
0
    {
3630
0
      Node* t = ND_BODY(node);
3631
0
      if (ND_IS_FIXED_MIN(t))
3632
0
        len = BAG_(t)->min_len;
3633
0
      else
3634
0
        len = node_min_byte_len(t, env);
3635
0
    }
3636
0
    break;
3637
0
#endif
3638
3639
0
  case ND_LIST:
3640
0
    do {
3641
0
      tmin = node_min_byte_len(ND_CAR(node), env);
3642
0
      len = distance_add(len, tmin);
3643
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3644
0
    break;
3645
3646
0
  case ND_ALT:
3647
0
    {
3648
0
      Node *x, *y;
3649
0
      y = node;
3650
0
      do {
3651
0
        x = ND_CAR(y);
3652
0
        tmin = node_min_byte_len(x, env);
3653
0
        if (y == node) len = tmin;
3654
0
        else if (len > tmin) len = tmin;
3655
0
      } while (IS_NOT_NULL(y = ND_CDR(y)));
3656
0
    }
3657
0
    break;
3658
3659
0
  case ND_STRING:
3660
0
    {
3661
0
      StrNode* sn = STR_(node);
3662
0
      len = (int )(sn->end - sn->s);
3663
0
    }
3664
0
    break;
3665
3666
0
  case ND_CTYPE:
3667
0
  case ND_CCLASS:
3668
0
    len = ONIGENC_MBC_MINLEN(env->enc);
3669
0
    break;
3670
3671
0
  case ND_QUANT:
3672
0
    {
3673
0
      QuantNode* qn = QUANT_(node);
3674
3675
0
      if (qn->lower > 0) {
3676
0
        len = node_min_byte_len(ND_BODY(node), env);
3677
0
        len = distance_multiply(len, qn->lower);
3678
0
      }
3679
0
    }
3680
0
    break;
3681
3682
0
  case ND_BAG:
3683
0
    {
3684
0
      BagNode* en = BAG_(node);
3685
0
      switch (en->type) {
3686
0
      case BAG_MEMORY:
3687
0
        if (ND_IS_FIXED_MIN(node))
3688
0
          len = en->min_len;
3689
0
        else {
3690
0
          if (ND_IS_MARK1(node))
3691
0
            len = 0;  /* recursive */
3692
0
          else {
3693
0
            ND_STATUS_ADD(node, MARK1);
3694
0
            len = node_min_byte_len(ND_BODY(node), env);
3695
0
            ND_STATUS_REMOVE(node, MARK1);
3696
3697
0
            en->min_len = len;
3698
0
            ND_STATUS_ADD(node, FIXED_MIN);
3699
0
          }
3700
0
        }
3701
0
        break;
3702
3703
0
      case BAG_OPTION:
3704
0
      case BAG_STOP_BACKTRACK:
3705
0
        len = node_min_byte_len(ND_BODY(node), env);
3706
0
        break;
3707
0
      case BAG_IF_ELSE:
3708
0
        {
3709
0
          OnigLen elen;
3710
3711
0
          len = node_min_byte_len(ND_BODY(node), env);
3712
0
          if (IS_NOT_NULL(en->te.Then))
3713
0
            len += node_min_byte_len(en->te.Then, env);
3714
0
          if (IS_NOT_NULL(en->te.Else))
3715
0
            elen = node_min_byte_len(en->te.Else, env);
3716
0
          else elen = 0;
3717
3718
0
          if (elen < len) len = elen;
3719
0
        }
3720
0
        break;
3721
0
      }
3722
0
    }
3723
0
    break;
3724
3725
0
  case ND_GIMMICK:
3726
0
    {
3727
0
      GimmickNode* g = GIMMICK_(node);
3728
0
      if (g->type == GIMMICK_FAIL) {
3729
0
        len = INFINITE_LEN;
3730
0
        break;
3731
0
      }
3732
0
    }
3733
    /* fall */
3734
0
  case ND_ANCHOR:
3735
0
  default:
3736
0
    break;
3737
0
  }
3738
3739
0
  return len;
3740
0
}
3741
3742
static int
3743
check_backrefs(Node* node, ParseEnv* env)
3744
0
{
3745
0
  int r;
3746
3747
0
  switch (ND_TYPE(node)) {
3748
0
  case ND_LIST:
3749
0
  case ND_ALT:
3750
0
    do {
3751
0
      r = check_backrefs(ND_CAR(node), env);
3752
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3753
0
    break;
3754
3755
0
  case ND_ANCHOR:
3756
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3757
0
      r = 0;
3758
0
      break;
3759
0
    }
3760
    /* fall */
3761
0
  case ND_QUANT:
3762
0
    r = check_backrefs(ND_BODY(node), env);
3763
0
    break;
3764
3765
0
  case ND_BAG:
3766
0
    r = check_backrefs(ND_BODY(node), env);
3767
0
    {
3768
0
      BagNode* en = BAG_(node);
3769
3770
0
      if (en->type == BAG_IF_ELSE) {
3771
0
        if (r != 0) return r;
3772
0
        if (IS_NOT_NULL(en->te.Then)) {
3773
0
          r = check_backrefs(en->te.Then, env);
3774
0
          if (r != 0) return r;
3775
0
        }
3776
0
        if (IS_NOT_NULL(en->te.Else)) {
3777
0
          r = check_backrefs(en->te.Else, env);
3778
0
        }
3779
0
      }
3780
0
    }
3781
0
    break;
3782
3783
0
  case ND_BACKREF:
3784
0
    {
3785
0
      int i;
3786
0
      BackRefNode* br = BACKREF_(node);
3787
0
      int* backs = BACKREFS_P(br);
3788
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3789
3790
0
      for (i = 0; i < br->back_num; i++) {
3791
0
        if (backs[i] > env->num_mem)
3792
0
          return ONIGERR_INVALID_BACKREF;
3793
3794
0
        ND_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
3795
0
      }
3796
0
      r = 0;
3797
0
    }
3798
0
    break;
3799
3800
0
  default:
3801
0
    r = 0;
3802
0
    break;
3803
0
  }
3804
3805
0
  return r;
3806
0
}
3807
3808
static int
3809
set_empty_repeat_node_trav(Node* node, Node* empty, ParseEnv* env)
3810
0
{
3811
0
  int r;
3812
3813
0
  switch (ND_TYPE(node)) {
3814
0
  case ND_LIST:
3815
0
  case ND_ALT:
3816
0
    do {
3817
0
      r = set_empty_repeat_node_trav(ND_CAR(node), empty, env);
3818
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3819
0
    break;
3820
3821
0
  case ND_ANCHOR:
3822
0
    {
3823
0
      AnchorNode* an = ANCHOR_(node);
3824
3825
0
      if (! ANCHOR_HAS_BODY(an)) {
3826
0
        r = 0;
3827
0
        break;
3828
0
      }
3829
3830
0
      switch (an->type) {
3831
0
      case ANCR_PREC_READ:
3832
0
      case ANCR_LOOK_BEHIND:
3833
0
        empty = NULL_NODE;
3834
0
        break;
3835
0
      default:
3836
0
        break;
3837
0
      }
3838
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3839
0
    }
3840
0
    break;
3841
3842
0
  case ND_QUANT:
3843
0
    {
3844
0
      QuantNode* qn = QUANT_(node);
3845
3846
0
      if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
3847
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3848
0
    }
3849
0
    break;
3850
3851
0
  case ND_BAG:
3852
0
    if (IS_NOT_NULL(ND_BODY(node))) {
3853
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3854
0
      if (r != 0) return r;
3855
0
    }
3856
0
    {
3857
0
      BagNode* en = BAG_(node);
3858
3859
0
      r = 0;
3860
0
      if (en->type == BAG_MEMORY) {
3861
0
        if (ND_IS_BACKREF(node)) {
3862
0
          if (IS_NOT_NULL(empty))
3863
0
            PARSEENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
3864
0
        }
3865
0
      }
3866
0
      else if (en->type == BAG_IF_ELSE) {
3867
0
        if (IS_NOT_NULL(en->te.Then)) {
3868
0
          r = set_empty_repeat_node_trav(en->te.Then, empty, env);
3869
0
          if (r != 0) return r;
3870
0
        }
3871
0
        if (IS_NOT_NULL(en->te.Else)) {
3872
0
          r = set_empty_repeat_node_trav(en->te.Else, empty, env);
3873
0
        }
3874
0
      }
3875
0
    }
3876
0
    break;
3877
3878
0
  default:
3879
0
    r = 0;
3880
0
    break;
3881
0
  }
3882
3883
0
  return r;
3884
0
}
3885
3886
static int
3887
is_ancestor_node(Node* node, Node* me)
3888
0
{
3889
0
  Node* parent;
3890
3891
0
  while ((parent = ND_PARENT(me)) != NULL_NODE) {
3892
0
    if (parent == node) return 1;
3893
0
    me = parent;
3894
0
  }
3895
0
  return 0;
3896
0
}
3897
3898
static void
3899
set_empty_status_check_trav(Node* node, ParseEnv* env)
3900
0
{
3901
0
  switch (ND_TYPE(node)) {
3902
0
  case ND_LIST:
3903
0
  case ND_ALT:
3904
0
    do {
3905
0
      set_empty_status_check_trav(ND_CAR(node), env);
3906
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3907
0
    break;
3908
3909
0
  case ND_ANCHOR:
3910
0
    {
3911
0
      AnchorNode* an = ANCHOR_(node);
3912
3913
0
      if (! ANCHOR_HAS_BODY(an)) break;
3914
0
      set_empty_status_check_trav(ND_BODY(node), env);
3915
0
    }
3916
0
    break;
3917
3918
0
  case ND_QUANT:
3919
0
    set_empty_status_check_trav(ND_BODY(node), env);
3920
0
    break;
3921
3922
0
  case ND_BAG:
3923
0
    if (IS_NOT_NULL(ND_BODY(node)))
3924
0
      set_empty_status_check_trav(ND_BODY(node), env);
3925
0
    {
3926
0
      BagNode* en = BAG_(node);
3927
3928
0
      if (en->type == BAG_IF_ELSE) {
3929
0
        if (IS_NOT_NULL(en->te.Then)) {
3930
0
          set_empty_status_check_trav(en->te.Then, env);
3931
0
        }
3932
0
        if (IS_NOT_NULL(en->te.Else)) {
3933
0
          set_empty_status_check_trav(en->te.Else, env);
3934
0
        }
3935
0
      }
3936
0
    }
3937
0
    break;
3938
3939
0
  case ND_BACKREF:
3940
0
    {
3941
0
      int i;
3942
0
      int* backs;
3943
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3944
0
      BackRefNode* br = BACKREF_(node);
3945
0
      backs = BACKREFS_P(br);
3946
0
      for (i = 0; i < br->back_num; i++) {
3947
0
        Node* ernode = mem_env[backs[i]].empty_repeat_node;
3948
0
        if (IS_NOT_NULL(ernode)) {
3949
0
          if (! is_ancestor_node(ernode, node)) {
3950
0
            MEM_STATUS_LIMIT_ON(QUANT_(ernode)->empty_status_mem, backs[i]);
3951
0
            ND_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
3952
0
            ND_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
3953
0
          }
3954
0
        }
3955
0
      }
3956
0
    }
3957
0
    break;
3958
3959
0
  default:
3960
0
    break;
3961
0
  }
3962
0
}
3963
3964
static void
3965
set_parent_node_trav(Node* node, Node* parent)
3966
0
{
3967
0
  ND_PARENT(node) = parent;
3968
3969
0
  switch (ND_TYPE(node)) {
3970
0
  case ND_LIST:
3971
0
  case ND_ALT:
3972
0
    do {
3973
0
      set_parent_node_trav(ND_CAR(node), node);
3974
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3975
0
    break;
3976
3977
0
  case ND_ANCHOR:
3978
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
3979
0
    set_parent_node_trav(ND_BODY(node), node);
3980
0
    break;
3981
3982
0
  case ND_QUANT:
3983
0
    set_parent_node_trav(ND_BODY(node), node);
3984
0
    break;
3985
3986
0
  case ND_BAG:
3987
0
    if (IS_NOT_NULL(ND_BODY(node)))
3988
0
      set_parent_node_trav(ND_BODY(node), node);
3989
0
    {
3990
0
      BagNode* en = BAG_(node);
3991
3992
0
      if (en->type == BAG_IF_ELSE) {
3993
0
        if (IS_NOT_NULL(en->te.Then))
3994
0
          set_parent_node_trav(en->te.Then, node);
3995
0
        if (IS_NOT_NULL(en->te.Else)) {
3996
0
          set_parent_node_trav(en->te.Else, node);
3997
0
        }
3998
0
      }
3999
0
    }
4000
0
    break;
4001
4002
0
  default:
4003
0
    break;
4004
0
  }
4005
0
}
4006
4007
4008
#ifdef USE_CALL
4009
4010
0
#define RECURSION_EXIST        (1<<0)
4011
0
#define RECURSION_MUST         (1<<1)
4012
0
#define RECURSION_INFINITE     (1<<2)
4013
4014
static int
4015
infinite_recursive_call_check(Node* node, ParseEnv* env, int head)
4016
0
{
4017
0
  int ret;
4018
0
  int r = 0;
4019
4020
0
  switch (ND_TYPE(node)) {
4021
0
  case ND_LIST:
4022
0
    {
4023
0
      Node *x;
4024
0
      OnigLen min;
4025
4026
0
      x = node;
4027
0
      do {
4028
0
        ret = infinite_recursive_call_check(ND_CAR(x), env, head);
4029
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4030
0
        r |= ret;
4031
0
        if (head != 0) {
4032
0
          min = node_min_byte_len(ND_CAR(x), env);
4033
0
          if (min != 0) head = 0;
4034
0
        }
4035
0
      } while (IS_NOT_NULL(x = ND_CDR(x)));
4036
0
    }
4037
0
    break;
4038
4039
0
  case ND_ALT:
4040
0
    {
4041
0
      int must;
4042
4043
0
      must = RECURSION_MUST;
4044
0
      do {
4045
0
        ret = infinite_recursive_call_check(ND_CAR(node), env, head);
4046
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4047
4048
0
        r    |= (ret & RECURSION_EXIST);
4049
0
        must &= ret;
4050
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4051
0
      r |= must;
4052
0
    }
4053
0
    break;
4054
4055
0
  case ND_QUANT:
4056
0
    if (QUANT_(node)->upper == 0) break;
4057
4058
0
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4059
0
    if (r < 0) return r;
4060
0
    if ((r & RECURSION_MUST) != 0) {
4061
0
      if (QUANT_(node)->lower == 0)
4062
0
        r &= ~RECURSION_MUST;
4063
0
    }
4064
0
    break;
4065
4066
0
  case ND_ANCHOR:
4067
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
4068
0
      break;
4069
    /* fall */
4070
0
  case ND_CALL:
4071
0
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4072
0
    break;
4073
4074
0
  case ND_BAG:
4075
0
    {
4076
0
      BagNode* en = BAG_(node);
4077
4078
0
      if (en->type == BAG_MEMORY) {
4079
0
        if (ND_IS_MARK2(node))
4080
0
          return 0;
4081
0
        else if (ND_IS_MARK1(node))
4082
0
          return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
4083
0
                  : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
4084
0
        else {
4085
0
          ND_STATUS_ADD(node, MARK2);
4086
0
          r = infinite_recursive_call_check(ND_BODY(node), env, head);
4087
0
          ND_STATUS_REMOVE(node, MARK2);
4088
0
        }
4089
0
      }
4090
0
      else if (en->type == BAG_IF_ELSE) {
4091
0
        int eret;
4092
4093
0
        ret = infinite_recursive_call_check(ND_BODY(node), env, head);
4094
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4095
0
        r |= ret;
4096
0
        if (IS_NOT_NULL(en->te.Then)) {
4097
0
          OnigLen min;
4098
0
          if (head != 0) {
4099
0
            min = node_min_byte_len(ND_BODY(node), env);
4100
0
          }
4101
0
          else min = 0;
4102
4103
0
          ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
4104
0
          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4105
0
          r |= ret;
4106
0
        }
4107
0
        if (IS_NOT_NULL(en->te.Else)) {
4108
0
          eret = infinite_recursive_call_check(en->te.Else, env, head);
4109
0
          if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
4110
0
          r |= (eret & RECURSION_EXIST);
4111
0
          if ((eret & RECURSION_MUST) == 0)
4112
0
            r &= ~RECURSION_MUST;
4113
0
        }
4114
0
        else {
4115
0
          r &= ~RECURSION_MUST;
4116
0
        }
4117
0
      }
4118
0
      else {
4119
0
        r = infinite_recursive_call_check(ND_BODY(node), env, head);
4120
0
      }
4121
0
    }
4122
0
    break;
4123
4124
0
  default:
4125
0
    break;
4126
0
  }
4127
4128
0
  return r;
4129
0
}
4130
4131
static int
4132
infinite_recursive_call_check_trav(Node* node, ParseEnv* env)
4133
0
{
4134
0
  int r;
4135
4136
0
  switch (ND_TYPE(node)) {
4137
0
  case ND_LIST:
4138
0
  case ND_ALT:
4139
0
    do {
4140
0
      r = infinite_recursive_call_check_trav(ND_CAR(node), env);
4141
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4142
0
    break;
4143
4144
0
  case ND_ANCHOR:
4145
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4146
0
      r = 0;
4147
0
      break;
4148
0
    }
4149
    /* fall */
4150
0
  case ND_QUANT:
4151
0
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4152
0
    break;
4153
4154
0
  case ND_BAG:
4155
0
    {
4156
0
      BagNode* en = BAG_(node);
4157
4158
0
      if (en->type == BAG_MEMORY) {
4159
0
        if (ND_IS_RECURSION(node) && ND_IS_CALLED(node)) {
4160
0
          int ret;
4161
4162
0
          ND_STATUS_ADD(node, MARK1);
4163
4164
0
          ret = infinite_recursive_call_check(ND_BODY(node), env, 1);
4165
0
          if (ret < 0) return ret;
4166
0
          else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
4167
0
            return ONIGERR_NEVER_ENDING_RECURSION;
4168
4169
0
          ND_STATUS_REMOVE(node, MARK1);
4170
0
        }
4171
0
      }
4172
0
      else if (en->type == BAG_IF_ELSE) {
4173
0
        if (IS_NOT_NULL(en->te.Then)) {
4174
0
          r = infinite_recursive_call_check_trav(en->te.Then, env);
4175
0
          if (r != 0) return r;
4176
0
        }
4177
0
        if (IS_NOT_NULL(en->te.Else)) {
4178
0
          r = infinite_recursive_call_check_trav(en->te.Else, env);
4179
0
          if (r != 0) return r;
4180
0
        }
4181
0
      }
4182
0
    }
4183
4184
0
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4185
0
    break;
4186
4187
0
  default:
4188
0
    r = 0;
4189
0
    break;
4190
0
  }
4191
4192
0
  return r;
4193
0
}
4194
4195
static int
4196
recursive_call_check(Node* node)
4197
0
{
4198
0
  int r;
4199
4200
0
  switch (ND_TYPE(node)) {
4201
0
  case ND_LIST:
4202
0
  case ND_ALT:
4203
0
    r = 0;
4204
0
    do {
4205
0
      r |= recursive_call_check(ND_CAR(node));
4206
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4207
0
    break;
4208
4209
0
  case ND_ANCHOR:
4210
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4211
0
      r = 0;
4212
0
      break;
4213
0
    }
4214
    /* fall */
4215
0
  case ND_QUANT:
4216
0
    r = recursive_call_check(ND_BODY(node));
4217
0
    break;
4218
4219
0
  case ND_CALL:
4220
0
    r = recursive_call_check(ND_BODY(node));
4221
0
    if (r != 0) {
4222
0
      if (ND_IS_MARK1(ND_BODY(node)))
4223
0
        ND_STATUS_ADD(node, RECURSION);
4224
0
    }
4225
0
    break;
4226
4227
0
  case ND_BAG:
4228
0
    {
4229
0
      BagNode* en = BAG_(node);
4230
4231
0
      if (en->type == BAG_MEMORY) {
4232
0
        if (ND_IS_MARK2(node))
4233
0
          return 0;
4234
0
        else if (ND_IS_MARK1(node))
4235
0
          return 1; /* recursion */
4236
0
        else {
4237
0
          ND_STATUS_ADD(node, MARK2);
4238
0
          r = recursive_call_check(ND_BODY(node));
4239
0
          ND_STATUS_REMOVE(node, MARK2);
4240
0
        }
4241
0
      }
4242
0
      else if (en->type == BAG_IF_ELSE) {
4243
0
        r = 0;
4244
0
        if (IS_NOT_NULL(en->te.Then)) {
4245
0
          r |= recursive_call_check(en->te.Then);
4246
0
        }
4247
0
        if (IS_NOT_NULL(en->te.Else)) {
4248
0
          r |= recursive_call_check(en->te.Else);
4249
0
        }
4250
0
        r |= recursive_call_check(ND_BODY(node));
4251
0
      }
4252
0
      else {
4253
0
        r = recursive_call_check(ND_BODY(node));
4254
0
      }
4255
0
    }
4256
0
    break;
4257
4258
0
  default:
4259
0
    r = 0;
4260
0
    break;
4261
0
  }
4262
4263
0
  return r;
4264
0
}
4265
4266
0
#define IN_RECURSION         (1<<0)
4267
0
#define FOUND_CALLED_NODE    1
4268
4269
static int
4270
recursive_call_check_trav(Node* node, ParseEnv* env, int state)
4271
0
{
4272
0
  int r = 0;
4273
4274
0
  switch (ND_TYPE(node)) {
4275
0
  case ND_LIST:
4276
0
  case ND_ALT:
4277
0
    {
4278
0
      int ret;
4279
0
      do {
4280
0
        ret = recursive_call_check_trav(ND_CAR(node), env, state);
4281
0
        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
4282
0
        else if (ret < 0) return ret;
4283
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4284
0
    }
4285
0
    break;
4286
4287
0
  case ND_QUANT:
4288
0
    r = recursive_call_check_trav(ND_BODY(node), env, state);
4289
0
    if (QUANT_(node)->upper == 0) {
4290
0
      if (r == FOUND_CALLED_NODE)
4291
0
        QUANT_(node)->include_referred = 1;
4292
0
    }
4293
0
    break;
4294
4295
0
  case ND_ANCHOR:
4296
0
    {
4297
0
      AnchorNode* an = ANCHOR_(node);
4298
0
      if (ANCHOR_HAS_BODY(an))
4299
0
        r = recursive_call_check_trav(ND_ANCHOR_BODY(an), env, state);
4300
0
    }
4301
0
    break;
4302
4303
0
  case ND_BAG:
4304
0
    {
4305
0
      int ret;
4306
0
      int state1;
4307
0
      BagNode* en = BAG_(node);
4308
4309
0
      if (en->type == BAG_MEMORY) {
4310
0
        if (ND_IS_CALLED(node)) {
4311
0
          r = FOUND_CALLED_NODE;
4312
0
          goto check_recursion;
4313
0
        }
4314
0
        else if ((state & IN_RECURSION) != 0) {
4315
0
        check_recursion:
4316
0
          if (! ND_IS_RECURSION(node)) {
4317
0
            ND_STATUS_ADD(node, MARK1);
4318
0
            ret = recursive_call_check(ND_BODY(node));
4319
0
            if (ret != 0) {
4320
0
              ND_STATUS_ADD(node, RECURSION);
4321
0
              MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
4322
0
            }
4323
0
            ND_STATUS_REMOVE(node, MARK1);
4324
0
          }
4325
0
        }
4326
0
      }
4327
4328
0
      state1 = state;
4329
0
      if (ND_IS_RECURSION(node))
4330
0
        state1 |= IN_RECURSION;
4331
4332
0
      ret = recursive_call_check_trav(ND_BODY(node), env, state1);
4333
0
      if (ret == FOUND_CALLED_NODE)
4334
0
        r = FOUND_CALLED_NODE;
4335
4336
0
      if (en->type == BAG_IF_ELSE) {
4337
0
        if (IS_NOT_NULL(en->te.Then)) {
4338
0
          ret = recursive_call_check_trav(en->te.Then, env, state1);
4339
0
          if (ret == FOUND_CALLED_NODE)
4340
0
            r = FOUND_CALLED_NODE;
4341
0
        }
4342
0
        if (IS_NOT_NULL(en->te.Else)) {
4343
0
          ret = recursive_call_check_trav(en->te.Else, env, state1);
4344
0
          if (ret == FOUND_CALLED_NODE)
4345
0
            r = FOUND_CALLED_NODE;
4346
0
        }
4347
0
      }
4348
0
    }
4349
0
    break;
4350
4351
0
  default:
4352
0
    break;
4353
0
  }
4354
4355
0
  return r;
4356
0
}
4357
4358
#endif
4359
4360
static void
4361
remove_from_list(Node* prev, Node* a)
4362
0
{
4363
0
  if (ND_CDR(prev) != a) return ;
4364
4365
0
  ND_CDR(prev) = ND_CDR(a);
4366
0
  ND_CDR(a) = NULL_NODE;
4367
0
}
4368
4369
static int
4370
reduce_string_list(Node* node, OnigEncoding enc)
4371
0
{
4372
0
  int r = 0;
4373
4374
0
  switch (ND_TYPE(node)) {
4375
0
  case ND_LIST:
4376
0
    {
4377
0
      Node* prev;
4378
0
      Node* curr;
4379
0
      Node* prev_node;
4380
0
      Node* next_node;
4381
4382
0
      prev = NULL_NODE;
4383
0
      do {
4384
0
        next_node = ND_CDR(node);
4385
0
        curr = ND_CAR(node);
4386
0
        if (ND_TYPE(curr) == ND_STRING) {
4387
0
          if (IS_NULL(prev)
4388
0
              || STR_(curr)->flag  != STR_(prev)->flag
4389
0
              || ND_STATUS(curr) != ND_STATUS(prev)) {
4390
0
            prev = curr;
4391
0
            prev_node = node;
4392
0
          }
4393
0
          else {
4394
0
            r = node_str_node_cat(prev, curr);
4395
0
            if (r != 0) return r;
4396
0
            remove_from_list(prev_node, node);
4397
0
            onig_node_free(node);
4398
0
          }
4399
0
        }
4400
0
        else {
4401
0
          if (IS_NOT_NULL(prev)) {
4402
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4403
0
            StrNode* sn = STR_(prev);
4404
0
            if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4405
0
              return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4406
0
#endif
4407
0
            prev = NULL_NODE;
4408
0
          }
4409
0
          r = reduce_string_list(curr, enc);
4410
0
          if (r != 0) return r;
4411
0
          prev_node = node;
4412
0
        }
4413
4414
0
        node = next_node;
4415
0
      } while (r == 0 && IS_NOT_NULL(node));
4416
4417
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4418
0
      if (IS_NOT_NULL(prev)) {
4419
0
        StrNode* sn = STR_(prev);
4420
0
        if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4421
0
          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4422
0
      }
4423
0
#endif
4424
0
    }
4425
0
    break;
4426
4427
0
  case ND_ALT:
4428
0
    do {
4429
0
      r = reduce_string_list(ND_CAR(node), enc);
4430
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4431
0
    break;
4432
4433
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4434
0
  case ND_STRING:
4435
0
    {
4436
0
      StrNode* sn = STR_(node);
4437
0
      if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4438
0
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4439
0
    }
4440
0
    break;
4441
0
#endif
4442
4443
0
  case ND_ANCHOR:
4444
0
    if (IS_NULL(ND_BODY(node)))
4445
0
      break;
4446
    /* fall */
4447
0
  case ND_QUANT:
4448
0
    r = reduce_string_list(ND_BODY(node), enc);
4449
0
    break;
4450
4451
0
  case ND_BAG:
4452
0
    {
4453
0
      BagNode* en = BAG_(node);
4454
4455
0
      r = reduce_string_list(ND_BODY(node), enc);
4456
0
      if (r != 0) return r;
4457
4458
0
      if (en->type == BAG_IF_ELSE) {
4459
0
        if (IS_NOT_NULL(en->te.Then)) {
4460
0
          r = reduce_string_list(en->te.Then, enc);
4461
0
          if (r != 0) return r;
4462
0
        }
4463
0
        if (IS_NOT_NULL(en->te.Else)) {
4464
0
          r = reduce_string_list(en->te.Else, enc);
4465
0
          if (r != 0) return r;
4466
0
        }
4467
0
      }
4468
0
    }
4469
0
    break;
4470
4471
0
  default:
4472
0
    break;
4473
0
  }
4474
4475
0
  return r;
4476
0
}
4477
4478
4479
0
#define IN_ALT          (1<<0)
4480
0
#define IN_NOT          (1<<1)
4481
0
#define IN_REAL_REPEAT  (1<<2)
4482
0
#define IN_VAR_REPEAT   (1<<3)
4483
0
#define IN_ZERO_REPEAT  (1<<4)
4484
0
#define IN_MULTI_ENTRY  (1<<5)
4485
0
#define IN_PREC_READ    (1<<6)
4486
0
#define IN_LOOK_BEHIND  (1<<7)
4487
0
#define IN_PEEK         (1<<8)
4488
4489
/* divide different length alternatives in look-behind.
4490
  (?<=A|B) ==> (?<=A)|(?<=B)
4491
  (?<!A|B) ==> (?<!A)(?<!B)
4492
*/
4493
static int
4494
divide_look_behind_alternatives(Node* node)
4495
0
{
4496
0
  int r;
4497
0
  int anc_type;
4498
0
  Node *head, *np, *insert_node;
4499
0
  AnchorNode* an;
4500
4501
0
  an = ANCHOR_(node);
4502
0
  anc_type = an->type;
4503
4504
0
  head = ND_ANCHOR_BODY(an);
4505
0
  np = ND_CAR(head);
4506
0
  node_swap(node, head);
4507
0
  ND_CAR(node) = head;
4508
0
  ND_BODY(head) = np;
4509
4510
0
  np = node;
4511
0
  while (IS_NOT_NULL(np = ND_CDR(np))) {
4512
0
    r = onig_node_copy(&insert_node, head);
4513
0
    if (r != 0) return r;
4514
0
    CHECK_NULL_RETURN_MEMERR(insert_node);
4515
0
    ND_BODY(insert_node) = ND_CAR(np);
4516
0
    ND_CAR(np) = insert_node;
4517
0
  }
4518
4519
0
  if (anc_type == ANCR_LOOK_BEHIND_NOT