Coverage Report

Created: 2023-11-27 06:38

/src/jq/modules/oniguruma/src/regcomp.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************************
2
  regcomp.c -  Oniguruma (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2023  K.Kosako
6
 * All rights reserved.
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "regparse.h"
31
32
0
/* Initial element count for the operation buffer (presumably the value
   handed to ops_init(); the call site is outside this part of the file). */
#define OPS_INIT_SIZE  8
33
34
/* True when the node is flagged ignore-case and its string is not "crude". */
#define ND_IS_REAL_IGNORECASE(node) \
  (ND_IS_IGNORECASE(node) && !ND_STRING_IS_CRUDE(node))
36
37
typedef struct {
38
  OnigLen min;
39
  OnigLen max;
40
} MinMaxLen;
41
42
typedef struct {
43
  OnigLen min;
44
  OnigLen max;
45
  int min_is_sure;
46
} MinMaxCharLen;
47
48
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
49
50
static OnigLen node_min_byte_len(Node* node, ParseEnv* env);
51
52
static int
53
ops_init(regex_t* reg, int init_alloc_size)
54
0
{
55
0
  Operation* p;
56
0
  size_t size;
57
58
0
  if (init_alloc_size <= 0)
59
0
    return ONIGERR_PARSER_BUG;
60
61
0
  size = sizeof(Operation) * init_alloc_size;
62
0
  p = (Operation* )xrealloc(reg->ops, size);
63
0
  CHECK_NULL_RETURN_MEMERR(p);
64
0
  reg->ops = p;
65
0
#ifdef USE_DIRECT_THREADED_CODE
66
0
  {
67
0
    enum OpCode* cp;
68
0
    size = sizeof(enum OpCode) * init_alloc_size;
69
0
    cp = (enum OpCode* )xrealloc(reg->ocs, size);
70
0
    CHECK_NULL_RETURN_MEMERR(cp);
71
0
    reg->ocs = cp;
72
0
  }
73
0
#endif
74
75
0
  reg->ops_curr  = 0; /* !!! not yet done ops_new() */
76
0
  reg->ops_alloc = init_alloc_size;
77
0
  reg->ops_used  = 0;
78
79
0
  return ONIG_NORMAL;
80
0
}
81
82
static int
83
ops_resize(regex_t* reg, int n)
84
0
{
85
0
#ifdef USE_DIRECT_THREADED_CODE
86
0
  enum OpCode* cp;
87
0
#endif
88
0
  Operation* p;
89
0
  size_t size;
90
91
0
  if (n == reg->ops_alloc) return ONIG_NORMAL;
92
0
  if (n <= 0) return ONIGERR_PARSER_BUG;
93
94
0
  size = sizeof(Operation) * n;
95
0
  p = (Operation* )xrealloc(reg->ops, size);
96
0
  CHECK_NULL_RETURN_MEMERR(p);
97
0
  reg->ops = p;
98
99
0
#ifdef USE_DIRECT_THREADED_CODE
100
0
  size = sizeof(enum OpCode) * n;
101
0
  cp = (enum OpCode* )xrealloc(reg->ocs, size);
102
0
  CHECK_NULL_RETURN_MEMERR(cp);
103
0
  reg->ocs = cp;
104
0
#endif
105
106
0
  reg->ops_alloc = n;
107
0
  if (reg->ops_used == 0)
108
0
    reg->ops_curr = 0;
109
0
  else
110
0
    reg->ops_curr = reg->ops + (reg->ops_used - 1);
111
112
0
  return ONIG_NORMAL;
113
0
}
114
115
static int
116
ops_new(regex_t* reg)
117
0
{
118
0
  if (reg->ops_used >= reg->ops_alloc) {
119
0
    int r = ops_resize(reg, reg->ops_alloc << 1);
120
0
    if (r != ONIG_NORMAL) return r;
121
0
  }
122
123
0
  reg->ops_curr = reg->ops + reg->ops_used;
124
0
  reg->ops_used++;
125
126
0
  xmemset(reg->ops_curr, 0, sizeof(Operation));
127
0
  return ONIG_NORMAL;
128
0
}
129
130
static int
131
is_in_string_pool(regex_t* reg, UChar* s)
132
0
{
133
0
  return (s >= reg->string_pool && s < reg->string_pool_end);
134
0
}
135
136
static void
137
ops_free(regex_t* reg)
138
0
{
139
0
  int i;
140
141
0
  if (IS_NULL(reg->ops)) return ;
142
143
0
  for (i = 0; i < (int )reg->ops_used; i++) {
144
0
    enum OpCode opcode;
145
0
    Operation* op;
146
147
0
    op = reg->ops + i;
148
149
0
#ifdef USE_DIRECT_THREADED_CODE
150
0
    opcode = *(reg->ocs + i);
151
#else
152
    opcode = op->opcode;
153
#endif
154
155
0
    switch (opcode) {
156
0
    case OP_STR_MBN:
157
0
      if (! is_in_string_pool(reg, op->exact_len_n.s))
158
0
        xfree(op->exact_len_n.s);
159
0
      break;
160
0
    case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N:
161
0
      if (! is_in_string_pool(reg, op->exact_n.s))
162
0
        xfree(op->exact_n.s);
163
0
      break;
164
0
    case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
165
0
    case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
166
0
    case OP_STR_MB2N3:
167
0
      break;
168
169
0
    case OP_CCLASS_NOT: case OP_CCLASS:
170
0
      xfree(op->cclass.bsp);
171
0
      break;
172
173
0
    case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
174
0
      xfree(op->cclass_mb.mb);
175
0
      break;
176
0
    case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
177
0
      xfree(op->cclass_mix.mb);
178
0
      xfree(op->cclass_mix.bsp);
179
0
      break;
180
181
0
    case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
182
0
      break;
183
0
    case OP_BACKREF_MULTI:      case OP_BACKREF_MULTI_IC:
184
0
    case OP_BACKREF_CHECK:
185
0
#ifdef USE_BACKREF_WITH_LEVEL
186
0
    case OP_BACKREF_WITH_LEVEL:
187
0
    case OP_BACKREF_WITH_LEVEL_IC:
188
0
    case OP_BACKREF_CHECK_WITH_LEVEL:
189
0
#endif
190
0
      if (op->backref_general.num != 1)
191
0
        xfree(op->backref_general.ns);
192
0
      break;
193
194
0
    default:
195
0
      break;
196
0
    }
197
0
  }
198
199
0
  xfree(reg->ops);
200
0
#ifdef USE_DIRECT_THREADED_CODE
201
0
  xfree(reg->ocs);
202
0
  reg->ocs = 0;
203
0
#endif
204
205
0
  reg->ops = 0;
206
0
  reg->ops_curr  = 0;
207
0
  reg->ops_alloc = 0;
208
0
  reg->ops_used  = 0;
209
0
}
210
211
static int
212
ops_calc_size_of_string_pool(regex_t* reg)
213
0
{
214
0
  int i;
215
0
  int total;
216
217
0
  if (IS_NULL(reg->ops)) return 0;
218
219
0
  total = 0;
220
0
  for (i = 0; i < (int )reg->ops_used; i++) {
221
0
    enum OpCode opcode;
222
0
    Operation* op;
223
224
0
    op = reg->ops + i;
225
0
#ifdef USE_DIRECT_THREADED_CODE
226
0
    opcode = *(reg->ocs + i);
227
#else
228
    opcode = op->opcode;
229
#endif
230
231
0
    switch (opcode) {
232
0
    case OP_STR_MBN:
233
0
      total += op->exact_len_n.len * op->exact_len_n.n;
234
0
      break;
235
0
    case OP_STR_N:
236
0
    case OP_STR_MB2N:
237
0
      total += op->exact_n.n * 2;
238
0
      break;
239
0
    case OP_STR_MB3N:
240
0
      total += op->exact_n.n * 3;
241
0
      break;
242
243
0
    default:
244
0
      break;
245
0
    }
246
0
  }
247
248
0
  return total;
249
0
}
250
251
static int
252
ops_make_string_pool(regex_t* reg)
253
0
{
254
0
  int i;
255
0
  int len;
256
0
  int size;
257
0
  UChar* pool;
258
0
  UChar* curr;
259
260
0
  size = ops_calc_size_of_string_pool(reg);
261
0
  if (size <= 0) {
262
0
    return 0;
263
0
  }
264
265
0
  curr = pool = (UChar* )xmalloc((size_t )size);
266
0
  CHECK_NULL_RETURN_MEMERR(pool);
267
268
0
  for (i = 0; i < (int )reg->ops_used; i++) {
269
0
    enum OpCode opcode;
270
0
    Operation* op;
271
272
0
    op = reg->ops + i;
273
0
#ifdef USE_DIRECT_THREADED_CODE
274
0
    opcode = *(reg->ocs + i);
275
#else
276
    opcode = op->opcode;
277
#endif
278
279
0
    switch (opcode) {
280
0
    case OP_STR_MBN:
281
0
      len = op->exact_len_n.len * op->exact_len_n.n;
282
0
      xmemcpy(curr, op->exact_len_n.s, len);
283
0
      xfree(op->exact_len_n.s);
284
0
      op->exact_len_n.s = curr;
285
0
      curr += len;
286
0
      break;
287
0
    case OP_STR_N:
288
0
      len = op->exact_n.n;
289
0
    copy:
290
0
      xmemcpy(curr, op->exact_n.s, len);
291
0
      xfree(op->exact_n.s);
292
0
      op->exact_n.s = curr;
293
0
      curr += len;
294
0
      break;
295
0
    case OP_STR_MB2N:
296
0
      len = op->exact_n.n * 2;
297
0
      goto copy;
298
0
      break;
299
0
    case OP_STR_MB3N:
300
0
      len = op->exact_n.n * 3;
301
0
      goto copy;
302
0
      break;
303
304
0
    default:
305
0
      break;
306
0
    }
307
0
  }
308
309
0
  reg->string_pool     = pool;
310
0
  reg->string_pool_end = pool + size;
311
0
  return 0;
312
0
}
313
314
extern OnigCaseFoldType
315
onig_get_default_case_fold_flag(void)
316
0
{
317
0
  return OnigDefaultCaseFoldFlag;
318
0
}
319
320
extern int
321
onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
322
0
{
323
0
  OnigDefaultCaseFoldFlag = case_fold_flag;
324
0
  return 0;
325
0
}
326
327
static int
328
len_multiply_cmp(OnigLen x, int y, OnigLen v)
329
0
{
330
0
  if (x == 0 || y == 0) return -1;
331
332
0
  if (x < INFINITE_LEN / y) {
333
0
    OnigLen xy = x * (OnigLen )y;
334
0
    if (xy > v) return 1;
335
0
    else {
336
0
      if (xy == v) return 0;
337
0
      else return -1;
338
0
    }
339
0
  }
340
0
  else
341
0
    return v == INFINITE_LEN ? 0 : 1;
342
0
}
343
344
extern int
345
onig_positive_int_multiply(int x, int y)
346
0
{
347
0
  if (x == 0 || y == 0) return 0;
348
349
0
  if (x < ONIG_INT_MAX / y)
350
0
    return x * y;
351
0
  else
352
0
    return -1;
353
0
}
354
355
356
static void
357
node_swap(Node* a, Node* b)
358
0
{
359
0
  Node c;
360
361
0
  c = *a; *a = *b; *b = c;
362
363
0
  if (ND_TYPE(a) == ND_STRING) {
364
0
    StrNode* sn = STR_(a);
365
0
    if (sn->capacity == 0) {
366
0
      int len = (int )(sn->end - sn->s);
367
0
      sn->s   = sn->buf;
368
0
      sn->end = sn->s + len;
369
0
    }
370
0
  }
371
372
0
  if (ND_TYPE(b) == ND_STRING) {
373
0
    StrNode* sn = STR_(b);
374
0
    if (sn->capacity == 0) {
375
0
      int len = (int )(sn->end - sn->s);
376
0
      sn->s   = sn->buf;
377
0
      sn->end = sn->s + len;
378
0
    }
379
0
  }
380
0
}
381
382
static int
383
node_list_len(Node* list)
384
0
{
385
0
  int len;
386
387
0
  len = 1;
388
0
  while (IS_NOT_NULL(ND_CDR(list))) {
389
0
    list = ND_CDR(list);
390
0
    len++;
391
0
  }
392
393
0
  return len;
394
0
}
395
396
/*
 * Wrap `x` in a new list cell and, when `list` is not null, link that cell
 * after the last cell of `list`.
 *
 * Returns the new cell, or NULL_NODE when it could not be created.
 */
static Node*
node_list_add(Node* list, Node* x)
{
  Node* cell;
  Node* tail;

  cell = onig_node_new_list(x, NULL);
  if (IS_NULL(cell)) return NULL_NODE;

  if (IS_NULL(list)) return cell;

  tail = list;
  while (IS_NOT_NULL(ND_CDR(tail)))
    tail = ND_CDR(tail);

  ND_CDR(tail) = cell;
  return cell;
}
413
414
static int
415
node_str_node_cat(Node* node, Node* add)
416
0
{
417
0
  int r;
418
419
0
  if (ND_STATUS(node) != ND_STATUS(add))
420
0
    return ONIGERR_TYPE_BUG;
421
422
0
  if (STR_(node)->flag != STR_(add)->flag)
423
0
    return ONIGERR_TYPE_BUG;
424
425
0
  r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
426
0
  if (r != 0) return r;
427
428
0
  return 0;
429
0
}
430
431
static void
432
node_conv_to_str_node(Node* node, Node* ref_node)
433
0
{
434
0
  xmemset(node, 0, sizeof(*node));
435
0
  ND_SET_TYPE(node, ND_STRING);
436
0
  ND_STATUS(node) = ND_STATUS(ref_node);
437
438
0
  STR_(node)->flag     = STR_(ref_node)->flag;
439
0
  STR_(node)->s        = STR_(node)->buf;
440
0
  STR_(node)->end      = STR_(node)->buf;
441
0
  STR_(node)->capacity = 0;
442
0
}
443
444
static OnigLen
445
distance_add(OnigLen d1, OnigLen d2)
446
0
{
447
0
  if (d1 == INFINITE_LEN || d2 == INFINITE_LEN)
448
0
    return INFINITE_LEN;
449
0
  else {
450
0
    if (d1 <= INFINITE_LEN - d2) return d1 + d2;
451
0
    else return INFINITE_LEN;
452
0
  }
453
0
}
454
455
static OnigLen
456
distance_multiply(OnigLen d, int m)
457
0
{
458
0
  if (m == 0) return 0;
459
460
0
  if (d < INFINITE_LEN / m)
461
0
    return d * m;
462
0
  else
463
0
    return INFINITE_LEN;
464
0
}
465
466
static int
467
bitset_is_empty(BitSetRef bs)
468
0
{
469
0
  int i;
470
471
0
  for (i = 0; i < (int )BITSET_REAL_SIZE; i++) {
472
0
    if (bs[i] != 0) return 0;
473
0
  }
474
0
  return 1;
475
0
}
476
477
#ifdef USE_CALL
478
479
static int
480
unset_addr_list_init(UnsetAddrList* list, int size)
481
0
{
482
0
  UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
483
0
  CHECK_NULL_RETURN_MEMERR(p);
484
485
0
  list->num   = 0;
486
0
  list->alloc = size;
487
0
  list->us    = p;
488
0
  return 0;
489
0
}
490
491
static void
492
unset_addr_list_end(UnsetAddrList* list)
493
0
{
494
0
  if (IS_NOT_NULL(list->us))
495
0
    xfree(list->us);
496
0
}
497
498
static int
499
unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
500
0
{
501
0
  UnsetAddr* p;
502
0
  int size;
503
504
0
  if (list->num >= list->alloc) {
505
0
    size = list->alloc * 2;
506
0
    p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size);
507
0
    CHECK_NULL_RETURN_MEMERR(p);
508
0
    list->alloc = size;
509
0
    list->us    = p;
510
0
  }
511
512
0
  list->us[list->num].offset = offset;
513
0
  list->us[list->num].target = node;
514
0
  list->num++;
515
0
  return 0;
516
0
}
517
#endif /* USE_CALL */
518
519
/* Non-negative results of node_char_len1(). */
enum CharLenReturnType {
  CHAR_LEN_NORMAL = 0,        /* fixed or variable */
  CHAR_LEN_TOP_ALT_FIXED = 1  /* reported for an alternation at the top
                                 level whose merged length is not fixed */
};
523
524
static int
525
mmcl_fixed(MinMaxCharLen* c)
526
0
{
527
0
  return (c->min == c->max && c->min != INFINITE_LEN);
528
0
}
529
530
static void
531
mmcl_set(MinMaxCharLen* l, OnigLen len)
532
0
{
533
0
  l->min = len;
534
0
  l->max = len;
535
0
  l->min_is_sure = TRUE;
536
0
}
537
538
static void
539
mmcl_set_min_max(MinMaxCharLen* l, OnigLen min, OnigLen max, int min_is_sure)
540
0
{
541
0
  l->min = min;
542
0
  l->max = max;
543
0
  l->min_is_sure = min_is_sure;
544
0
}
545
546
static void
547
mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)
548
0
{
549
0
  to->min = distance_add(to->min, add->min);
550
0
  to->max = distance_add(to->max, add->max);
551
552
0
  to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;
553
0
}
554
555
static void
556
mmcl_multiply(MinMaxCharLen* to, int m)
557
0
{
558
0
  to->min = distance_multiply(to->min, m);
559
0
  to->max = distance_multiply(to->max, m);
560
0
}
561
562
static void
563
mmcl_repeat_range_multiply(MinMaxCharLen* to, int mlow, int mhigh)
564
0
{
565
0
  to->min = distance_multiply(to->min, mlow);
566
567
0
  if (IS_INFINITE_REPEAT(mhigh))
568
0
    to->max = INFINITE_LEN;
569
0
  else
570
0
    to->max = distance_multiply(to->max, mhigh);
571
0
}
572
573
static void
574
mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)
575
0
{
576
0
  if (to->min > alt->min) {
577
0
    to->min         = alt->min;
578
0
    to->min_is_sure = alt->min_is_sure;
579
0
  }
580
0
  else if (to->min == alt->min) {
581
0
    if (alt->min_is_sure != FALSE)
582
0
      to->min_is_sure = TRUE;
583
0
  }
584
585
0
  if (to->max < alt->max) to->max = alt->max;
586
0
}
587
588
#ifndef ONIG_DONT_OPTIMIZE
589
590
static int
591
mml_is_equal(MinMaxLen* a, MinMaxLen* b)
592
0
{
593
0
  return a->min == b->min && a->max == b->max;
594
0
}
595
596
static void
597
mml_set_min_max(MinMaxLen* l, OnigLen min, OnigLen max)
598
0
{
599
0
  l->min = min;
600
0
  l->max = max;
601
0
}
602
603
static void
604
mml_clear(MinMaxLen* l)
605
0
{
606
0
  l->min = l->max = 0;
607
0
}
608
609
static void
610
mml_copy(MinMaxLen* to, MinMaxLen* from)
611
0
{
612
0
  to->min = from->min;
613
0
  to->max = from->max;
614
0
}
615
616
static void
617
mml_add(MinMaxLen* to, MinMaxLen* add)
618
0
{
619
0
  to->min = distance_add(to->min, add->min);
620
0
  to->max = distance_add(to->max, add->max);
621
0
}
622
623
static void
624
mml_alt_merge(MinMaxLen* to, MinMaxLen* alt)
625
0
{
626
0
  if (to->min > alt->min) to->min = alt->min;
627
0
  if (to->max < alt->max) to->max = alt->max;
628
0
}
629
630
#endif
631
632
/* fixed size pattern node only */
633
static int
634
node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env,
635
               int level)
636
0
{
637
0
  MinMaxCharLen tci;
638
0
  int r = CHAR_LEN_NORMAL;
639
640
0
  level++;
641
642
0
  switch (ND_TYPE(node)) {
643
0
  case ND_LIST:
644
0
    {
645
0
      int first = TRUE;
646
0
      do {
647
0
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
648
0
        if (r < 0) break;
649
0
        if (first == TRUE) {
650
0
          *ci = tci;
651
0
          first = FALSE;
652
0
        }
653
0
        else
654
0
          mmcl_add(ci, &tci);
655
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
656
0
    }
657
0
    break;
658
659
0
  case ND_ALT:
660
0
    {
661
0
      int fixed;
662
663
0
      r = node_char_len1(ND_CAR(node), reg, ci, env, level);
664
0
      if (r < 0) break;
665
666
0
      fixed = TRUE;
667
0
      while (IS_NOT_NULL(node = ND_CDR(node))) {
668
0
        r = node_char_len1(ND_CAR(node), reg, &tci, env, level);
669
0
        if (r < 0) break;
670
0
        if (! mmcl_fixed(&tci))
671
0
          fixed = FALSE;
672
0
        mmcl_alt_merge(ci, &tci);
673
0
      }
674
0
      if (r < 0) break;
675
676
0
      r = CHAR_LEN_NORMAL;
677
0
      if (mmcl_fixed(ci)) break;
678
679
0
      if (fixed == TRUE && level == 1) {
680
0
        r = CHAR_LEN_TOP_ALT_FIXED;
681
0
      }
682
0
    }
683
0
    break;
684
685
0
  case ND_STRING:
686
0
    {
687
0
      OnigLen clen;
688
0
      StrNode* sn = STR_(node);
689
0
      UChar *s = sn->s;
690
691
0
      if (ND_IS_REAL_IGNORECASE(node) &&
692
0
          CASE_FOLD_IS_NOT_ASCII_ONLY(env->case_fold_flag)) {
693
        /* Such a case is possible.
694
           ex. /(?i)(?<=\1)(a)/
695
           Backref node refer to capture group, but it doesn't tune yet.
696
         */
697
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
698
0
        break;
699
0
      }
700
701
0
      clen = 0;
702
0
      while (s < sn->end) {
703
0
        s += enclen(reg->enc, s);
704
0
        clen = distance_add(clen, 1);
705
0
      }
706
0
      mmcl_set(ci, clen);
707
0
    }
708
0
    break;
709
710
0
  case ND_QUANT:
711
0
    {
712
0
      QuantNode* qn = QUANT_(node);
713
714
0
      if (qn->lower == qn->upper) {
715
0
        if (qn->upper == 0) {
716
0
          mmcl_set(ci, 0);
717
0
        }
718
0
        else {
719
0
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
720
0
          if (r < 0) break;
721
0
          mmcl_multiply(ci, qn->lower);
722
0
        }
723
0
      }
724
0
      else {
725
0
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
726
0
        if (r < 0) break;
727
0
        mmcl_repeat_range_multiply(ci, qn->lower, qn->upper);
728
0
      }
729
0
    }
730
0
    break;
731
732
0
#ifdef USE_CALL
733
0
  case ND_CALL:
734
0
    if (ND_IS_RECURSION(node))
735
0
      mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
736
0
    else
737
0
      r = node_char_len1(ND_BODY(node), reg, ci, env, level);
738
0
    break;
739
0
#endif
740
741
0
  case ND_CTYPE:
742
0
  case ND_CCLASS:
743
0
    mmcl_set(ci, 1);
744
0
    break;
745
746
0
  case ND_BAG:
747
0
    {
748
0
      BagNode* en = BAG_(node);
749
750
0
      switch (en->type) {
751
0
      case BAG_MEMORY:
752
0
        if (ND_IS_FIXED_CLEN(node)) {
753
0
          mmcl_set_min_max(ci, en->min_char_len, en->max_char_len,
754
0
                           ND_IS_FIXED_CLEN_MIN_SURE(node));
755
0
        }
756
0
        else {
757
0
          if (ND_IS_MARK1(node)) {
758
0
            mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
759
0
          }
760
0
          else {
761
0
            ND_STATUS_ADD(node, MARK1);
762
0
            r = node_char_len1(ND_BODY(node), reg, ci, env, level);
763
0
            ND_STATUS_REMOVE(node, MARK1);
764
0
            if (r < 0) break;
765
766
0
            en->min_char_len = ci->min;
767
0
            en->max_char_len = ci->max;
768
0
            ND_STATUS_ADD(node, FIXED_CLEN);
769
0
            if (ci->min_is_sure != FALSE)
770
0
              ND_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);
771
0
          }
772
0
        }
773
        /* can't optimize look-behind if capture exists. */
774
0
        ci->min_is_sure = FALSE;
775
0
        break;
776
0
      case BAG_OPTION:
777
0
      case BAG_STOP_BACKTRACK:
778
0
        r = node_char_len1(ND_BODY(node), reg, ci, env, level);
779
0
        break;
780
0
      case BAG_IF_ELSE:
781
0
        {
782
0
          MinMaxCharLen eci;
783
784
0
          r = node_char_len1(ND_BODY(node), reg, ci, env, level);
785
0
          if (r < 0) break;
786
787
0
          if (IS_NOT_NULL(en->te.Then)) {
788
0
            r = node_char_len1(en->te.Then, reg, &tci, env, level);
789
0
            if (r < 0) break;
790
0
            mmcl_add(ci, &tci);
791
0
          }
792
793
0
          if (IS_NOT_NULL(en->te.Else)) {
794
0
            r = node_char_len1(en->te.Else, reg, &eci, env, level);
795
0
            if (r < 0) break;
796
0
          }
797
0
          else {
798
0
            mmcl_set(&eci, 0);
799
0
          }
800
801
0
          mmcl_alt_merge(ci, &eci);
802
0
        }
803
0
        break;
804
0
      default: /* never come here */
805
0
        r = ONIGERR_PARSER_BUG;
806
0
        break;
807
0
      }
808
0
    }
809
0
    break;
810
811
0
  case ND_GIMMICK:
812
0
    mmcl_set(ci, 0);
813
0
    break;
814
815
0
  case ND_ANCHOR:
816
0
  zero:
817
0
    mmcl_set(ci, 0);
818
    /* can't optimize look-behind if anchor exists. */
819
0
    ci->min_is_sure = FALSE;
820
0
    break;
821
822
0
  case ND_BACKREF:
823
0
    if (ND_IS_CHECKER(node))
824
0
      goto zero;
825
826
0
    if (ND_IS_RECURSION(node)) {
827
0
#ifdef USE_BACKREF_WITH_LEVEL
828
0
      if (ND_IS_NEST_LEVEL(node)) {
829
0
        mmcl_set_min_max(ci, 0, INFINITE_LEN, FALSE);
830
0
        break;
831
0
      }
832
0
#endif
833
834
0
      mmcl_set_min_max(ci, 0, 0, FALSE);
835
0
      break;
836
0
    }
837
838
0
    {
839
0
      int i;
840
0
      int* backs;
841
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
842
0
      BackRefNode* br = BACKREF_(node);
843
844
0
      backs = BACKREFS_P(br);
845
0
      r = node_char_len1(mem_env[backs[0]].mem_node, reg, ci, env, level);
846
0
      if (r < 0) break;
847
0
      if (! mmcl_fixed(ci)) ci->min_is_sure = FALSE;
848
849
0
      for (i = 1; i < br->back_num; i++) {
850
0
        r = node_char_len1(mem_env[backs[i]].mem_node, reg, &tci, env, level);
851
0
        if (r < 0) break;
852
0
        if (! mmcl_fixed(&tci)) tci.min_is_sure = FALSE;
853
0
        mmcl_alt_merge(ci, &tci);
854
0
      }
855
0
    }
856
0
    break;
857
858
0
  default: /* never come here */
859
0
    r = ONIGERR_PARSER_BUG;
860
0
    break;
861
0
  }
862
863
0
  return r;
864
0
}
865
866
static int
867
node_char_len(Node* node, regex_t* reg, MinMaxCharLen* ci, ParseEnv* env)
868
0
{
869
0
  return node_char_len1(node, reg, ci, env, 0);
870
0
}
871
872
873
static int
874
add_op(regex_t* reg, int opcode)
875
0
{
876
0
  int r;
877
878
0
  r = ops_new(reg);
879
0
  if (r != ONIG_NORMAL) return r;
880
881
0
#ifdef USE_DIRECT_THREADED_CODE
882
0
  *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
883
#else
884
  reg->ops_curr->opcode = opcode;
885
#endif
886
887
0
  return 0;
888
0
}
889
890
static int compile_length_tree(Node* node, regex_t* reg, ParseEnv* env);
891
static int compile_tree(Node* node, regex_t* reg, ParseEnv* env);
892
893
894
/* True for the string opcodes whose operand carries an explicit count
   (OP_STR_N, OP_STR_MB2N, OP_STR_MB3N, OP_STR_MBN). */
#define IS_NEED_STR_LEN_OP(op) \
   ((op) == OP_STR_N    || (op) == OP_STR_MB2N ||\
    (op) == OP_STR_MB3N || (op) == OP_STR_MBN)
897
898
static int
899
select_str_opcode(int mb_len, int str_len)
900
0
{
901
0
  int op;
902
903
0
  switch (mb_len) {
904
0
  case 1:
905
0
    switch (str_len) {
906
0
    case 1:  op = OP_STR_1; break;
907
0
    case 2:  op = OP_STR_2; break;
908
0
    case 3:  op = OP_STR_3; break;
909
0
    case 4:  op = OP_STR_4; break;
910
0
    case 5:  op = OP_STR_5; break;
911
0
    default: op = OP_STR_N; break;
912
0
    }
913
0
    break;
914
915
0
  case 2:
916
0
    switch (str_len) {
917
0
    case 1:  op = OP_STR_MB2N1; break;
918
0
    case 2:  op = OP_STR_MB2N2; break;
919
0
    case 3:  op = OP_STR_MB2N3; break;
920
0
    default: op = OP_STR_MB2N;  break;
921
0
    }
922
0
    break;
923
924
0
  case 3:
925
0
    op = OP_STR_MB3N;
926
0
    break;
927
928
0
  default:
929
0
    op = OP_STR_MBN;
930
0
    break;
931
0
  }
932
933
0
  return op;
934
0
}
935
936
static int
937
is_strict_real_node(Node* node)
938
0
{
939
0
  switch (ND_TYPE(node)) {
940
0
  case ND_STRING:
941
0
    {
942
0
      StrNode* sn = STR_(node);
943
0
      return (sn->end != sn->s);
944
0
    }
945
0
    break;
946
947
0
  case ND_CCLASS:
948
0
  case ND_CTYPE:
949
0
    return 1;
950
0
    break;
951
952
0
  default:
953
0
    return 0;
954
0
    break;
955
0
  }
956
0
}
957
958
static int
959
compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ParseEnv* env)
960
0
{
961
0
  int r;
962
0
  int saved_num_empty_check;
963
0
  int emptiness;
964
0
  Node* body;
965
966
0
  body = ND_BODY((Node* )qn);
967
0
  emptiness = qn->emptiness;
968
0
  saved_num_empty_check = reg->num_empty_check;
969
970
0
  if (emptiness != BODY_IS_NOT_EMPTY) {
971
0
    r = add_op(reg, OP_EMPTY_CHECK_START);
972
0
    if (r != 0) return r;
973
0
    COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
974
0
    reg->num_empty_check++;
975
0
  }
976
977
0
  r = compile_tree(body, reg, env);
978
0
  if (r != 0) return r;
979
980
0
  if (emptiness != BODY_IS_NOT_EMPTY) {
981
0
    if (emptiness == BODY_MAY_BE_EMPTY)
982
0
      r = add_op(reg, OP_EMPTY_CHECK_END);
983
0
    else if (emptiness == BODY_MAY_BE_EMPTY_MEM) {
984
0
      if (ND_IS_EMPTY_STATUS_CHECK(qn) != 0 && qn->empty_status_mem != 0) {
985
0
        r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
986
0
        if (r != 0) return r;
987
0
        COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
988
0
      }
989
0
      else
990
0
        r = add_op(reg, OP_EMPTY_CHECK_END);
991
0
    }
992
0
#ifdef USE_CALL
993
0
    else if (emptiness == BODY_MAY_BE_EMPTY_REC) {
994
0
      r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
995
0
      if (r != 0) return r;
996
0
      COP(reg)->empty_check_end.empty_status_mem = qn->empty_status_mem;
997
0
    }
998
0
#endif
999
1000
0
    if (r != 0) return r;
1001
0
    COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
1002
0
  }
1003
0
  return r;
1004
0
}
1005
1006
#ifdef USE_CALL
1007
static int
1008
compile_call(CallNode* node, regex_t* reg, ParseEnv* env)
1009
0
{
1010
0
  int r;
1011
0
  int offset;
1012
1013
0
  r = add_op(reg, OP_CALL);
1014
0
  if (r != 0) return r;
1015
1016
0
  COP(reg)->call.addr = 0; /* dummy addr. */
1017
#ifdef ONIG_DEBUG_MATCH_COUNTER
1018
  COP(reg)->call.called_mem = node->called_gnum;
1019
#endif
1020
1021
0
  offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
1022
0
  r = unset_addr_list_add(env->unset_addr_list, offset, ND_CALL_BODY(node));
1023
0
  return r;
1024
0
}
1025
#endif
1026
1027
static int
1028
compile_tree_n_times(Node* node, int n, regex_t* reg, ParseEnv* env)
1029
0
{
1030
0
  int i, r;
1031
1032
0
  for (i = 0; i < n; i++) {
1033
0
    r = compile_tree(node, reg, env);
1034
0
    if (r != 0) return r;
1035
0
  }
1036
0
  return 0;
1037
0
}
1038
1039
static int
1040
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
1041
                          regex_t* reg ARG_UNUSED)
1042
0
{
1043
0
  return 1;
1044
0
}
1045
1046
static int
1047
add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
1048
0
{
1049
0
  int op;
1050
0
  int r;
1051
0
  int byte_len;
1052
0
  UChar* p;
1053
0
  UChar* end;
1054
1055
0
  op = select_str_opcode(mb_len, str_len);
1056
0
  r = add_op(reg, op);
1057
0
  if (r != 0) return r;
1058
1059
0
  byte_len = mb_len * str_len;
1060
0
  end = s + byte_len;
1061
1062
0
  if (op == OP_STR_MBN) {
1063
0
    p = onigenc_strdup(reg->enc, s, end);
1064
0
    CHECK_NULL_RETURN_MEMERR(p);
1065
1066
0
    COP(reg)->exact_len_n.len = mb_len;
1067
0
    COP(reg)->exact_len_n.n   = str_len;
1068
0
    COP(reg)->exact_len_n.s   = p;
1069
0
  }
1070
0
  else if (IS_NEED_STR_LEN_OP(op)) {
1071
0
    p = onigenc_strdup(reg->enc, s, end);
1072
0
    CHECK_NULL_RETURN_MEMERR(p);
1073
0
    COP(reg)->exact_n.n = str_len;
1074
0
    COP(reg)->exact_n.s = p;
1075
0
  }
1076
0
  else {
1077
0
    xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
1078
0
    xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
1079
0
  }
1080
1081
0
  return 0;
1082
0
}
1083
1084
static int
1085
compile_length_string_node(Node* node, regex_t* reg)
1086
0
{
1087
0
  int rlen, r, len, prev_len, slen;
1088
0
  UChar *p, *prev;
1089
0
  StrNode* sn;
1090
0
  OnigEncoding enc = reg->enc;
1091
1092
0
  sn = STR_(node);
1093
0
  if (sn->end <= sn->s)
1094
0
    return 0;
1095
1096
0
  p = prev = sn->s;
1097
0
  prev_len = enclen(enc, p);
1098
0
  p += prev_len;
1099
0
  slen = 1;
1100
0
  rlen = 0;
1101
1102
0
  for (; p < sn->end; ) {
1103
0
    len = enclen(enc, p);
1104
0
    if (len == prev_len) {
1105
0
      slen++;
1106
0
    }
1107
0
    else {
1108
0
      r = add_compile_string_length(prev, prev_len, slen, reg);
1109
0
      rlen += r;
1110
0
      prev = p;
1111
0
      slen = 1;
1112
0
      prev_len = len;
1113
0
    }
1114
0
    p += len;
1115
0
  }
1116
1117
0
  r = add_compile_string_length(prev, prev_len, slen, reg);
1118
0
  rlen += r;
1119
0
  return rlen;
1120
0
}
1121
1122
static int
1123
compile_length_string_crude_node(StrNode* sn, regex_t* reg)
1124
0
{
1125
0
  if (sn->end <= sn->s)
1126
0
    return 0;
1127
1128
0
  return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
1129
0
                                   reg);
1130
0
}
1131
1132
static int
1133
compile_string_node(Node* node, regex_t* reg)
1134
0
{
1135
0
  int r, len, prev_len, slen;
1136
0
  UChar *p, *prev, *end;
1137
0
  StrNode* sn;
1138
0
  OnigEncoding enc = reg->enc;
1139
1140
0
  sn = STR_(node);
1141
0
  if (sn->end <= sn->s)
1142
0
    return 0;
1143
1144
0
  end = sn->end;
1145
1146
0
  p = prev = sn->s;
1147
0
  prev_len = enclen(enc, p);
1148
0
  p += prev_len;
1149
0
  slen = 1;
1150
1151
0
  for (; p < end; ) {
1152
0
    len = enclen(enc, p);
1153
0
    if (len == prev_len) {
1154
0
      slen++;
1155
0
    }
1156
0
    else {
1157
0
      r = add_compile_string(prev, prev_len, slen, reg);
1158
0
      if (r != 0) return r;
1159
1160
0
      prev  = p;
1161
0
      slen  = 1;
1162
0
      prev_len = len;
1163
0
    }
1164
1165
0
    p += len;
1166
0
  }
1167
1168
0
  return add_compile_string(prev, prev_len, slen, reg);
1169
0
}
1170
1171
static int
1172
compile_string_crude_node(StrNode* sn, regex_t* reg)
1173
0
{
1174
0
  if (sn->end <= sn->s)
1175
0
    return 0;
1176
1177
0
  return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
1178
0
}
1179
1180
static void*
1181
set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
1182
0
{
1183
0
  size_t len;
1184
0
  void* p;
1185
1186
0
  len = (size_t )mbuf->used;
1187
0
  p = xmalloc(len);
1188
0
  if (IS_NULL(p)) return NULL;
1189
1190
0
  xmemcpy(p, mbuf->p, len);
1191
0
  return p;
1192
0
}
1193
1194
static int
1195
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
1196
0
{
1197
0
  return 1;
1198
0
}
1199
1200
static int
1201
compile_cclass_node(CClassNode* cc, regex_t* reg)
1202
0
{
1203
0
  int r;
1204
1205
0
  if (IS_NULL(cc->mbuf)) {
1206
0
    r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
1207
0
    if (r != 0) return r;
1208
1209
0
    COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
1210
0
    CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
1211
0
    xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
1212
0
  }
1213
0
  else {
1214
0
    void* p;
1215
1216
0
    if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
1217
0
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
1218
0
      if (r != 0) return r;
1219
1220
0
      p = set_multi_byte_cclass(cc->mbuf, reg);
1221
0
      CHECK_NULL_RETURN_MEMERR(p);
1222
0
      COP(reg)->cclass_mb.mb = p;
1223
0
    }
1224
0
    else {
1225
0
      r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
1226
0
      if (r != 0) return r;
1227
1228
0
      COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
1229
0
      CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
1230
0
      xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
1231
1232
0
      p = set_multi_byte_cclass(cc->mbuf, reg);
1233
0
      CHECK_NULL_RETURN_MEMERR(p);
1234
0
      COP(reg)->cclass_mix.mb = p;
1235
0
    }
1236
0
  }
1237
1238
0
  return 0;
1239
0
}
1240
1241
static void
1242
set_addr_in_repeat_range(regex_t* reg)
1243
0
{
1244
0
  int i;
1245
1246
0
  for (i = 0; i < reg->num_repeat; i++) {
1247
0
    RepeatRange* p = reg->repeat_range + i;
1248
0
    int offset = p->u.offset;
1249
0
    p->u.pcode = reg->ops + offset;
1250
0
  }
1251
0
}
1252
1253
static int
1254
entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
1255
0
{
1256
0
#define REPEAT_RANGE_ALLOC  4
1257
1258
0
  RepeatRange* p;
1259
1260
0
  if (reg->repeat_range_alloc == 0) {
1261
0
    p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
1262
0
    CHECK_NULL_RETURN_MEMERR(p);
1263
0
    reg->repeat_range = p;
1264
0
    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
1265
0
  }
1266
0
  else if (reg->repeat_range_alloc <= id) {
1267
0
    int n;
1268
0
    n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
1269
0
    p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
1270
0
    CHECK_NULL_RETURN_MEMERR(p);
1271
0
    reg->repeat_range = p;
1272
0
    reg->repeat_range_alloc = n;
1273
0
  }
1274
0
  else {
1275
0
    p = reg->repeat_range;
1276
0
  }
1277
1278
0
  p[id].lower    = lower;
1279
0
  p[id].upper    = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
1280
0
  p[id].u.offset = ops_index;
1281
0
  return 0;
1282
0
}
1283
1284
static int
1285
compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
1286
                          regex_t* reg, ParseEnv* env)
1287
0
{
1288
0
  int r;
1289
0
  int num_repeat = reg->num_repeat++;
1290
1291
0
  r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
1292
0
  if (r != 0) return r;
1293
1294
0
  COP(reg)->repeat.id   = num_repeat;
1295
0
  COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
1296
1297
0
  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
1298
0
                         COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
1299
0
  if (r != 0) return r;
1300
1301
0
  r = compile_quant_body_with_empty_check(qn, reg, env);
1302
0
  if (r != 0) return r;
1303
1304
0
  r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
1305
0
  if (r != 0) return r;
1306
1307
0
  COP(reg)->repeat_inc.id = num_repeat;
1308
0
  return r;
1309
0
}
1310
1311
static int
1312
is_anychar_infinite_greedy(QuantNode* qn)
1313
0
{
1314
0
  if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
1315
0
      ND_IS_ANYCHAR(ND_QUANT_BODY(qn)))
1316
0
    return 1;
1317
0
  else
1318
0
    return 0;
1319
0
}
1320
1321
0
#define QUANTIFIER_EXPAND_LIMIT_SIZE   10
1322
#define CKN_ON   (ckn > 0)
1323
1324
static int
1325
compile_length_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1326
0
{
1327
0
  int len, mod_tlen;
1328
0
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1329
0
  enum BodyEmptyType emptiness = qn->emptiness;
1330
0
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1331
1332
0
  if (tlen < 0) return tlen;
1333
0
  if (tlen == 0) return 0;
1334
1335
  /* anychar repeat */
1336
0
  if (is_anychar_infinite_greedy(qn)) {
1337
0
    if (qn->lower <= 1 ||
1338
0
        len_multiply_cmp((OnigLen )tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
1339
0
      if (IS_NOT_NULL(qn->next_head_exact))
1340
0
        return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
1341
0
      else
1342
0
        return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
1343
0
    }
1344
0
  }
1345
1346
0
  mod_tlen = tlen;
1347
0
  if (emptiness != BODY_IS_NOT_EMPTY)
1348
0
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1349
1350
0
  if (infinite &&
1351
0
      (qn->lower <= 1 ||
1352
0
       len_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1353
0
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1354
0
      len = OPSIZE_JUMP;
1355
0
    }
1356
0
    else {
1357
0
      len = tlen * qn->lower;
1358
0
    }
1359
1360
0
    if (qn->greedy) {
1361
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1362
0
      if (IS_NOT_NULL(qn->head_exact))
1363
0
        len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
1364
0
      else
1365
0
#endif
1366
0
      if (IS_NOT_NULL(qn->next_head_exact))
1367
0
        len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
1368
0
      else
1369
0
        len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
1370
0
    }
1371
0
    else
1372
0
      len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
1373
0
  }
1374
0
  else if (qn->upper == 0) {
1375
0
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1376
0
      len = OPSIZE_JUMP + tlen;
1377
0
    }
1378
0
    else
1379
0
      len = 0;
1380
0
  }
1381
0
  else if (!infinite && qn->greedy &&
1382
0
           (qn->upper == 1 ||
1383
0
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1384
0
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1385
0
    len = tlen * qn->lower;
1386
0
    len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
1387
0
  }
1388
0
  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1389
0
    len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
1390
0
  }
1391
0
  else {
1392
0
    len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
1393
0
  }
1394
1395
0
  return len;
1396
0
}
1397
1398
static int
1399
compile_quantifier_node(QuantNode* qn, regex_t* reg, ParseEnv* env)
1400
0
{
1401
0
  int i, r, mod_tlen;
1402
0
  int infinite = IS_INFINITE_REPEAT(qn->upper);
1403
0
  enum BodyEmptyType emptiness = qn->emptiness;
1404
0
  int tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1405
1406
0
  if (tlen < 0) return tlen;
1407
0
  if (tlen == 0) return 0;
1408
1409
0
  if (is_anychar_infinite_greedy(qn) &&
1410
0
      (qn->lower <= 1 ||
1411
0
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1412
0
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1413
0
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1414
0
    if (r != 0) return r;
1415
0
    if (IS_NOT_NULL(qn->next_head_exact)) {
1416
0
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1417
0
                 OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
1418
0
      if (r != 0) return r;
1419
1420
0
      COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
1421
0
      return 0;
1422
0
    }
1423
0
    else {
1424
0
      r = add_op(reg, ND_IS_MULTILINE(ND_QUANT_BODY(qn)) ?
1425
0
                 OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
1426
0
      return r;
1427
0
    }
1428
0
  }
1429
1430
0
  mod_tlen = tlen;
1431
0
  if (emptiness != BODY_IS_NOT_EMPTY)
1432
0
    mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
1433
1434
0
  if (infinite &&
1435
0
      (qn->lower <= 1 ||
1436
0
       len_multiply_cmp((OnigLen )tlen, qn->lower,
1437
0
                        QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1438
0
    int addr;
1439
1440
0
    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1441
0
      r = add_op(reg, OP_JUMP);
1442
0
      if (r != 0) return r;
1443
0
      if (qn->greedy) {
1444
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1445
0
        if (IS_NOT_NULL(qn->head_exact))
1446
0
          COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
1447
0
        else
1448
0
#endif
1449
0
        if (IS_NOT_NULL(qn->next_head_exact))
1450
0
          COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
1451
0
        else
1452
0
          COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
1453
0
      }
1454
0
      else {
1455
0
        COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
1456
0
      }
1457
0
    }
1458
0
    else {
1459
0
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1460
0
      if (r != 0) return r;
1461
0
    }
1462
1463
0
    if (qn->greedy) {
1464
0
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1465
0
      if (IS_NOT_NULL(qn->head_exact)) {
1466
0
        r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
1467
0
        if (r != 0) return r;
1468
0
        COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1469
0
        COP(reg)->push_or_jump_exact1.c    = STR_(qn->head_exact)->s[0];
1470
1471
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1472
0
        if (r != 0) return r;
1473
1474
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
1475
0
      }
1476
0
      else
1477
0
#endif
1478
0
      if (IS_NOT_NULL(qn->next_head_exact)) {
1479
0
        r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
1480
0
        if (r != 0) return r;
1481
0
        COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1482
0
        COP(reg)->push_if_peek_next.c    = STR_(qn->next_head_exact)->s[0];
1483
1484
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1485
0
        if (r != 0) return r;
1486
1487
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
1488
0
      }
1489
0
      else {
1490
0
        r = add_op(reg, OP_PUSH);
1491
0
        if (r != 0) return r;
1492
0
        COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
1493
1494
0
        r = compile_quant_body_with_empty_check(qn, reg, env);
1495
0
        if (r != 0) return r;
1496
1497
0
        addr = -(mod_tlen + (int )OPSIZE_PUSH);
1498
0
      }
1499
1500
0
      r = add_op(reg, OP_JUMP);
1501
0
      if (r != 0) return r;
1502
0
      COP(reg)->jump.addr = addr;
1503
0
    }
1504
0
    else {
1505
0
      r = add_op(reg, OP_JUMP);
1506
0
      if (r != 0) return r;
1507
0
      COP(reg)->jump.addr = mod_tlen + SIZE_INC;
1508
1509
0
      r = compile_quant_body_with_empty_check(qn, reg, env);
1510
0
      if (r != 0) return r;
1511
1512
0
      r = add_op(reg, OP_PUSH);
1513
0
      if (r != 0) return r;
1514
0
      COP(reg)->push.addr = -mod_tlen;
1515
0
    }
1516
0
  }
1517
0
  else if (qn->upper == 0) {
1518
0
    if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
1519
0
      r = add_op(reg, OP_JUMP);
1520
0
      if (r != 0) return r;
1521
0
      COP(reg)->jump.addr = tlen + SIZE_INC;
1522
1523
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1524
0
    }
1525
0
    else {
1526
      /* Nothing output */
1527
0
      r = 0;
1528
0
    }
1529
0
  }
1530
0
  else if (! infinite && qn->greedy &&
1531
0
           (qn->upper == 1 ||
1532
0
            len_multiply_cmp((OnigLen )tlen + OPSIZE_PUSH, qn->upper,
1533
0
                             QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
1534
0
    int n = qn->upper - qn->lower;
1535
1536
0
    r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1537
0
    if (r != 0) return r;
1538
1539
0
    for (i = 0; i < n; i++) {
1540
0
      int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
1541
0
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1542
1543
0
      r = add_op(reg, OP_PUSH);
1544
0
      if (r != 0) return r;
1545
0
      COP(reg)->push.addr = v;
1546
1547
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1548
0
      if (r != 0) return r;
1549
0
    }
1550
0
  }
1551
0
  else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1552
0
    r = add_op(reg, OP_PUSH);
1553
0
    if (r != 0) return r;
1554
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
1555
1556
0
    r = add_op(reg, OP_JUMP);
1557
0
    if (r != 0) return r;
1558
0
    COP(reg)->jump.addr = tlen + SIZE_INC;
1559
1560
0
    r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1561
0
  }
1562
0
  else {
1563
0
    r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
1564
0
  }
1565
0
  return r;
1566
0
}
1567
1568
static int
1569
compile_length_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1570
0
{
1571
0
  int tlen;
1572
1573
0
  tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1574
1575
0
  return tlen;
1576
0
}
1577
1578
static int
1579
compile_option_node(BagNode* node, regex_t* reg, ParseEnv* env)
1580
0
{
1581
0
  int r;
1582
1583
0
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1584
1585
0
  return r;
1586
0
}
1587
1588
static int
1589
compile_length_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1590
0
{
1591
0
  int len;
1592
0
  int tlen;
1593
1594
0
  if (node->type == BAG_OPTION)
1595
0
    return compile_length_option_node(node, reg, env);
1596
1597
0
  if (ND_BAG_BODY(node)) {
1598
0
    tlen = compile_length_tree(ND_BAG_BODY(node), reg, env);
1599
0
    if (tlen < 0) return tlen;
1600
0
  }
1601
0
  else
1602
0
    tlen = 0;
1603
1604
0
  switch (node->type) {
1605
0
  case BAG_MEMORY:
1606
0
#ifdef USE_CALL
1607
1608
0
    if (node->m.regnum == 0 && ND_IS_CALLED(node)) {
1609
0
      len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1610
0
      return len;
1611
0
    }
1612
1613
0
    if (ND_IS_CALLED(node)) {
1614
0
      len = OPSIZE_MEM_START_PUSH + tlen
1615
0
        + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
1616
0
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1617
0
        len += (ND_IS_RECURSION(node)
1618
0
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1619
0
      else
1620
0
        len += (ND_IS_RECURSION(node)
1621
0
                ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1622
0
    }
1623
0
    else if (ND_IS_RECURSION(node)) {
1624
0
      len = OPSIZE_MEM_START_PUSH;
1625
0
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1626
0
                     ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
1627
0
    }
1628
0
    else
1629
0
#endif
1630
0
    {
1631
0
      if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1632
0
        len = OPSIZE_MEM_START_PUSH;
1633
0
      else
1634
0
        len = OPSIZE_MEM_START;
1635
1636
0
      len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
1637
0
                     ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
1638
0
    }
1639
0
    break;
1640
1641
0
  case BAG_STOP_BACKTRACK:
1642
0
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1643
0
      int v;
1644
0
      QuantNode* qn;
1645
1646
0
      qn = QUANT_(ND_BAG_BODY(node));
1647
0
      tlen = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1648
0
      if (tlen < 0) return tlen;
1649
1650
0
      v = onig_positive_int_multiply(qn->lower, tlen);
1651
0
      if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
1652
0
      len = v + OPSIZE_PUSH + tlen + OPSIZE_POP + OPSIZE_JUMP;
1653
0
    }
1654
0
    else {
1655
0
      len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1656
0
    }
1657
0
    break;
1658
1659
0
  case BAG_IF_ELSE:
1660
0
    {
1661
0
      Node* cond = ND_BAG_BODY(node);
1662
0
      Node* Then = node->te.Then;
1663
0
      Node* Else = node->te.Else;
1664
1665
0
      len = compile_length_tree(cond, reg, env);
1666
0
      if (len < 0) return len;
1667
0
      len += OPSIZE_PUSH + OPSIZE_MARK + OPSIZE_CUT_TO_MARK;
1668
1669
0
      if (IS_NOT_NULL(Then)) {
1670
0
        tlen = compile_length_tree(Then, reg, env);
1671
0
        if (tlen < 0) return tlen;
1672
0
        len += tlen;
1673
0
      }
1674
1675
0
      len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK;
1676
1677
0
      if (IS_NOT_NULL(Else)) {
1678
0
        tlen = compile_length_tree(Else, reg, env);
1679
0
        if (tlen < 0) return tlen;
1680
0
        len += tlen;
1681
0
      }
1682
0
    }
1683
0
    break;
1684
1685
0
  case BAG_OPTION:
1686
    /* never come here, but set for escape warning */
1687
0
    len = 0;
1688
0
    break;
1689
1690
0
  default:
1691
0
    return ONIGERR_TYPE_BUG;
1692
0
    break;
1693
0
  }
1694
1695
0
  return len;
1696
0
}
1697
1698
static int
1699
compile_bag_memory_node(BagNode* node, regex_t* reg, ParseEnv* env)
1700
0
{
1701
0
  int r;
1702
1703
0
#ifdef USE_CALL
1704
0
  if (ND_IS_CALLED(node)) {
1705
0
    int len;
1706
1707
0
    r = add_op(reg, OP_CALL);
1708
0
    if (r != 0) return r;
1709
1710
0
    node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
1711
0
    ND_STATUS_ADD(node, FIXED_ADDR);
1712
0
    COP(reg)->call.addr = (int )node->m.called_addr;
1713
1714
0
    if (node->m.regnum == 0) {
1715
0
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1716
0
      len += OPSIZE_RETURN;
1717
1718
0
      r = add_op(reg, OP_JUMP);
1719
0
      if (r != 0) return r;
1720
0
      COP(reg)->jump.addr = len + SIZE_INC;
1721
1722
0
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1723
0
      if (r != 0) return r;
1724
1725
0
      r = add_op(reg, OP_RETURN);
1726
0
      return r;
1727
0
    }
1728
0
    else {
1729
0
      len = compile_length_tree(ND_BAG_BODY(node), reg, env);
1730
0
      len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
1731
0
      if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1732
0
        len += (ND_IS_RECURSION(node)
1733
0
                ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
1734
0
      else
1735
0
        len += (ND_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
1736
1737
0
      r = add_op(reg, OP_JUMP);
1738
0
      if (r != 0) return r;
1739
0
      COP(reg)->jump.addr = len + SIZE_INC;
1740
0
    }
1741
0
  }
1742
0
#endif
1743
1744
0
  if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
1745
0
    r = add_op(reg, OP_MEM_START_PUSH);
1746
0
  else
1747
0
    r = add_op(reg, OP_MEM_START);
1748
0
  if (r != 0) return r;
1749
0
  COP(reg)->memory_start.num = node->m.regnum;
1750
1751
0
  r = compile_tree(ND_BAG_BODY(node), reg, env);
1752
0
  if (r != 0) return r;
1753
1754
0
#ifdef USE_CALL
1755
0
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1756
0
    r = add_op(reg, (ND_IS_RECURSION(node)
1757
0
                     ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
1758
0
  else
1759
0
    r = add_op(reg, (ND_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
1760
0
  if (r != 0) return r;
1761
0
  COP(reg)->memory_end.num = node->m.regnum;
1762
1763
0
  if (ND_IS_CALLED(node)) {
1764
0
    r = add_op(reg, OP_RETURN);
1765
0
  }
1766
#else
1767
  if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
1768
    r = add_op(reg, OP_MEM_END_PUSH);
1769
  else
1770
    r = add_op(reg, OP_MEM_END);
1771
  if (r != 0) return r;
1772
  COP(reg)->memory_end.num = node->m.regnum;
1773
#endif
1774
1775
0
  return r;
1776
0
}
1777
1778
static int
1779
compile_bag_node(BagNode* node, regex_t* reg, ParseEnv* env)
1780
0
{
1781
0
  int r, len;
1782
1783
0
  switch (node->type) {
1784
0
  case BAG_MEMORY:
1785
0
    r = compile_bag_memory_node(node, reg, env);
1786
0
    break;
1787
1788
0
  case BAG_OPTION:
1789
0
    r = compile_option_node(node, reg, env);
1790
0
    break;
1791
1792
0
  case BAG_STOP_BACKTRACK:
1793
0
    if (ND_IS_STRICT_REAL_REPEAT(node)) {
1794
0
      QuantNode* qn = QUANT_(ND_BAG_BODY(node));
1795
0
      r = compile_tree_n_times(ND_QUANT_BODY(qn), qn->lower, reg, env);
1796
0
      if (r != 0) return r;
1797
1798
0
      len = compile_length_tree(ND_QUANT_BODY(qn), reg, env);
1799
0
      if (len < 0) return len;
1800
1801
0
      r = add_op(reg, OP_PUSH);
1802
0
      if (r != 0) return r;
1803
0
      COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP + OPSIZE_JUMP;
1804
1805
0
      r = compile_tree(ND_QUANT_BODY(qn), reg, env);
1806
0
      if (r != 0) return r;
1807
0
      r = add_op(reg, OP_POP);
1808
0
      if (r != 0) return r;
1809
1810
0
      r = add_op(reg, OP_JUMP);
1811
0
      if (r != 0) return r;
1812
0
      COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP);
1813
0
    }
1814
0
    else {
1815
0
      MemNumType mid;
1816
1817
0
      ID_ENTRY(env, mid);
1818
0
      r = add_op(reg, OP_MARK);
1819
0
      if (r != 0) return r;
1820
0
      COP(reg)->mark.id = mid;
1821
0
      COP(reg)->mark.save_pos = 0;
1822
1823
0
      r = compile_tree(ND_BAG_BODY(node), reg, env);
1824
0
      if (r != 0) return r;
1825
0
      r = add_op(reg, OP_CUT_TO_MARK);
1826
0
      if (r != 0) return r;
1827
0
      COP(reg)->cut_to_mark.id = mid;
1828
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1829
0
    }
1830
0
    break;
1831
1832
0
  case BAG_IF_ELSE:
1833
0
    {
1834
0
      int cond_len, then_len, else_len, jump_len;
1835
0
      MemNumType mid;
1836
0
      Node* cond = ND_BAG_BODY(node);
1837
0
      Node* Then = node->te.Then;
1838
0
      Node* Else = node->te.Else;
1839
1840
0
      ID_ENTRY(env, mid);
1841
1842
0
      r = add_op(reg, OP_MARK);
1843
0
      if (r != 0) return r;
1844
0
      COP(reg)->mark.id = mid;
1845
0
      COP(reg)->mark.save_pos = 0;
1846
1847
0
      cond_len = compile_length_tree(cond, reg, env);
1848
0
      if (cond_len < 0) return cond_len;
1849
0
      if (IS_NOT_NULL(Then)) {
1850
0
        then_len = compile_length_tree(Then, reg, env);
1851
0
        if (then_len < 0) return then_len;
1852
0
      }
1853
0
      else
1854
0
        then_len = 0;
1855
1856
0
      jump_len = cond_len + then_len + OPSIZE_CUT_TO_MARK + OPSIZE_JUMP;
1857
1858
0
      r = add_op(reg, OP_PUSH);
1859
0
      if (r != 0) return r;
1860
0
      COP(reg)->push.addr = SIZE_INC + jump_len;
1861
1862
0
      r = compile_tree(cond, reg, env);
1863
0
      if (r != 0) return r;
1864
0
      r = add_op(reg, OP_CUT_TO_MARK);
1865
0
      if (r != 0) return r;
1866
0
      COP(reg)->cut_to_mark.id = mid;
1867
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1868
1869
0
      if (IS_NOT_NULL(Then)) {
1870
0
        r = compile_tree(Then, reg, env);
1871
0
        if (r != 0) return r;
1872
0
      }
1873
1874
0
      if (IS_NOT_NULL(Else)) {
1875
0
        else_len = compile_length_tree(Else, reg, env);
1876
0
        if (else_len < 0) return else_len;
1877
0
      }
1878
0
      else
1879
0
        else_len = 0;
1880
1881
0
      r = add_op(reg, OP_JUMP);
1882
0
      if (r != 0) return r;
1883
0
      COP(reg)->jump.addr = OPSIZE_CUT_TO_MARK + else_len + SIZE_INC;
1884
1885
0
      r = add_op(reg, OP_CUT_TO_MARK);
1886
0
      if (r != 0) return r;
1887
0
      COP(reg)->cut_to_mark.id = mid;
1888
0
      COP(reg)->cut_to_mark.restore_pos = 0;
1889
1890
0
      if (IS_NOT_NULL(Else)) {
1891
0
        r = compile_tree(Else, reg, env);
1892
0
      }
1893
0
    }
1894
0
    break;
1895
1896
0
  default:
1897
0
    return ONIGERR_TYPE_BUG;
1898
0
    break;
1899
0
  }
1900
1901
0
  return r;
1902
0
}
1903
1904
static int
1905
compile_length_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1906
0
{
1907
0
  int len;
1908
0
  int tlen = 0;
1909
1910
0
  if (IS_NOT_NULL(ND_ANCHOR_BODY(node))) {
1911
0
    tlen = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
1912
0
    if (tlen < 0) return tlen;
1913
0
  }
1914
1915
0
  switch (node->type) {
1916
0
  case ANCR_PREC_READ:
1917
0
    len = OPSIZE_MARK + tlen + OPSIZE_CUT_TO_MARK;
1918
0
    break;
1919
0
  case ANCR_PREC_READ_NOT:
1920
0
    len = OPSIZE_PUSH + OPSIZE_MARK + tlen + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
1921
0
    break;
1922
0
  case ANCR_LOOK_BEHIND:
1923
0
    if (node->char_min_len == node->char_max_len)
1924
0
      len = OPSIZE_MARK + OPSIZE_STEP_BACK_START + tlen + OPSIZE_CUT_TO_MARK;
1925
0
    else {
1926
0
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_UPDATE_VAR + OPSIZE_FAIL + OPSIZE_JUMP + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_CUT_TO_MARK + OPSIZE_UPDATE_VAR;
1927
1928
0
      if (IS_NOT_NULL(node->lead_node)) {
1929
0
        int llen = compile_length_tree(node->lead_node, reg, env);
1930
0
        if (llen < 0) return llen;
1931
1932
0
        len += OPSIZE_MOVE + llen;
1933
0
      }
1934
1935
0
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1936
0
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1937
0
    }
1938
0
    break;
1939
0
  case ANCR_LOOK_BEHIND_NOT:
1940
0
    if (node->char_min_len == node->char_max_len)
1941
0
      len = OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + tlen + OPSIZE_POP_TO_MARK + OPSIZE_FAIL + OPSIZE_POP;
1942
0
    else {
1943
0
      len = OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR + OPSIZE_MARK + OPSIZE_PUSH + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + tlen + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_POP;
1944
1945
0
      if (IS_NOT_NULL(node->lead_node)) {
1946
0
        int llen = compile_length_tree(node->lead_node, reg, env);
1947
0
        if (llen < 0) return llen;
1948
1949
0
        len += OPSIZE_MOVE + llen;
1950
0
      }
1951
1952
0
      if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
1953
0
        len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
1954
0
    }
1955
0
    break;
1956
1957
0
  case ANCR_WORD_BOUNDARY:
1958
0
  case ANCR_NO_WORD_BOUNDARY:
1959
0
#ifdef USE_WORD_BEGIN_END
1960
0
  case ANCR_WORD_BEGIN:
1961
0
  case ANCR_WORD_END:
1962
0
#endif
1963
0
    len = OPSIZE_WORD_BOUNDARY;
1964
0
    break;
1965
1966
0
  case ANCR_TEXT_SEGMENT_BOUNDARY:
1967
0
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
1968
0
    len = SIZE_OPCODE;
1969
0
    break;
1970
1971
0
  default:
1972
0
    len = SIZE_OPCODE;
1973
0
    break;
1974
0
  }
1975
1976
0
  return len;
1977
0
}
1978
1979
static int
1980
compile_anchor_look_behind_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
1981
0
{
1982
0
  int r;
1983
1984
0
  if (node->char_min_len == node->char_max_len) {
1985
0
    MemNumType mid;
1986
1987
0
    ID_ENTRY(env, mid);
1988
0
    r = add_op(reg, OP_MARK);
1989
0
    if (r != 0) return r;
1990
0
    COP(reg)->mark.id = mid;
1991
0
    COP(reg)->mark.save_pos = FALSE;
1992
1993
0
    r = add_op(reg, OP_STEP_BACK_START);
1994
0
    if (r != 0) return r;
1995
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
1996
0
    COP(reg)->step_back_start.remaining = 0;
1997
0
    COP(reg)->step_back_start.addr      = 1;
1998
1999
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2000
0
    if (r != 0) return r;
2001
2002
0
    r = add_op(reg, OP_CUT_TO_MARK);
2003
0
    if (r != 0) return r;
2004
0
    COP(reg)->cut_to_mark.id = mid;
2005
0
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2006
0
  }
2007
0
  else {
2008
0
    MemNumType mid1, mid2, mid3;
2009
0
    OnigLen diff;
2010
2011
0
    if (IS_NOT_NULL(node->lead_node)) {
2012
0
      MinMaxCharLen ci;
2013
2014
0
      r = node_char_len(node->lead_node, reg, &ci, env);
2015
0
      if (r < 0) return r;
2016
0
      r = add_op(reg, OP_MOVE);
2017
0
      if (r != 0) return r;
2018
0
      COP(reg)->move.n = -((RelPositionType )ci.min);
2019
0
      r = compile_tree(node->lead_node, reg, env);
2020
0
      if (r != 0) return r;
2021
0
    }
2022
2023
0
    ID_ENTRY(env, mid1);
2024
0
    r = add_op(reg, OP_SAVE_VAL);
2025
0
    if (r != 0) return r;
2026
0
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2027
0
    COP(reg)->save_val.id   = mid1;
2028
2029
0
    r = add_op(reg, OP_UPDATE_VAR);
2030
0
    if (r != 0) return r;
2031
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2032
2033
0
    ID_ENTRY(env, mid2);
2034
0
    r = add_op(reg, OP_MARK);
2035
0
    if (r != 0) return r;
2036
0
    COP(reg)->mark.id = mid2;
2037
0
    COP(reg)->mark.save_pos = FALSE;
2038
2039
0
    r = add_op(reg, OP_PUSH);
2040
0
    if (r != 0) return r;
2041
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
2042
2043
0
    r = add_op(reg, OP_JUMP);
2044
0
    if (r != 0) return r;
2045
0
    COP(reg)->jump.addr = SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL;
2046
2047
0
    r = add_op(reg, OP_UPDATE_VAR);
2048
0
    if (r != 0) return r;
2049
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2050
0
    COP(reg)->update_var.id    = mid1;
2051
0
    COP(reg)->update_var.clear = FALSE;
2052
0
    r = add_op(reg, OP_FAIL);
2053
0
    if (r != 0) return r;
2054
2055
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2056
0
      ID_ENTRY(env, mid3);
2057
0
      r = add_op(reg, OP_SAVE_VAL);
2058
0
      if (r != 0) return r;
2059
0
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2060
0
      COP(reg)->save_val.id   = mid3;
2061
0
    }
2062
2063
0
    r = add_op(reg, OP_STEP_BACK_START);
2064
0
    if (r != 0) return r;
2065
2066
0
    if (node->char_max_len != INFINITE_LEN)
2067
0
      diff = node->char_max_len - node->char_min_len;
2068
0
    else
2069
0
      diff = INFINITE_LEN;
2070
2071
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2072
0
    COP(reg)->step_back_start.remaining = diff;
2073
0
    COP(reg)->step_back_start.addr      = 2;
2074
2075
0
    r = add_op(reg, OP_STEP_BACK_NEXT);
2076
0
    if (r != 0) return r;
2077
2078
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2079
0
    if (r != 0) return r;
2080
2081
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2082
0
      r = add_op(reg, OP_UPDATE_VAR);
2083
0
      if (r != 0) return r;
2084
0
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2085
0
      COP(reg)->update_var.id    = mid3;
2086
0
      COP(reg)->update_var.clear = FALSE;
2087
0
    }
2088
2089
0
    r = add_op(reg, OP_CHECK_POSITION);
2090
0
    if (r != 0) return r;
2091
0
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2092
2093
0
    r = add_op(reg, OP_CUT_TO_MARK);
2094
0
    if (r != 0) return r;
2095
0
    COP(reg)->cut_to_mark.id = mid2;
2096
0
    COP(reg)->cut_to_mark.restore_pos = FALSE;
2097
2098
0
    r = add_op(reg, OP_UPDATE_VAR);
2099
0
    if (r != 0) return r;
2100
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2101
0
    COP(reg)->update_var.id    = mid1;
2102
0
    COP(reg)->update_var.clear = TRUE;
2103
0
  }
2104
2105
0
  return r;
2106
0
}
2107
2108
static int
2109
compile_anchor_look_behind_not_node(AnchorNode* node, regex_t* reg,
2110
                                    ParseEnv* env)
2111
0
{
2112
0
  int r;
2113
0
  int len;
2114
2115
0
  len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2116
2117
0
  if (node->char_min_len == node->char_max_len) {
2118
0
    MemNumType mid;
2119
2120
0
    ID_ENTRY(env, mid);
2121
0
    r = add_op(reg, OP_MARK);
2122
0
    if (r != 0) return r;
2123
0
    COP(reg)->mark.id = mid;
2124
0
    COP(reg)->mark.save_pos = FALSE;
2125
2126
0
    r = add_op(reg, OP_PUSH);
2127
0
    if (r != 0) return r;
2128
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL;
2129
2130
0
    r = add_op(reg, OP_STEP_BACK_START);
2131
0
    if (r != 0) return r;
2132
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2133
0
    COP(reg)->step_back_start.remaining = 0;
2134
0
    COP(reg)->step_back_start.addr      = 1;
2135
2136
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2137
0
    if (r != 0) return r;
2138
2139
0
    r = add_op(reg, OP_POP_TO_MARK);
2140
0
    if (r != 0) return r;
2141
0
    COP(reg)->pop_to_mark.id = mid;
2142
0
    r = add_op(reg, OP_FAIL);
2143
0
    if (r != 0) return r;
2144
0
    r = add_op(reg, OP_POP);
2145
0
  }
2146
0
  else {
2147
0
    MemNumType mid1, mid2, mid3;
2148
0
    OnigLen diff;
2149
2150
0
    ID_ENTRY(env, mid1);
2151
0
    r = add_op(reg, OP_SAVE_VAL);
2152
0
    if (r != 0) return r;
2153
0
    COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2154
0
    COP(reg)->save_val.id   = mid1;
2155
2156
0
    r = add_op(reg, OP_UPDATE_VAR);
2157
0
    if (r != 0) return r;
2158
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_TO_S;
2159
2160
0
    ID_ENTRY(env, mid2);
2161
0
    r = add_op(reg, OP_MARK);
2162
0
    if (r != 0) return r;
2163
0
    COP(reg)->mark.id = mid2;
2164
0
    COP(reg)->mark.save_pos = FALSE;
2165
2166
0
    r = add_op(reg, OP_PUSH);
2167
0
    if (r != 0) return r;
2168
2169
0
    COP(reg)->push.addr = SIZE_INC + OPSIZE_STEP_BACK_START + OPSIZE_STEP_BACK_NEXT + len + OPSIZE_CHECK_POSITION + OPSIZE_POP_TO_MARK + OPSIZE_UPDATE_VAR + OPSIZE_POP + OPSIZE_FAIL;
2170
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0)
2171
0
      COP(reg)->push.addr += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
2172
2173
0
    if (IS_NOT_NULL(node->lead_node)) {
2174
0
      int clen;
2175
0
      MinMaxCharLen ci;
2176
2177
0
      clen = compile_length_tree(node->lead_node, reg, env);
2178
0
      COP(reg)->push.addr += OPSIZE_MOVE + clen;
2179
2180
0
      r = node_char_len(node->lead_node, reg, &ci, env);
2181
0
      if (r < 0) return r;
2182
0
      r = add_op(reg, OP_MOVE);
2183
0
      if (r != 0) return r;
2184
0
      COP(reg)->move.n = -((RelPositionType )ci.min);
2185
2186
0
      r = compile_tree(node->lead_node, reg, env);
2187
0
      if (r != 0) return r;
2188
0
    }
2189
2190
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2191
0
      ID_ENTRY(env, mid3);
2192
0
      r = add_op(reg, OP_SAVE_VAL);
2193
0
      if (r != 0) return r;
2194
0
      COP(reg)->save_val.type = SAVE_RIGHT_RANGE;
2195
0
      COP(reg)->save_val.id   = mid3;
2196
0
    }
2197
2198
0
    r = add_op(reg, OP_STEP_BACK_START);
2199
0
    if (r != 0) return r;
2200
2201
0
    if (node->char_max_len != INFINITE_LEN)
2202
0
      diff = node->char_max_len - node->char_min_len;
2203
0
    else
2204
0
      diff = INFINITE_LEN;
2205
2206
0
    COP(reg)->step_back_start.initial   = node->char_min_len;
2207
0
    COP(reg)->step_back_start.remaining = diff;
2208
0
    COP(reg)->step_back_start.addr      = 2;
2209
2210
0
    r = add_op(reg, OP_STEP_BACK_NEXT);
2211
0
    if (r != 0) return r;
2212
2213
0
    r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2214
0
    if (r != 0) return r;
2215
2216
0
    if ((env->flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0) {
2217
0
      r = add_op(reg, OP_UPDATE_VAR);
2218
0
      if (r != 0) return r;
2219
0
      COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2220
0
      COP(reg)->update_var.id    = mid3;
2221
0
      COP(reg)->update_var.clear = FALSE;
2222
0
    }
2223
2224
0
    r = add_op(reg, OP_CHECK_POSITION);
2225
0
    if (r != 0) return r;
2226
0
    COP(reg)->check_position.type = CHECK_POSITION_CURRENT_RIGHT_RANGE;
2227
2228
0
    r = add_op(reg, OP_POP_TO_MARK);
2229
0
    if (r != 0) return r;
2230
0
    COP(reg)->pop_to_mark.id = mid2;
2231
2232
0
    r = add_op(reg, OP_UPDATE_VAR);
2233
0
    if (r != 0) return r;
2234
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2235
0
    COP(reg)->update_var.id   = mid1;
2236
0
    COP(reg)->update_var.clear = FALSE;
2237
2238
0
    r = add_op(reg, OP_POP); /* pop save val */
2239
0
    if (r != 0) return r;
2240
0
    r = add_op(reg, OP_FAIL);
2241
0
    if (r != 0) return r;
2242
2243
0
    r = add_op(reg, OP_UPDATE_VAR);
2244
0
    if (r != 0) return r;
2245
0
    COP(reg)->update_var.type = UPDATE_VAR_RIGHT_RANGE_FROM_STACK;
2246
0
    COP(reg)->update_var.id   = mid1;
2247
0
    COP(reg)->update_var.clear = FALSE;
2248
2249
0
    r = add_op(reg, OP_POP); /* pop mark */
2250
0
    if (r != 0) return r;
2251
0
    r = add_op(reg, OP_POP); /* pop save val */
2252
0
  }
2253
2254
0
  return r;
2255
0
}
2256
2257
static int
2258
compile_anchor_node(AnchorNode* node, regex_t* reg, ParseEnv* env)
2259
0
{
2260
0
  int r, len;
2261
0
  enum OpCode op;
2262
0
  MemNumType mid;
2263
2264
0
  switch (node->type) {
2265
0
  case ANCR_BEGIN_BUF:      r = add_op(reg, OP_BEGIN_BUF);      break;
2266
0
  case ANCR_END_BUF:        r = add_op(reg, OP_END_BUF);        break;
2267
0
  case ANCR_BEGIN_LINE:     r = add_op(reg, OP_BEGIN_LINE);     break;
2268
0
  case ANCR_END_LINE:       r = add_op(reg, OP_END_LINE);       break;
2269
0
  case ANCR_SEMI_END_BUF:   r = add_op(reg, OP_SEMI_END_BUF);   break;
2270
0
  case ANCR_BEGIN_POSITION:
2271
0
    r = add_op(reg, OP_CHECK_POSITION);
2272
0
    if (r != 0) return r;
2273
0
    COP(reg)->check_position.type = CHECK_POSITION_SEARCH_START;
2274
0
    break;
2275
2276
0
  case ANCR_WORD_BOUNDARY:
2277
0
    op = OP_WORD_BOUNDARY;
2278
0
  word:
2279
0
    r = add_op(reg, op);
2280
0
    if (r != 0) return r;
2281
0
    COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
2282
0
    break;
2283
2284
0
  case ANCR_NO_WORD_BOUNDARY:
2285
0
    op = OP_NO_WORD_BOUNDARY; goto word;
2286
0
    break;
2287
0
#ifdef USE_WORD_BEGIN_END
2288
0
  case ANCR_WORD_BEGIN:
2289
0
    op = OP_WORD_BEGIN; goto word;
2290
0
    break;
2291
0
  case ANCR_WORD_END:
2292
0
    op = OP_WORD_END; goto word;
2293
0
    break;
2294
0
#endif
2295
2296
0
  case ANCR_TEXT_SEGMENT_BOUNDARY:
2297
0
  case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
2298
0
    {
2299
0
      enum TextSegmentBoundaryType type;
2300
2301
0
      r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
2302
0
      if (r != 0) return r;
2303
2304
0
      type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
2305
0
#ifdef USE_UNICODE_WORD_BREAK
2306
0
      if (ND_IS_TEXT_SEGMENT_WORD(node))
2307
0
        type = WORD_BOUNDARY;
2308
0
#endif
2309
2310
0
      COP(reg)->text_segment_boundary.type = type;
2311
0
      COP(reg)->text_segment_boundary.not =
2312
0
        (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
2313
0
    }
2314
0
    break;
2315
2316
0
  case ANCR_PREC_READ:
2317
0
    {
2318
0
      ID_ENTRY(env, mid);
2319
0
      r = add_op(reg, OP_MARK);
2320
0
      if (r != 0) return r;
2321
0
      COP(reg)->mark.id = mid;
2322
0
      COP(reg)->mark.save_pos = TRUE;
2323
2324
0
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2325
0
      if (r != 0) return r;
2326
2327
0
      r = add_op(reg, OP_CUT_TO_MARK);
2328
0
      if (r != 0) return r;
2329
0
      COP(reg)->cut_to_mark.id = mid;
2330
0
      COP(reg)->cut_to_mark.restore_pos = TRUE;
2331
0
    }
2332
0
    break;
2333
2334
0
  case ANCR_PREC_READ_NOT:
2335
0
    {
2336
0
      len = compile_length_tree(ND_ANCHOR_BODY(node), reg, env);
2337
0
      if (len < 0) return len;
2338
2339
0
      ID_ENTRY(env, mid);
2340
0
      r = add_op(reg, OP_PUSH);
2341
0
      if (r != 0) return r;
2342
0
      COP(reg)->push.addr = SIZE_INC + OPSIZE_MARK + len +
2343
0
                            OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
2344
2345
0
      r = add_op(reg, OP_MARK);
2346
0
      if (r != 0) return r;
2347
0
      COP(reg)->mark.id = mid;
2348
0
      COP(reg)->mark.save_pos = FALSE;
2349
2350
0
      r = compile_tree(ND_ANCHOR_BODY(node), reg, env);
2351
0
      if (r != 0) return r;
2352
2353
0
      r = add_op(reg, OP_POP_TO_MARK);
2354
0
      if (r != 0) return r;
2355
0
      COP(reg)->pop_to_mark.id = mid;
2356
2357
0
      r = add_op(reg, OP_POP);
2358
0
      if (r != 0) return r;
2359
0
      r = add_op(reg, OP_FAIL);
2360
0
    }
2361
0
    break;
2362
2363
0
  case ANCR_LOOK_BEHIND:
2364
0
    r = compile_anchor_look_behind_node(node, reg, env);
2365
0
    break;
2366
2367
0
  case ANCR_LOOK_BEHIND_NOT:
2368
0
    r = compile_anchor_look_behind_not_node(node, reg, env);
2369
0
    break;
2370
2371
0
  default:
2372
0
    return ONIGERR_TYPE_BUG;
2373
0
    break;
2374
0
  }
2375
2376
0
  return r;
2377
0
}
2378
2379
static int
2380
compile_gimmick_node(GimmickNode* node, regex_t* reg)
2381
0
{
2382
0
  int r = 0;
2383
2384
0
  switch (node->type) {
2385
0
  case GIMMICK_FAIL:
2386
0
    r = add_op(reg, OP_FAIL);
2387
0
    break;
2388
2389
0
  case GIMMICK_SAVE:
2390
0
    r = add_op(reg, OP_SAVE_VAL);
2391
0
    if (r != 0) return r;
2392
0
    COP(reg)->save_val.type = node->detail_type;
2393
0
    COP(reg)->save_val.id   = node->id;
2394
0
    break;
2395
2396
0
  case GIMMICK_UPDATE_VAR:
2397
0
    r = add_op(reg, OP_UPDATE_VAR);
2398
0
    if (r != 0) return r;
2399
0
    COP(reg)->update_var.type = node->detail_type;
2400
0
    COP(reg)->update_var.id   = node->id;
2401
0
    COP(reg)->update_var.clear = FALSE;
2402
0
    break;
2403
2404
0
#ifdef USE_CALLOUT
2405
0
  case GIMMICK_CALLOUT:
2406
0
    switch (node->detail_type) {
2407
0
    case ONIG_CALLOUT_OF_CONTENTS:
2408
0
    case ONIG_CALLOUT_OF_NAME:
2409
0
      {
2410
0
        if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
2411
0
          r = add_op(reg, OP_CALLOUT_NAME);
2412
0
          if (r != 0) return r;
2413
0
          COP(reg)->callout_name.id  = node->id;
2414
0
          COP(reg)->callout_name.num = node->num;
2415
0
        }
2416
0
        else {
2417
0
          r = add_op(reg, OP_CALLOUT_CONTENTS);
2418
0
          if (r != 0) return r;
2419
0
          COP(reg)->callout_contents.num = node->num;
2420
0
        }
2421
0
      }
2422
0
      break;
2423
2424
0
    default:
2425
0
      r = ONIGERR_TYPE_BUG;
2426
0
      break;
2427
0
    }
2428
0
#endif
2429
0
  }
2430
2431
0
  return r;
2432
0
}
2433
2434
static int
2435
compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
2436
0
{
2437
0
  int len;
2438
2439
0
  switch (node->type) {
2440
0
  case GIMMICK_FAIL:
2441
0
    len = OPSIZE_FAIL;
2442
0
    break;
2443
2444
0
  case GIMMICK_SAVE:
2445
0
    len = OPSIZE_SAVE_VAL;
2446
0
    break;
2447
2448
0
  case GIMMICK_UPDATE_VAR:
2449
0
    len = OPSIZE_UPDATE_VAR;
2450
0
    break;
2451
2452
0
#ifdef USE_CALLOUT
2453
0
  case GIMMICK_CALLOUT:
2454
0
    switch (node->detail_type) {
2455
0
    case ONIG_CALLOUT_OF_CONTENTS:
2456
0
      len = OPSIZE_CALLOUT_CONTENTS;
2457
0
      break;
2458
0
    case ONIG_CALLOUT_OF_NAME:
2459
0
      len = OPSIZE_CALLOUT_NAME;
2460
0
      break;
2461
2462
0
    default:
2463
0
      len = ONIGERR_TYPE_BUG;
2464
0
      break;
2465
0
    }
2466
0
    break;
2467
0
#endif
2468
2469
0
  default:
2470
0
    return ONIGERR_TYPE_BUG;
2471
0
    break;
2472
0
  }
2473
2474
0
  return len;
2475
0
}
2476
2477
static int
2478
compile_length_tree(Node* node, regex_t* reg, ParseEnv* env)
2479
0
{
2480
0
  int len, r;
2481
2482
0
  switch (ND_TYPE(node)) {
2483
0
  case ND_LIST:
2484
0
    len = 0;
2485
0
    do {
2486
0
      r = compile_length_tree(ND_CAR(node), reg, env);
2487
0
      if (r < 0) return r;
2488
0
      len += r;
2489
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
2490
0
    r = len;
2491
0
    break;
2492
2493
0
  case ND_ALT:
2494
0
    {
2495
0
      int n;
2496
2497
0
      n = r = 0;
2498
0
      do {
2499
0
        r += compile_length_tree(ND_CAR(node), reg, env);
2500
0
        n++;
2501
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2502
0
      r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
2503
0
    }
2504
0
    break;
2505
2506
0
  case ND_STRING:
2507
0
    if (ND_STRING_IS_CRUDE(node))
2508
0
      r = compile_length_string_crude_node(STR_(node), reg);
2509
0
    else
2510
0
      r = compile_length_string_node(node, reg);
2511
0
    break;
2512
2513
0
  case ND_CCLASS:
2514
0
    r = compile_length_cclass_node(CCLASS_(node), reg);
2515
0
    break;
2516
2517
0
  case ND_CTYPE:
2518
0
    r = SIZE_OPCODE;
2519
0
    break;
2520
2521
0
  case ND_BACKREF:
2522
0
    r = OPSIZE_BACKREF;
2523
0
    break;
2524
2525
0
#ifdef USE_CALL
2526
0
  case ND_CALL:
2527
0
    r = OPSIZE_CALL;
2528
0
    break;
2529
0
#endif
2530
2531
0
  case ND_QUANT:
2532
0
    r = compile_length_quantifier_node(QUANT_(node), reg, env);
2533
0
    break;
2534
2535
0
  case ND_BAG:
2536
0
    r = compile_length_bag_node(BAG_(node), reg, env);
2537
0
    break;
2538
2539
0
  case ND_ANCHOR:
2540
0
    r = compile_length_anchor_node(ANCHOR_(node), reg, env);
2541
0
    break;
2542
2543
0
  case ND_GIMMICK:
2544
0
    r = compile_length_gimmick_node(GIMMICK_(node), reg);
2545
0
    break;
2546
2547
0
  default:
2548
0
    return ONIGERR_TYPE_BUG;
2549
0
    break;
2550
0
  }
2551
2552
0
  return r;
2553
0
}
2554
2555
static int
2556
compile_tree(Node* node, regex_t* reg, ParseEnv* env)
2557
0
{
2558
0
  int n, len, pos, r = 0;
2559
2560
0
  switch (ND_TYPE(node)) {
2561
0
  case ND_LIST:
2562
0
    do {
2563
0
      r = compile_tree(ND_CAR(node), reg, env);
2564
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2565
0
    break;
2566
2567
0
  case ND_ALT:
2568
0
    {
2569
0
      Node* x = node;
2570
0
      len = 0;
2571
0
      do {
2572
0
        len += compile_length_tree(ND_CAR(x), reg, env);
2573
0
        if (IS_NOT_NULL(ND_CDR(x))) {
2574
0
          len += OPSIZE_PUSH + OPSIZE_JUMP;
2575
0
        }
2576
0
      } while (IS_NOT_NULL(x = ND_CDR(x)));
2577
0
      pos = COP_CURR_OFFSET(reg) + 1 + len;  /* goal position */
2578
2579
0
      do {
2580
0
        len = compile_length_tree(ND_CAR(node), reg, env);
2581
0
        if (IS_NOT_NULL(ND_CDR(node))) {
2582
0
          enum OpCode push = ND_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
2583
0
          r = add_op(reg, push);
2584
0
          if (r != 0) break;
2585
0
          COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
2586
0
        }
2587
0
        r = compile_tree(ND_CAR(node), reg, env);
2588
0
        if (r != 0) break;
2589
0
        if (IS_NOT_NULL(ND_CDR(node))) {
2590
0
          len = pos - (COP_CURR_OFFSET(reg) + 1);
2591
0
          r = add_op(reg, OP_JUMP);
2592
0
          if (r != 0) break;
2593
0
          COP(reg)->jump.addr = len;
2594
0
        }
2595
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
2596
0
    }
2597
0
    break;
2598
2599
0
  case ND_STRING:
2600
0
    if (ND_STRING_IS_CRUDE(node))
2601
0
      r = compile_string_crude_node(STR_(node), reg);
2602
0
    else
2603
0
      r = compile_string_node(node, reg);
2604
0
    break;
2605
2606
0
  case ND_CCLASS:
2607
0
    r = compile_cclass_node(CCLASS_(node), reg);
2608
0
    break;
2609
2610
0
  case ND_CTYPE:
2611
0
    {
2612
0
      int op;
2613
2614
0
      switch (CTYPE_(node)->ctype) {
2615
0
      case CTYPE_ANYCHAR:
2616
0
        r = add_op(reg, ND_IS_MULTILINE(node) ? OP_ANYCHAR_ML : OP_ANYCHAR);
2617
0
        break;
2618
2619
0
      case ONIGENC_CTYPE_WORD:
2620
0
        if (CTYPE_(node)->ascii_mode == 0) {
2621
0
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD : OP_WORD;
2622
0
        }
2623
0
        else {
2624
0
          op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
2625
0
        }
2626
0
        r = add_op(reg, op);
2627
0
        break;
2628
2629
0
      default:
2630
0
        return ONIGERR_TYPE_BUG;
2631
0
        break;
2632
0
      }
2633
0
    }
2634
0
    break;
2635
2636
0
  case ND_BACKREF:
2637
0
    {
2638
0
      BackRefNode* br = BACKREF_(node);
2639
2640
0
      if (ND_IS_CHECKER(node)) {
2641
0
#ifdef USE_BACKREF_WITH_LEVEL
2642
0
        if (ND_IS_NEST_LEVEL(node)) {
2643
0
          r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
2644
0
          if (r != 0) return r;
2645
0
          COP(reg)->backref_general.nest_level = br->nest_level;
2646
0
        }
2647
0
        else
2648
0
#endif
2649
0
          {
2650
0
            r = add_op(reg, OP_BACKREF_CHECK);
2651
0
            if (r != 0) return r;
2652
0
          }
2653
0
        goto add_bacref_mems;
2654
0
      }
2655
0
      else {
2656
0
#ifdef USE_BACKREF_WITH_LEVEL
2657
0
        if (ND_IS_NEST_LEVEL(node)) {
2658
0
          if (ND_IS_IGNORECASE(node))
2659
0
            r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
2660
0
          else
2661
0
            r = add_op(reg, OP_BACKREF_WITH_LEVEL);
2662
2663
0
          if (r != 0) return r;
2664
0
          COP(reg)->backref_general.nest_level = br->nest_level;
2665
0
          goto add_bacref_mems;
2666
0
        }
2667
0
        else
2668
0
#endif
2669
0
        if (br->back_num == 1) {
2670
0
          n = br->back_static[0];
2671
0
          if (ND_IS_IGNORECASE(node)) {
2672
0
            r = add_op(reg, OP_BACKREF_N_IC);
2673
0
            if (r != 0) return r;
2674
0
            COP(reg)->backref_n.n1 = n;
2675
0
          }
2676
0
          else {
2677
0
            switch (n) {
2678
0
            case 1:  r = add_op(reg, OP_BACKREF1); break;
2679
0
            case 2:  r = add_op(reg, OP_BACKREF2); break;
2680
0
            default:
2681
0
              r = add_op(reg, OP_BACKREF_N);
2682
0
              if (r != 0) return r;
2683
0
              COP(reg)->backref_n.n1 = n;
2684
0
              break;
2685
0
            }
2686
0
          }
2687
0
        }
2688
0
        else {
2689
0
          int num;
2690
0
          int* p;
2691
2692
0
          r = add_op(reg, ND_IS_IGNORECASE(node) ?
2693
0
                     OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
2694
0
          if (r != 0) return r;
2695
2696
0
        add_bacref_mems:
2697
0
          num = br->back_num;
2698
0
          COP(reg)->backref_general.num = num;
2699
0
          if (num == 1) {
2700
0
            COP(reg)->backref_general.n1 = br->back_static[0];
2701
0
          }
2702
0
          else {
2703
0
            int i, j;
2704
0
            MemNumType* ns;
2705
2706
0
            ns = xmalloc(sizeof(MemNumType) * num);
2707
0
            CHECK_NULL_RETURN_MEMERR(ns);
2708
0
            COP(reg)->backref_general.ns = ns;
2709
0
            p = BACKREFS_P(br);
2710
0
            for (i = num - 1, j = 0; i >= 0; i--, j++) {
2711
0
              ns[j] = p[i];
2712
0
            }
2713
0
          }
2714
0
        }
2715
0
      }
2716
0
    }
2717
0
    break;
2718
2719
0
#ifdef USE_CALL
2720
0
  case ND_CALL:
2721
0
    r = compile_call(CALL_(node), reg, env);
2722
0
    break;
2723
0
#endif
2724
2725
0
  case ND_QUANT:
2726
0
    r = compile_quantifier_node(QUANT_(node), reg, env);
2727
0
    break;
2728
2729
0
  case ND_BAG:
2730
0
    r = compile_bag_node(BAG_(node), reg, env);
2731
0
    break;
2732
2733
0
  case ND_ANCHOR:
2734
0
    r = compile_anchor_node(ANCHOR_(node), reg, env);
2735
0
    break;
2736
2737
0
  case ND_GIMMICK:
2738
0
    r = compile_gimmick_node(GIMMICK_(node), reg);
2739
0
    break;
2740
2741
0
  default:
2742
#ifdef ONIG_DEBUG
2743
    fprintf(DBGFP, "compile_tree: undefined node type %d\n", ND_TYPE(node));
2744
#endif
2745
0
    break;
2746
0
  }
2747
2748
0
  return r;
2749
0
}
2750
2751
static int
2752
make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
2753
0
{
2754
0
  int r;
2755
0
  Node* node = *plink;
2756
2757
0
  switch (ND_TYPE(node)) {
2758
0
  case ND_LIST:
2759
0
  case ND_ALT:
2760
0
    do {
2761
0
      r = make_named_capture_number_map(&(ND_CAR(node)), map, counter);
2762
0
    } while (r >= 0 && IS_NOT_NULL(node = ND_CDR(node)));
2763
0
    if (r < 0) return r;
2764
0
    break;
2765
2766
0
  case ND_QUANT:
2767
0
    {
2768
0
      Node** ptarget = &(ND_BODY(node));
2769
0
      r = make_named_capture_number_map(ptarget, map, counter);
2770
0
      if (r < 0) return r;
2771
0
      if (r == 1 && ND_TYPE(*ptarget) == ND_QUANT) {
2772
0
        return onig_reduce_nested_quantifier(node);
2773
0
      }
2774
0
    }
2775
0
    break;
2776
2777
0
  case ND_BAG:
2778
0
    {
2779
0
      BagNode* en = BAG_(node);
2780
0
      if (en->type == BAG_MEMORY) {
2781
0
        if (ND_IS_NAMED_GROUP(node)) {
2782
0
          (*counter)++;
2783
0
          map[en->m.regnum].new_val = *counter;
2784
0
          en->m.regnum = *counter;
2785
0
          r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2786
0
          if (r < 0) return r;
2787
0
        }
2788
0
        else {
2789
0
          *plink = ND_BODY(node);
2790
0
          ND_BODY(node) = NULL_NODE;
2791
0
          onig_node_free(node);
2792
0
          r = make_named_capture_number_map(plink, map, counter);
2793
0
          if (r < 0) return r;
2794
0
          return 1;
2795
0
        }
2796
0
      }
2797
0
      else if (en->type == BAG_IF_ELSE) {
2798
0
        r = make_named_capture_number_map(&(ND_BAG_BODY(en)), map, counter);
2799
0
        if (r < 0) return r;
2800
0
        if (IS_NOT_NULL(en->te.Then)) {
2801
0
          r = make_named_capture_number_map(&(en->te.Then), map, counter);
2802
0
          if (r < 0) return r;
2803
0
        }
2804
0
        if (IS_NOT_NULL(en->te.Else)) {
2805
0
          r = make_named_capture_number_map(&(en->te.Else), map, counter);
2806
0
          if (r < 0) return r;
2807
0
        }
2808
0
      }
2809
0
      else {
2810
0
        r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2811
0
        if (r < 0) return r;
2812
0
      }
2813
0
    }
2814
0
    break;
2815
2816
0
  case ND_ANCHOR:
2817
0
    if (IS_NOT_NULL(ND_BODY(node))) {
2818
0
      r = make_named_capture_number_map(&(ND_BODY(node)), map, counter);
2819
0
      if (r < 0) return r;
2820
0
    }
2821
0
    break;
2822
2823
0
  default:
2824
0
    break;
2825
0
  }
2826
2827
0
  return 0;
2828
0
}
2829
2830
static int
2831
renumber_backref_node(Node* node, GroupNumMap* map)
2832
0
{
2833
0
  int i, pos, n, old_num;
2834
0
  int *backs;
2835
0
  BackRefNode* bn = BACKREF_(node);
2836
2837
0
  if (! ND_IS_BY_NAME(node))
2838
0
    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2839
2840
0
  old_num = bn->back_num;
2841
0
  if (IS_NULL(bn->back_dynamic))
2842
0
    backs = bn->back_static;
2843
0
  else
2844
0
    backs = bn->back_dynamic;
2845
2846
0
  for (i = 0, pos = 0; i < old_num; i++) {
2847
0
    n = map[backs[i]].new_val;
2848
0
    if (n > 0) {
2849
0
      backs[pos] = n;
2850
0
      pos++;
2851
0
    }
2852
0
  }
2853
2854
0
  bn->back_num = pos;
2855
0
  return 0;
2856
0
}
2857
2858
static int
2859
renumber_backref_traverse(Node* node, GroupNumMap* map)
2860
0
{
2861
0
  int r = 0;
2862
2863
0
  switch (ND_TYPE(node)) {
2864
0
  case ND_LIST:
2865
0
  case ND_ALT:
2866
0
    do {
2867
0
      r = renumber_backref_traverse(ND_CAR(node), map);
2868
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2869
0
    break;
2870
2871
0
  case ND_QUANT:
2872
0
    r = renumber_backref_traverse(ND_BODY(node), map);
2873
0
    break;
2874
2875
0
  case ND_BAG:
2876
0
    {
2877
0
      BagNode* en = BAG_(node);
2878
2879
0
      r = renumber_backref_traverse(ND_BODY(node), map);
2880
0
      if (r != 0) return r;
2881
2882
0
      if (en->type == BAG_IF_ELSE) {
2883
0
        if (IS_NOT_NULL(en->te.Then)) {
2884
0
          r = renumber_backref_traverse(en->te.Then, map);
2885
0
          if (r != 0) return r;
2886
0
        }
2887
0
        if (IS_NOT_NULL(en->te.Else)) {
2888
0
          r = renumber_backref_traverse(en->te.Else, map);
2889
0
          if (r != 0) return r;
2890
0
        }
2891
0
      }
2892
0
    }
2893
0
    break;
2894
2895
0
  case ND_BACKREF:
2896
0
    r = renumber_backref_node(node, map);
2897
0
    break;
2898
2899
0
  case ND_ANCHOR:
2900
0
    if (IS_NOT_NULL(ND_BODY(node)))
2901
0
      r = renumber_backref_traverse(ND_BODY(node), map);
2902
0
    break;
2903
2904
0
  default:
2905
0
    break;
2906
0
  }
2907
2908
0
  return r;
2909
0
}
2910
2911
static int
2912
numbered_ref_check(Node* node)
2913
0
{
2914
0
  int r = 0;
2915
2916
0
  switch (ND_TYPE(node)) {
2917
0
  case ND_LIST:
2918
0
  case ND_ALT:
2919
0
    do {
2920
0
      r = numbered_ref_check(ND_CAR(node));
2921
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
2922
0
    break;
2923
2924
0
  case ND_ANCHOR:
2925
0
    if (IS_NULL(ND_BODY(node)))
2926
0
      break;
2927
    /* fall */
2928
0
  case ND_QUANT:
2929
0
    r = numbered_ref_check(ND_BODY(node));
2930
0
    break;
2931
2932
0
  case ND_BAG:
2933
0
    {
2934
0
      BagNode* en = BAG_(node);
2935
2936
0
      r = numbered_ref_check(ND_BODY(node));
2937
0
      if (r != 0) return r;
2938
2939
0
      if (en->type == BAG_IF_ELSE) {
2940
0
        if (IS_NOT_NULL(en->te.Then)) {
2941
0
          r = numbered_ref_check(en->te.Then);
2942
0
          if (r != 0) return r;
2943
0
        }
2944
0
        if (IS_NOT_NULL(en->te.Else)) {
2945
0
          r = numbered_ref_check(en->te.Else);
2946
0
          if (r != 0) return r;
2947
0
        }
2948
0
      }
2949
0
    }
2950
2951
0
    break;
2952
2953
0
  case ND_BACKREF:
2954
0
    if (! ND_IS_BY_NAME(node))
2955
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2956
0
    break;
2957
2958
0
  default:
2959
0
    break;
2960
0
  }
2961
2962
0
  return r;
2963
0
}
2964
2965
static int
2966
disable_noname_group_capture(Node** root, regex_t* reg, ParseEnv* env)
2967
0
{
2968
0
  int r, i, pos, counter;
2969
0
  MemStatusType loc;
2970
0
  GroupNumMap* map;
2971
2972
0
  map = (GroupNumMap* )xalloca(sizeof(GroupNumMap) * (env->num_mem + 1));
2973
0
  CHECK_NULL_RETURN_MEMERR(map);
2974
0
  for (i = 1; i <= env->num_mem; i++) {
2975
0
    map[i].new_val = 0;
2976
0
  }
2977
0
  counter = 0;
2978
0
  r = make_named_capture_number_map(root, map, &counter);
2979
0
  if (r < 0) return r;
2980
2981
0
  r = renumber_backref_traverse(*root, map);
2982
0
  if (r != 0) return r;
2983
2984
0
  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2985
0
    if (map[i].new_val > 0) {
2986
0
      PARSEENV_MEMENV(env)[pos] = PARSEENV_MEMENV(env)[i];
2987
0
      pos++;
2988
0
    }
2989
0
  }
2990
2991
0
  loc = env->cap_history;
2992
0
  MEM_STATUS_CLEAR(env->cap_history);
2993
0
  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2994
0
    if (MEM_STATUS_AT(loc, i)) {
2995
0
      MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
2996
0
    }
2997
0
  }
2998
2999
0
  env->num_mem = env->num_named;
3000
0
  reg->num_mem = env->num_named;
3001
3002
0
  return onig_renumber_name_table(reg, map);
3003
0
}
3004
3005
#ifdef USE_CALL
3006
static int
3007
fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
3008
0
{
3009
0
  int i, offset;
3010
0
  BagNode* en;
3011
0
  AbsAddrType addr;
3012
0
  AbsAddrType* paddr;
3013
3014
0
  for (i = 0; i < uslist->num; i++) {
3015
0
    if (! ND_IS_FIXED_ADDR(uslist->us[i].target)) {
3016
0
      if (ND_IS_CALLED(uslist->us[i].target))
3017
0
        return ONIGERR_PARSER_BUG;
3018
0
      else {
3019
        /* CASE: called node doesn't have called address.
3020
           ex. /((|a\g<1>)(.){0}){0}\g<3>/
3021
           group-1 doesn't called, but compiled into bytecodes,
3022
           because group-3 is referred from outside.
3023
        */
3024
0
        continue;
3025
0
      }
3026
0
    }
3027
3028
0
    en = BAG_(uslist->us[i].target);
3029
0
    addr   = en->m.called_addr;
3030
0
    offset = uslist->us[i].offset;
3031
3032
0
    paddr = (AbsAddrType* )((char* )reg->ops + offset);
3033
0
    *paddr = addr;
3034
0
  }
3035
0
  return 0;
3036
0
}
3037
#endif
3038
3039
/* x is not included y ==>  1 : 0 */
3040
static int
3041
is_exclusive(Node* x, Node* y, regex_t* reg)
3042
0
{
3043
0
  int i, len;
3044
0
  OnigCodePoint code;
3045
0
  UChar *p;
3046
0
  NodeType ytype;
3047
3048
0
 retry:
3049
0
  ytype = ND_TYPE(y);
3050
0
  switch (ND_TYPE(x)) {
3051
0
  case ND_CTYPE:
3052
0
    {
3053
0
      if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
3054
0
          CTYPE_(y)->ctype == CTYPE_ANYCHAR)
3055
0
        break;
3056
3057
0
      switch (ytype) {
3058
0
      case ND_CTYPE:
3059
0
        if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
3060
0
            CTYPE_(y)->not   != CTYPE_(x)->not &&
3061
0
            CTYPE_(y)->ascii_mode == CTYPE_(x)->ascii_mode)
3062
0
          return 1;
3063
0
        else
3064
0
          return 0;
3065
0
        break;
3066
3067
0
      case ND_CCLASS:
3068
0
      swap:
3069
0
        {
3070
0
          Node* tmp;
3071
0
          tmp = x; x = y; y = tmp;
3072
0
          goto retry;
3073
0
        }
3074
0
        break;
3075
3076
0
      case ND_STRING:
3077
0
        goto swap;
3078
0
        break;
3079
3080
0
      default:
3081
0
        break;
3082
0
      }
3083
0
    }
3084
0
    break;
3085
3086
0
  case ND_CCLASS:
3087
0
    {
3088
0
      int range;
3089
0
      CClassNode* xc = CCLASS_(x);
3090
3091
0
      switch (ytype) {
3092
0
      case ND_CTYPE:
3093
0
        switch (CTYPE_(y)->ctype) {
3094
0
        case CTYPE_ANYCHAR:
3095
0
          return 0;
3096
0
          break;
3097
3098
0
        case ONIGENC_CTYPE_WORD:
3099
0
          if (CTYPE_(y)->not == 0) {
3100
0
            if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
3101
0
              range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3102
0
              for (i = 0; i < range; i++) {
3103
0
                if (BITSET_AT(xc->bs, i)) {
3104
0
                  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
3105
0
                }
3106
0
              }
3107
0
              return 1;
3108
0
            }
3109
0
            return 0;
3110
0
          }
3111
0
          else {
3112
0
            if (IS_NOT_NULL(xc->mbuf)) return 0;
3113
0
            if (IS_NCCLASS_NOT(xc)) return 0;
3114
3115
0
            range = CTYPE_(y)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
3116
0
            for (i = 0; i < range; i++) {
3117
0
              if (! ONIGENC_IS_CODE_WORD(reg->enc, i)) {
3118
0
                if (BITSET_AT(xc->bs, i))
3119
0
                  return 0;
3120
0
              }
3121
0
            }
3122
0
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
3123
0
              if (BITSET_AT(xc->bs, i)) return 0;
3124
0
            }
3125
0
            return 1;
3126
0
          }
3127
0
          break;
3128
3129
0
        default:
3130
0
          break;
3131
0
        }
3132
0
        break;
3133
3134
0
      case ND_CCLASS:
3135
0
        {
3136
0
          int v;
3137
0
          CClassNode* yc = CCLASS_(y);
3138
3139
0
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
3140
0
            v = BITSET_AT(xc->bs, i);
3141
0
            if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) {
3142
0
              v = BITSET_AT(yc->bs, i);
3143
0
              if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
3144
0
                  (v == 0 && IS_NCCLASS_NOT(yc)))
3145
0
                return 0;
3146
0
            }
3147
0
          }
3148
0
          if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
3149
0
              (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
3150
0
            return 1;
3151
0
          return 0;
3152
0
        }
3153
0
        break;
3154
3155
0
      case ND_STRING:
3156
0
        goto swap;
3157
0
        break;
3158
3159
0
      default:
3160
0
        break;
3161
0
      }
3162
0
    }
3163
0
    break;
3164
3165
0
  case ND_STRING:
3166
0
    {
3167
0
      StrNode* xs = STR_(x);
3168
3169
0
      if (ND_STRING_LEN(x) == 0)
3170
0
        break;
3171
3172
0
      switch (ytype) {
3173
0
      case ND_CTYPE:
3174
0
        switch (CTYPE_(y)->ctype) {
3175
0
        case CTYPE_ANYCHAR:
3176
0
          break;
3177
3178
0
        case ONIGENC_CTYPE_WORD:
3179
0
          if (CTYPE_(y)->ascii_mode == 0) {
3180
0
            if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
3181
0
              return CTYPE_(y)->not;
3182
0
            else
3183
0
              return !(CTYPE_(y)->not);
3184
0
          }
3185
0
          else {
3186
0
            if (ONIGENC_IS_MBC_WORD_ASCII(reg->enc, xs->s, xs->end))
3187
0
              return CTYPE_(y)->not;
3188
0
            else
3189
0
              return !(CTYPE_(y)->not);
3190
0
          }
3191
0
          break;
3192
0
        default:
3193
0
          break;
3194
0
        }
3195
0
        break;
3196
3197
0
      case ND_CCLASS:
3198
0
        {
3199
0
          CClassNode* cc = CCLASS_(y);
3200
3201
0
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
3202
0
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
3203
0
          return onig_is_code_in_cc(reg->enc, code, cc) == 0;
3204
0
        }
3205
0
        break;
3206
3207
0
      case ND_STRING:
3208
0
        {
3209
0
          UChar *q;
3210
0
          StrNode* ys = STR_(y);
3211
3212
0
          len = ND_STRING_LEN(x);
3213
0
          if (len > ND_STRING_LEN(y)) len = ND_STRING_LEN(y);
3214
3215
0
          for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
3216
0
            if (*p != *q) return 1;
3217
0
          }
3218
0
        }
3219
0
        break;
3220
3221
0
      default:
3222
0
        break;
3223
0
      }
3224
0
    }
3225
0
    break;
3226
3227
0
  default:
3228
0
    break;
3229
0
  }
3230
3231
0
  return 0;
3232
0
}
3233
3234
static Node*
3235
get_tree_head_literal(Node* node, int exact, regex_t* reg)
3236
0
{
3237
0
  Node* n = NULL_NODE;
3238
3239
0
  switch (ND_TYPE(node)) {
3240
0
  case ND_BACKREF:
3241
0
  case ND_ALT:
3242
0
#ifdef USE_CALL
3243
0
  case ND_CALL:
3244
0
#endif
3245
0
    break;
3246
3247
0
  case ND_CTYPE:
3248
0
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
3249
0
      break;
3250
    /* fall */
3251
0
  case ND_CCLASS:
3252
0
    if (exact == 0) {
3253
0
      n = node;
3254
0
    }
3255
0
    break;
3256
3257
0
  case ND_LIST:
3258
0
    n = get_tree_head_literal(ND_CAR(node), exact, reg);
3259
0
    break;
3260
3261
0
  case ND_STRING:
3262
0
    {
3263
0
      StrNode* sn = STR_(node);
3264
3265
0
      if (sn->end <= sn->s)
3266
0
        break;
3267
3268
0
      if (exact == 0 || !ND_IS_REAL_IGNORECASE(node)) {
3269
0
        n = node;
3270
0
      }
3271
0
    }
3272
0
    break;
3273
3274
0
  case ND_QUANT:
3275
0
    {
3276
0
      QuantNode* qn = QUANT_(node);
3277
0
      if (qn->lower > 0) {
3278
0
        if (IS_NOT_NULL(qn->head_exact))
3279
0
          n = qn->head_exact;
3280
0
        else
3281
0
          n = get_tree_head_literal(ND_BODY(node), exact, reg);
3282
0
      }
3283
0
    }
3284
0
    break;
3285
3286
0
  case ND_BAG:
3287
0
    {
3288
0
      BagNode* en = BAG_(node);
3289
0
      switch (en->type) {
3290
0
      case BAG_OPTION:
3291
0
      case BAG_MEMORY:
3292
0
      case BAG_STOP_BACKTRACK:
3293
0
        n = get_tree_head_literal(ND_BODY(node), exact, reg);
3294
0
        break;
3295
0
      default:
3296
0
        break;
3297
0
      }
3298
0
    }
3299
0
    break;
3300
3301
0
  case ND_ANCHOR:
3302
0
    if (ANCHOR_(node)->type == ANCR_PREC_READ)
3303
0
      n = get_tree_head_literal(ND_BODY(node), exact, reg);
3304
0
    break;
3305
3306
0
  case ND_GIMMICK:
3307
0
  default:
3308
0
    break;
3309
0
  }
3310
3311
0
  return n;
3312
0
}
3313
3314
enum GetValue {
3315
  GET_VALUE_NONE   = -1,
3316
  GET_VALUE_IGNORE =  0,
3317
  GET_VALUE_FOUND  =  1
3318
};
3319
3320
0
#define MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL  16
3321
3322
static int
3323
get_tree_tail_literal(Node* node, Node** rnode, regex_t* reg, int nest_level)
3324
0
{
3325
0
  int r;
3326
3327
0
  nest_level++;
3328
0
  if (nest_level >= MAX_NEST_LEVEL_GET_TREE_TAIL_LITERAL) {
3329
0
    return GET_VALUE_NONE;
3330
0
  }
3331
3332
0
  switch (ND_TYPE(node)) {
3333
0
  case ND_LIST:
3334
0
    if (IS_NULL(ND_CDR(node))) {
3335
0
      r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3336
0
    }
3337
0
    else {
3338
0
      r = get_tree_tail_literal(ND_CDR(node), rnode, reg, nest_level);
3339
0
      if (r == GET_VALUE_IGNORE) {
3340
0
        r = get_tree_tail_literal(ND_CAR(node), rnode, reg, nest_level);
3341
0
      }
3342
0
    }
3343
0
    break;
3344
3345
0
#ifdef USE_CALL
3346
0
  case ND_CALL:
3347
0
    r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3348
0
    break;
3349
0
#endif
3350
3351
0
  case ND_CTYPE:
3352
0
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) {
3353
0
      r = GET_VALUE_NONE;
3354
0
      break;
3355
0
    }
3356
    /* fall */
3357
0
  case ND_CCLASS:
3358
0
    *rnode = node;
3359
0
    r = GET_VALUE_FOUND;
3360
0
    break;
3361
3362
0
  case ND_STRING:
3363
0
    {
3364
0
      StrNode* sn = STR_(node);
3365
3366
0
      if (sn->end <= sn->s) {
3367
0
        r = GET_VALUE_IGNORE;
3368
0
        break;
3369
0
      }
3370
3371
0
      if (ND_IS_REAL_IGNORECASE(node)) {
3372
0
        r = GET_VALUE_NONE;
3373
0
        break;
3374
0
      }
3375
3376
0
      *rnode = node;
3377
0
      r = GET_VALUE_FOUND;
3378
0
    }
3379
0
    break;
3380
3381
0
  case ND_QUANT:
3382
0
    {
3383
0
      QuantNode* qn = QUANT_(node);
3384
0
      if (qn->lower != 0) {
3385
0
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3386
0
      }
3387
0
      else
3388
0
        r = GET_VALUE_NONE;
3389
0
    }
3390
0
    break;
3391
3392
0
  case ND_BAG:
3393
0
    {
3394
0
      BagNode* en = BAG_(node);
3395
3396
0
      if (en->type == BAG_MEMORY) {
3397
0
        if (ND_IS_MARK1(node))
3398
0
          r = GET_VALUE_NONE;
3399
0
        else {
3400
0
          ND_STATUS_ADD(node, MARK1);
3401
0
          r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3402
0
          ND_STATUS_REMOVE(node, MARK1);
3403
0
        }
3404
0
      }
3405
0
      else {
3406
0
        r = get_tree_tail_literal(ND_BODY(node), rnode, reg, nest_level);
3407
0
      }
3408
0
    }
3409
0
    break;
3410
3411
0
  case ND_ANCHOR:
3412
0
  case ND_GIMMICK:
3413
0
    r = GET_VALUE_IGNORE;
3414
0
    break;
3415
3416
0
  case ND_ALT:
3417
0
  case ND_BACKREF:
3418
0
  default:
3419
0
    r = GET_VALUE_NONE;
3420
0
    break;
3421
0
  }
3422
3423
0
  return r;
3424
0
}
3425
3426
static int
3427
check_called_node_in_look_behind(Node* node, int not)
3428
0
{
3429
0
  int r;
3430
3431
0
  r = 0;
3432
3433
0
  switch (ND_TYPE(node)) {
3434
0
  case ND_LIST:
3435
0
  case ND_ALT:
3436
0
    do {
3437
0
      r = check_called_node_in_look_behind(ND_CAR(node), not);
3438
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3439
0
    break;
3440
3441
0
  case ND_QUANT:
3442
0
    r = check_called_node_in_look_behind(ND_BODY(node), not);
3443
0
    break;
3444
3445
0
  case ND_BAG:
3446
0
    {
3447
0
      BagNode* en = BAG_(node);
3448
3449
0
      if (en->type == BAG_MEMORY) {
3450
0
        if (ND_IS_MARK1(node))
3451
0
          return 0;
3452
0
        else {
3453
0
          ND_STATUS_ADD(node, MARK1);
3454
0
          r = check_called_node_in_look_behind(ND_BODY(node), not);
3455
0
          ND_STATUS_REMOVE(node, MARK1);
3456
0
        }
3457
0
      }
3458
0
      else {
3459
0
        r = check_called_node_in_look_behind(ND_BODY(node), not);
3460
0
        if (r == 0 && en->type == BAG_IF_ELSE) {
3461
0
          if (IS_NOT_NULL(en->te.Then)) {
3462
0
            r = check_called_node_in_look_behind(en->te.Then, not);
3463
0
            if (r != 0) break;
3464
0
          }
3465
0
          if (IS_NOT_NULL(en->te.Else)) {
3466
0
            r = check_called_node_in_look_behind(en->te.Else, not);
3467
0
          }
3468
0
        }
3469
0
      }
3470
0
    }
3471
0
    break;
3472
3473
0
  case ND_ANCHOR:
3474
0
    if (IS_NOT_NULL(ND_BODY(node)))
3475
0
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3476
0
    break;
3477
3478
0
  case ND_GIMMICK:
3479
0
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3480
0
      return 1;
3481
0
    break;
3482
3483
0
  default:
3484
0
    break;
3485
0
  }
3486
3487
0
  return r;
3488
0
}
3489
3490
/* allowed node types in look-behind */
3491
#define ALLOWED_TYPE_IN_LB \
3492
0
  ( ND_BIT_LIST | ND_BIT_ALT | ND_BIT_STRING | ND_BIT_CCLASS \
3493
0
  | ND_BIT_CTYPE | ND_BIT_ANCHOR | ND_BIT_BAG | ND_BIT_QUANT \
3494
0
  | ND_BIT_CALL | ND_BIT_BACKREF | ND_BIT_GIMMICK)
3495
3496
0
#define ALLOWED_BAG_IN_LB       ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3497
0
#define ALLOWED_BAG_IN_LB_NOT   ( 1<<BAG_OPTION | 1<<BAG_STOP_BACKTRACK | 1<<BAG_IF_ELSE )
3498
3499
#define ALLOWED_ANCHOR_IN_LB \
3500
0
  ( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
3501
0
  | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
3502
0
  | ANCR_WORD_BEGIN | ANCR_WORD_END \
3503
0
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3504
3505
#define ALLOWED_ANCHOR_IN_LB_NOT \
3506
0
  ( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
3507
0
  | ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
3508
0
  | ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
3509
0
  | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
3510
3511
3512
static int
3513
check_node_in_look_behind(Node* node, int not, int* used)
3514
0
{
3515
0
  static unsigned int
3516
0
    bag_mask[2] = { ALLOWED_BAG_IN_LB, ALLOWED_BAG_IN_LB_NOT };
3517
3518
0
  static unsigned int
3519
0
    anchor_mask[2] = { ALLOWED_ANCHOR_IN_LB, ALLOWED_ANCHOR_IN_LB_NOT };
3520
3521
0
  NodeType type;
3522
0
  int r = 0;
3523
3524
0
  type = ND_TYPE(node);
3525
0
  if ((ND_TYPE2BIT(type) & ALLOWED_TYPE_IN_LB) == 0)
3526
0
    return 1;
3527
3528
0
  switch (type) {
3529
0
  case ND_LIST:
3530
0
  case ND_ALT:
3531
0
    do {
3532
0
      r = check_node_in_look_behind(ND_CAR(node), not, used);
3533
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3534
0
    break;
3535
3536
0
  case ND_QUANT:
3537
0
    r = check_node_in_look_behind(ND_BODY(node), not, used);
3538
0
    break;
3539
3540
0
  case ND_BAG:
3541
0
    {
3542
0
      BagNode* en = BAG_(node);
3543
0
      if (((1<<en->type) & bag_mask[not]) == 0)
3544
0
        return 1;
3545
3546
0
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3547
0
      if (r != 0) break;
3548
3549
0
      if (en->type == BAG_MEMORY) {
3550
0
        if (ND_IS_BACKREF(node) || ND_IS_CALLED(node)
3551
0
         || ND_IS_REFERENCED(node))
3552
0
          *used = TRUE;
3553
0
      }
3554
0
      else if (en->type == BAG_IF_ELSE) {
3555
0
        if (IS_NOT_NULL(en->te.Then)) {
3556
0
          r = check_node_in_look_behind(en->te.Then, not, used);
3557
0
          if (r != 0) break;
3558
0
        }
3559
0
        if (IS_NOT_NULL(en->te.Else)) {
3560
0
          r = check_node_in_look_behind(en->te.Else, not, used);
3561
0
        }
3562
0
      }
3563
0
    }
3564
0
    break;
3565
3566
0
  case ND_ANCHOR:
3567
0
    type = ANCHOR_(node)->type;
3568
0
    if ((type & anchor_mask[not]) == 0)
3569
0
      return 1;
3570
3571
0
    if (IS_NOT_NULL(ND_BODY(node)))
3572
0
      r = check_node_in_look_behind(ND_BODY(node), not, used);
3573
0
    break;
3574
3575
0
  case ND_GIMMICK:
3576
0
    if (ND_IS_ABSENT_WITH_SIDE_EFFECTS(node) != 0)
3577
0
      return 1;
3578
3579
0
    {
3580
0
      GimmickNode* g = GIMMICK_(node);
3581
0
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
3582
0
        *used = TRUE;
3583
0
    }
3584
0
    break;
3585
3586
0
  case ND_CALL:
3587
0
    if (ND_IS_RECURSION(node)) {
3588
      /* fix: Issue 38040 in oss-fuzz */
3589
      /* This node should be removed before recursive call check. */
3590
0
      *used = TRUE;
3591
0
    }
3592
0
    else
3593
0
      r = check_called_node_in_look_behind(ND_BODY(node), not);
3594
0
    break;
3595
3596
0
  default:
3597
0
    break;
3598
0
  }
3599
0
  return r;
3600
0
}
3601
3602
static OnigLen
3603
node_min_byte_len(Node* node, ParseEnv* env)
3604
0
{
3605
0
  OnigLen len;
3606
0
  OnigLen tmin;
3607
3608
0
  len = 0;
3609
0
  switch (ND_TYPE(node)) {
3610
0
  case ND_BACKREF:
3611
0
    if (! ND_IS_CHECKER(node)) {
3612
0
      int i;
3613
0
      int* backs;
3614
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3615
0
      BackRefNode* br = BACKREF_(node);
3616
0
      if (ND_IS_RECURSION(node)) break;
3617
3618
0
      backs = BACKREFS_P(br);
3619
0
      len = node_min_byte_len(mem_env[backs[0]].mem_node, env);
3620
0
      for (i = 1; i < br->back_num; i++) {
3621
0
        tmin = node_min_byte_len(mem_env[backs[i]].mem_node, env);
3622
0
        if (len > tmin) len = tmin;
3623
0
      }
3624
0
    }
3625
0
    break;
3626
3627
0
#ifdef USE_CALL
3628
0
  case ND_CALL:
3629
0
    {
3630
0
      Node* t = ND_BODY(node);
3631
0
      if (ND_IS_FIXED_MIN(t))
3632
0
        len = BAG_(t)->min_len;
3633
0
      else
3634
0
        len = node_min_byte_len(t, env);
3635
0
    }
3636
0
    break;
3637
0
#endif
3638
3639
0
  case ND_LIST:
3640
0
    do {
3641
0
      tmin = node_min_byte_len(ND_CAR(node), env);
3642
0
      len = distance_add(len, tmin);
3643
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3644
0
    break;
3645
3646
0
  case ND_ALT:
3647
0
    {
3648
0
      Node *x, *y;
3649
0
      y = node;
3650
0
      do {
3651
0
        x = ND_CAR(y);
3652
0
        tmin = node_min_byte_len(x, env);
3653
0
        if (y == node) len = tmin;
3654
0
        else if (len > tmin) len = tmin;
3655
0
      } while (IS_NOT_NULL(y = ND_CDR(y)));
3656
0
    }
3657
0
    break;
3658
3659
0
  case ND_STRING:
3660
0
    {
3661
0
      StrNode* sn = STR_(node);
3662
0
      len = (int )(sn->end - sn->s);
3663
0
    }
3664
0
    break;
3665
3666
0
  case ND_CTYPE:
3667
0
  case ND_CCLASS:
3668
0
    len = ONIGENC_MBC_MINLEN(env->enc);
3669
0
    break;
3670
3671
0
  case ND_QUANT:
3672
0
    {
3673
0
      QuantNode* qn = QUANT_(node);
3674
3675
0
      if (qn->lower > 0) {
3676
0
        len = node_min_byte_len(ND_BODY(node), env);
3677
0
        len = distance_multiply(len, qn->lower);
3678
0
      }
3679
0
    }
3680
0
    break;
3681
3682
0
  case ND_BAG:
3683
0
    {
3684
0
      BagNode* en = BAG_(node);
3685
0
      switch (en->type) {
3686
0
      case BAG_MEMORY:
3687
0
        if (ND_IS_FIXED_MIN(node))
3688
0
          len = en->min_len;
3689
0
        else {
3690
0
          if (ND_IS_MARK1(node))
3691
0
            len = 0;  /* recursive */
3692
0
          else {
3693
0
            ND_STATUS_ADD(node, MARK1);
3694
0
            len = node_min_byte_len(ND_BODY(node), env);
3695
0
            ND_STATUS_REMOVE(node, MARK1);
3696
3697
0
            en->min_len = len;
3698
0
            ND_STATUS_ADD(node, FIXED_MIN);
3699
0
          }
3700
0
        }
3701
0
        break;
3702
3703
0
      case BAG_OPTION:
3704
0
      case BAG_STOP_BACKTRACK:
3705
0
        len = node_min_byte_len(ND_BODY(node), env);
3706
0
        break;
3707
0
      case BAG_IF_ELSE:
3708
0
        {
3709
0
          OnigLen elen;
3710
3711
0
          len = node_min_byte_len(ND_BODY(node), env);
3712
0
          if (IS_NOT_NULL(en->te.Then))
3713
0
            len += node_min_byte_len(en->te.Then, env);
3714
0
          if (IS_NOT_NULL(en->te.Else))
3715
0
            elen = node_min_byte_len(en->te.Else, env);
3716
0
          else elen = 0;
3717
3718
0
          if (elen < len) len = elen;
3719
0
        }
3720
0
        break;
3721
0
      }
3722
0
    }
3723
0
    break;
3724
3725
0
  case ND_GIMMICK:
3726
0
    {
3727
0
      GimmickNode* g = GIMMICK_(node);
3728
0
      if (g->type == GIMMICK_FAIL) {
3729
0
        len = INFINITE_LEN;
3730
0
        break;
3731
0
      }
3732
0
    }
3733
    /* fall */
3734
0
  case ND_ANCHOR:
3735
0
  default:
3736
0
    break;
3737
0
  }
3738
3739
0
  return len;
3740
0
}
3741
3742
static int
3743
check_backrefs(Node* node, ParseEnv* env)
3744
0
{
3745
0
  int r;
3746
3747
0
  switch (ND_TYPE(node)) {
3748
0
  case ND_LIST:
3749
0
  case ND_ALT:
3750
0
    do {
3751
0
      r = check_backrefs(ND_CAR(node), env);
3752
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3753
0
    break;
3754
3755
0
  case ND_ANCHOR:
3756
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
3757
0
      r = 0;
3758
0
      break;
3759
0
    }
3760
    /* fall */
3761
0
  case ND_QUANT:
3762
0
    r = check_backrefs(ND_BODY(node), env);
3763
0
    break;
3764
3765
0
  case ND_BAG:
3766
0
    r = check_backrefs(ND_BODY(node), env);
3767
0
    {
3768
0
      BagNode* en = BAG_(node);
3769
3770
0
      if (en->type == BAG_IF_ELSE) {
3771
0
        if (r != 0) return r;
3772
0
        if (IS_NOT_NULL(en->te.Then)) {
3773
0
          r = check_backrefs(en->te.Then, env);
3774
0
          if (r != 0) return r;
3775
0
        }
3776
0
        if (IS_NOT_NULL(en->te.Else)) {
3777
0
          r = check_backrefs(en->te.Else, env);
3778
0
        }
3779
0
      }
3780
0
    }
3781
0
    break;
3782
3783
0
  case ND_BACKREF:
3784
0
    {
3785
0
      int i;
3786
0
      BackRefNode* br = BACKREF_(node);
3787
0
      int* backs = BACKREFS_P(br);
3788
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3789
3790
0
      for (i = 0; i < br->back_num; i++) {
3791
0
        if (backs[i] > env->num_mem)
3792
0
          return ONIGERR_INVALID_BACKREF;
3793
3794
0
        ND_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
3795
0
      }
3796
0
      r = 0;
3797
0
    }
3798
0
    break;
3799
3800
0
  default:
3801
0
    r = 0;
3802
0
    break;
3803
0
  }
3804
3805
0
  return r;
3806
0
}
3807
3808
static int
3809
set_empty_repeat_node_trav(Node* node, Node* empty, ParseEnv* env)
3810
0
{
3811
0
  int r;
3812
3813
0
  switch (ND_TYPE(node)) {
3814
0
  case ND_LIST:
3815
0
  case ND_ALT:
3816
0
    do {
3817
0
      r = set_empty_repeat_node_trav(ND_CAR(node), empty, env);
3818
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
3819
0
    break;
3820
3821
0
  case ND_ANCHOR:
3822
0
    {
3823
0
      AnchorNode* an = ANCHOR_(node);
3824
3825
0
      if (! ANCHOR_HAS_BODY(an)) {
3826
0
        r = 0;
3827
0
        break;
3828
0
      }
3829
3830
0
      switch (an->type) {
3831
0
      case ANCR_PREC_READ:
3832
0
      case ANCR_LOOK_BEHIND:
3833
0
        empty = NULL_NODE;
3834
0
        break;
3835
0
      default:
3836
0
        break;
3837
0
      }
3838
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3839
0
    }
3840
0
    break;
3841
3842
0
  case ND_QUANT:
3843
0
    {
3844
0
      QuantNode* qn = QUANT_(node);
3845
3846
0
      if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
3847
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3848
0
    }
3849
0
    break;
3850
3851
0
  case ND_BAG:
3852
0
    if (IS_NOT_NULL(ND_BODY(node))) {
3853
0
      r = set_empty_repeat_node_trav(ND_BODY(node), empty, env);
3854
0
      if (r != 0) return r;
3855
0
    }
3856
0
    {
3857
0
      BagNode* en = BAG_(node);
3858
3859
0
      r = 0;
3860
0
      if (en->type == BAG_MEMORY) {
3861
0
        if (ND_IS_BACKREF(node)) {
3862
0
          if (IS_NOT_NULL(empty))
3863
0
            PARSEENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
3864
0
        }
3865
0
      }
3866
0
      else if (en->type == BAG_IF_ELSE) {
3867
0
        if (IS_NOT_NULL(en->te.Then)) {
3868
0
          r = set_empty_repeat_node_trav(en->te.Then, empty, env);
3869
0
          if (r != 0) return r;
3870
0
        }
3871
0
        if (IS_NOT_NULL(en->te.Else)) {
3872
0
          r = set_empty_repeat_node_trav(en->te.Else, empty, env);
3873
0
        }
3874
0
      }
3875
0
    }
3876
0
    break;
3877
3878
0
  default:
3879
0
    r = 0;
3880
0
    break;
3881
0
  }
3882
3883
0
  return r;
3884
0
}
3885
3886
static int
3887
is_ancestor_node(Node* node, Node* me)
3888
0
{
3889
0
  Node* parent;
3890
3891
0
  while ((parent = ND_PARENT(me)) != NULL_NODE) {
3892
0
    if (parent == node) return 1;
3893
0
    me = parent;
3894
0
  }
3895
0
  return 0;
3896
0
}
3897
3898
static void
3899
set_empty_status_check_trav(Node* node, ParseEnv* env)
3900
0
{
3901
0
  switch (ND_TYPE(node)) {
3902
0
  case ND_LIST:
3903
0
  case ND_ALT:
3904
0
    do {
3905
0
      set_empty_status_check_trav(ND_CAR(node), env);
3906
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3907
0
    break;
3908
3909
0
  case ND_ANCHOR:
3910
0
    {
3911
0
      AnchorNode* an = ANCHOR_(node);
3912
3913
0
      if (! ANCHOR_HAS_BODY(an)) break;
3914
0
      set_empty_status_check_trav(ND_BODY(node), env);
3915
0
    }
3916
0
    break;
3917
3918
0
  case ND_QUANT:
3919
0
    set_empty_status_check_trav(ND_BODY(node), env);
3920
0
    break;
3921
3922
0
  case ND_BAG:
3923
0
    if (IS_NOT_NULL(ND_BODY(node)))
3924
0
      set_empty_status_check_trav(ND_BODY(node), env);
3925
0
    {
3926
0
      BagNode* en = BAG_(node);
3927
3928
0
      if (en->type == BAG_IF_ELSE) {
3929
0
        if (IS_NOT_NULL(en->te.Then)) {
3930
0
          set_empty_status_check_trav(en->te.Then, env);
3931
0
        }
3932
0
        if (IS_NOT_NULL(en->te.Else)) {
3933
0
          set_empty_status_check_trav(en->te.Else, env);
3934
0
        }
3935
0
      }
3936
0
    }
3937
0
    break;
3938
3939
0
  case ND_BACKREF:
3940
0
    {
3941
0
      int i;
3942
0
      int* backs;
3943
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
3944
0
      BackRefNode* br = BACKREF_(node);
3945
0
      backs = BACKREFS_P(br);
3946
0
      for (i = 0; i < br->back_num; i++) {
3947
0
        Node* ernode = mem_env[backs[i]].empty_repeat_node;
3948
0
        if (IS_NOT_NULL(ernode)) {
3949
0
          if (! is_ancestor_node(ernode, node)) {
3950
0
            MEM_STATUS_LIMIT_ON(QUANT_(ernode)->empty_status_mem, backs[i]);
3951
0
            ND_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
3952
0
            ND_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
3953
0
          }
3954
0
        }
3955
0
      }
3956
0
    }
3957
0
    break;
3958
3959
0
  default:
3960
0
    break;
3961
0
  }
3962
0
}
3963
3964
static void
3965
set_parent_node_trav(Node* node, Node* parent)
3966
0
{
3967
0
  ND_PARENT(node) = parent;
3968
3969
0
  switch (ND_TYPE(node)) {
3970
0
  case ND_LIST:
3971
0
  case ND_ALT:
3972
0
    do {
3973
0
      set_parent_node_trav(ND_CAR(node), node);
3974
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
3975
0
    break;
3976
3977
0
  case ND_ANCHOR:
3978
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
3979
0
    set_parent_node_trav(ND_BODY(node), node);
3980
0
    break;
3981
3982
0
  case ND_QUANT:
3983
0
    set_parent_node_trav(ND_BODY(node), node);
3984
0
    break;
3985
3986
0
  case ND_BAG:
3987
0
    if (IS_NOT_NULL(ND_BODY(node)))
3988
0
      set_parent_node_trav(ND_BODY(node), node);
3989
0
    {
3990
0
      BagNode* en = BAG_(node);
3991
3992
0
      if (en->type == BAG_IF_ELSE) {
3993
0
        if (IS_NOT_NULL(en->te.Then))
3994
0
          set_parent_node_trav(en->te.Then, node);
3995
0
        if (IS_NOT_NULL(en->te.Else)) {
3996
0
          set_parent_node_trav(en->te.Else, node);
3997
0
        }
3998
0
      }
3999
0
    }
4000
0
    break;
4001
4002
0
  default:
4003
0
    break;
4004
0
  }
4005
0
}
4006
4007
4008
#ifdef USE_CALL
4009
4010
0
#define RECURSION_EXIST        (1<<0)
4011
0
#define RECURSION_MUST         (1<<1)
4012
0
#define RECURSION_INFINITE     (1<<2)
4013
4014
static int
4015
infinite_recursive_call_check(Node* node, ParseEnv* env, int head)
4016
0
{
4017
0
  int ret;
4018
0
  int r = 0;
4019
4020
0
  switch (ND_TYPE(node)) {
4021
0
  case ND_LIST:
4022
0
    {
4023
0
      Node *x;
4024
0
      OnigLen min;
4025
4026
0
      x = node;
4027
0
      do {
4028
0
        ret = infinite_recursive_call_check(ND_CAR(x), env, head);
4029
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4030
0
        r |= ret;
4031
0
        if (head != 0) {
4032
0
          min = node_min_byte_len(ND_CAR(x), env);
4033
0
          if (min != 0) head = 0;
4034
0
        }
4035
0
      } while (IS_NOT_NULL(x = ND_CDR(x)));
4036
0
    }
4037
0
    break;
4038
4039
0
  case ND_ALT:
4040
0
    {
4041
0
      int must;
4042
4043
0
      must = RECURSION_MUST;
4044
0
      do {
4045
0
        ret = infinite_recursive_call_check(ND_CAR(node), env, head);
4046
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4047
4048
0
        r    |= (ret & RECURSION_EXIST);
4049
0
        must &= ret;
4050
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4051
0
      r |= must;
4052
0
    }
4053
0
    break;
4054
4055
0
  case ND_QUANT:
4056
0
    if (QUANT_(node)->upper == 0) break;
4057
4058
0
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4059
0
    if (r < 0) return r;
4060
0
    if ((r & RECURSION_MUST) != 0) {
4061
0
      if (QUANT_(node)->lower == 0)
4062
0
        r &= ~RECURSION_MUST;
4063
0
    }
4064
0
    break;
4065
4066
0
  case ND_ANCHOR:
4067
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
4068
0
      break;
4069
    /* fall */
4070
0
  case ND_CALL:
4071
0
    r = infinite_recursive_call_check(ND_BODY(node), env, head);
4072
0
    break;
4073
4074
0
  case ND_BAG:
4075
0
    {
4076
0
      BagNode* en = BAG_(node);
4077
4078
0
      if (en->type == BAG_MEMORY) {
4079
0
        if (ND_IS_MARK2(node))
4080
0
          return 0;
4081
0
        else if (ND_IS_MARK1(node))
4082
0
          return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
4083
0
                  : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
4084
0
        else {
4085
0
          ND_STATUS_ADD(node, MARK2);
4086
0
          r = infinite_recursive_call_check(ND_BODY(node), env, head);
4087
0
          ND_STATUS_REMOVE(node, MARK2);
4088
0
        }
4089
0
      }
4090
0
      else if (en->type == BAG_IF_ELSE) {
4091
0
        int eret;
4092
4093
0
        ret = infinite_recursive_call_check(ND_BODY(node), env, head);
4094
0
        if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4095
0
        r |= ret;
4096
0
        if (IS_NOT_NULL(en->te.Then)) {
4097
0
          OnigLen min;
4098
0
          if (head != 0) {
4099
0
            min = node_min_byte_len(ND_BODY(node), env);
4100
0
          }
4101
0
          else min = 0;
4102
4103
0
          ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head);
4104
0
          if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
4105
0
          r |= ret;
4106
0
        }
4107
0
        if (IS_NOT_NULL(en->te.Else)) {
4108
0
          eret = infinite_recursive_call_check(en->te.Else, env, head);
4109
0
          if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret;
4110
0
          r |= (eret & RECURSION_EXIST);
4111
0
          if ((eret & RECURSION_MUST) == 0)
4112
0
            r &= ~RECURSION_MUST;
4113
0
        }
4114
0
        else {
4115
0
          r &= ~RECURSION_MUST;
4116
0
        }
4117
0
      }
4118
0
      else {
4119
0
        r = infinite_recursive_call_check(ND_BODY(node), env, head);
4120
0
      }
4121
0
    }
4122
0
    break;
4123
4124
0
  default:
4125
0
    break;
4126
0
  }
4127
4128
0
  return r;
4129
0
}
4130
4131
static int
4132
infinite_recursive_call_check_trav(Node* node, ParseEnv* env)
4133
0
{
4134
0
  int r;
4135
4136
0
  switch (ND_TYPE(node)) {
4137
0
  case ND_LIST:
4138
0
  case ND_ALT:
4139
0
    do {
4140
0
      r = infinite_recursive_call_check_trav(ND_CAR(node), env);
4141
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4142
0
    break;
4143
4144
0
  case ND_ANCHOR:
4145
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4146
0
      r = 0;
4147
0
      break;
4148
0
    }
4149
    /* fall */
4150
0
  case ND_QUANT:
4151
0
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4152
0
    break;
4153
4154
0
  case ND_BAG:
4155
0
    {
4156
0
      BagNode* en = BAG_(node);
4157
4158
0
      if (en->type == BAG_MEMORY) {
4159
0
        if (ND_IS_RECURSION(node) && ND_IS_CALLED(node)) {
4160
0
          int ret;
4161
4162
0
          ND_STATUS_ADD(node, MARK1);
4163
4164
0
          ret = infinite_recursive_call_check(ND_BODY(node), env, 1);
4165
0
          if (ret < 0) return ret;
4166
0
          else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
4167
0
            return ONIGERR_NEVER_ENDING_RECURSION;
4168
4169
0
          ND_STATUS_REMOVE(node, MARK1);
4170
0
        }
4171
0
      }
4172
0
      else if (en->type == BAG_IF_ELSE) {
4173
0
        if (IS_NOT_NULL(en->te.Then)) {
4174
0
          r = infinite_recursive_call_check_trav(en->te.Then, env);
4175
0
          if (r != 0) return r;
4176
0
        }
4177
0
        if (IS_NOT_NULL(en->te.Else)) {
4178
0
          r = infinite_recursive_call_check_trav(en->te.Else, env);
4179
0
          if (r != 0) return r;
4180
0
        }
4181
0
      }
4182
0
    }
4183
4184
0
    r = infinite_recursive_call_check_trav(ND_BODY(node), env);
4185
0
    break;
4186
4187
0
  default:
4188
0
    r = 0;
4189
0
    break;
4190
0
  }
4191
4192
0
  return r;
4193
0
}
4194
4195
static int
4196
recursive_call_check(Node* node)
4197
0
{
4198
0
  int r;
4199
4200
0
  switch (ND_TYPE(node)) {
4201
0
  case ND_LIST:
4202
0
  case ND_ALT:
4203
0
    r = 0;
4204
0
    do {
4205
0
      r |= recursive_call_check(ND_CAR(node));
4206
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4207
0
    break;
4208
4209
0
  case ND_ANCHOR:
4210
0
    if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
4211
0
      r = 0;
4212
0
      break;
4213
0
    }
4214
    /* fall */
4215
0
  case ND_QUANT:
4216
0
    r = recursive_call_check(ND_BODY(node));
4217
0
    break;
4218
4219
0
  case ND_CALL:
4220
0
    r = recursive_call_check(ND_BODY(node));
4221
0
    if (r != 0) {
4222
0
      if (ND_IS_MARK1(ND_BODY(node)))
4223
0
        ND_STATUS_ADD(node, RECURSION);
4224
0
    }
4225
0
    break;
4226
4227
0
  case ND_BAG:
4228
0
    {
4229
0
      BagNode* en = BAG_(node);
4230
4231
0
      if (en->type == BAG_MEMORY) {
4232
0
        if (ND_IS_MARK2(node))
4233
0
          return 0;
4234
0
        else if (ND_IS_MARK1(node))
4235
0
          return 1; /* recursion */
4236
0
        else {
4237
0
          ND_STATUS_ADD(node, MARK2);
4238
0
          r = recursive_call_check(ND_BODY(node));
4239
0
          ND_STATUS_REMOVE(node, MARK2);
4240
0
        }
4241
0
      }
4242
0
      else if (en->type == BAG_IF_ELSE) {
4243
0
        r = 0;
4244
0
        if (IS_NOT_NULL(en->te.Then)) {
4245
0
          r |= recursive_call_check(en->te.Then);
4246
0
        }
4247
0
        if (IS_NOT_NULL(en->te.Else)) {
4248
0
          r |= recursive_call_check(en->te.Else);
4249
0
        }
4250
0
        r |= recursive_call_check(ND_BODY(node));
4251
0
      }
4252
0
      else {
4253
0
        r = recursive_call_check(ND_BODY(node));
4254
0
      }
4255
0
    }
4256
0
    break;
4257
4258
0
  default:
4259
0
    r = 0;
4260
0
    break;
4261
0
  }
4262
4263
0
  return r;
4264
0
}
4265
4266
0
#define IN_RECURSION         (1<<0)
4267
0
#define FOUND_CALLED_NODE    1
4268
4269
static int
4270
recursive_call_check_trav(Node* node, ParseEnv* env, int state)
4271
0
{
4272
0
  int r = 0;
4273
4274
0
  switch (ND_TYPE(node)) {
4275
0
  case ND_LIST:
4276
0
  case ND_ALT:
4277
0
    {
4278
0
      int ret;
4279
0
      do {
4280
0
        ret = recursive_call_check_trav(ND_CAR(node), env, state);
4281
0
        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
4282
0
        else if (ret < 0) return ret;
4283
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
4284
0
    }
4285
0
    break;
4286
4287
0
  case ND_QUANT:
4288
0
    r = recursive_call_check_trav(ND_BODY(node), env, state);
4289
0
    if (QUANT_(node)->upper == 0) {
4290
0
      if (r == FOUND_CALLED_NODE)
4291
0
        QUANT_(node)->include_referred = 1;
4292
0
    }
4293
0
    break;
4294
4295
0
  case ND_ANCHOR:
4296
0
    {
4297
0
      AnchorNode* an = ANCHOR_(node);
4298
0
      if (ANCHOR_HAS_BODY(an))
4299
0
        r = recursive_call_check_trav(ND_ANCHOR_BODY(an), env, state);
4300
0
    }
4301
0
    break;
4302
4303
0
  case ND_BAG:
4304
0
    {
4305
0
      int ret;
4306
0
      int state1;
4307
0
      BagNode* en = BAG_(node);
4308
4309
0
      if (en->type == BAG_MEMORY) {
4310
0
        if (ND_IS_CALLED(node)) {
4311
0
          r = FOUND_CALLED_NODE;
4312
0
          goto check_recursion;
4313
0
        }
4314
0
        else if ((state & IN_RECURSION) != 0) {
4315
0
        check_recursion:
4316
0
          if (! ND_IS_RECURSION(node)) {
4317
0
            ND_STATUS_ADD(node, MARK1);
4318
0
            ret = recursive_call_check(ND_BODY(node));
4319
0
            if (ret != 0) {
4320
0
              ND_STATUS_ADD(node, RECURSION);
4321
0
              MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
4322
0
            }
4323
0
            ND_STATUS_REMOVE(node, MARK1);
4324
0
          }
4325
0
        }
4326
0
      }
4327
4328
0
      state1 = state;
4329
0
      if (ND_IS_RECURSION(node))
4330
0
        state1 |= IN_RECURSION;
4331
4332
0
      ret = recursive_call_check_trav(ND_BODY(node), env, state1);
4333
0
      if (ret == FOUND_CALLED_NODE)
4334
0
        r = FOUND_CALLED_NODE;
4335
4336
0
      if (en->type == BAG_IF_ELSE) {
4337
0
        if (IS_NOT_NULL(en->te.Then)) {
4338
0
          ret = recursive_call_check_trav(en->te.Then, env, state1);
4339
0
          if (ret == FOUND_CALLED_NODE)
4340
0
            r = FOUND_CALLED_NODE;
4341
0
        }
4342
0
        if (IS_NOT_NULL(en->te.Else)) {
4343
0
          ret = recursive_call_check_trav(en->te.Else, env, state1);
4344
0
          if (ret == FOUND_CALLED_NODE)
4345
0
            r = FOUND_CALLED_NODE;
4346
0
        }
4347
0
      }
4348
0
    }
4349
0
    break;
4350
4351
0
  default:
4352
0
    break;
4353
0
  }
4354
4355
0
  return r;
4356
0
}
4357
4358
#endif
4359
4360
static void
4361
remove_from_list(Node* prev, Node* a)
4362
0
{
4363
0
  if (ND_CDR(prev) != a) return ;
4364
4365
0
  ND_CDR(prev) = ND_CDR(a);
4366
0
  ND_CDR(a) = NULL_NODE;
4367
0
}
4368
4369
static int
4370
reduce_string_list(Node* node, OnigEncoding enc)
4371
0
{
4372
0
  int r = 0;
4373
4374
0
  switch (ND_TYPE(node)) {
4375
0
  case ND_LIST:
4376
0
    {
4377
0
      Node* prev;
4378
0
      Node* curr;
4379
0
      Node* prev_node;
4380
0
      Node* next_node;
4381
4382
0
      prev = NULL_NODE;
4383
0
      do {
4384
0
        next_node = ND_CDR(node);
4385
0
        curr = ND_CAR(node);
4386
0
        if (ND_TYPE(curr) == ND_STRING) {
4387
0
          if (IS_NULL(prev)
4388
0
              || STR_(curr)->flag  != STR_(prev)->flag
4389
0
              || ND_STATUS(curr) != ND_STATUS(prev)) {
4390
0
            prev = curr;
4391
0
            prev_node = node;
4392
0
          }
4393
0
          else {
4394
0
            r = node_str_node_cat(prev, curr);
4395
0
            if (r != 0) return r;
4396
0
            remove_from_list(prev_node, node);
4397
0
            onig_node_free(node);
4398
0
          }
4399
0
        }
4400
0
        else {
4401
0
          if (IS_NOT_NULL(prev)) {
4402
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4403
0
            StrNode* sn = STR_(prev);
4404
0
            if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4405
0
              return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4406
0
#endif
4407
0
            prev = NULL_NODE;
4408
0
          }
4409
0
          r = reduce_string_list(curr, enc);
4410
0
          if (r != 0) return r;
4411
0
          prev_node = node;
4412
0
        }
4413
4414
0
        node = next_node;
4415
0
      } while (r == 0 && IS_NOT_NULL(node));
4416
4417
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4418
0
      if (IS_NOT_NULL(prev)) {
4419
0
        StrNode* sn = STR_(prev);
4420
0
        if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4421
0
          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4422
0
      }
4423
0
#endif
4424
0
    }
4425
0
    break;
4426
4427
0
  case ND_ALT:
4428
0
    do {
4429
0
      r = reduce_string_list(ND_CAR(node), enc);
4430
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4431
0
    break;
4432
4433
0
#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
4434
0
  case ND_STRING:
4435
0
    {
4436
0
      StrNode* sn = STR_(node);
4437
0
      if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
4438
0
        return ONIGERR_INVALID_WIDE_CHAR_VALUE;
4439
0
    }
4440
0
    break;
4441
0
#endif
4442
4443
0
  case ND_ANCHOR:
4444
0
    if (IS_NULL(ND_BODY(node)))
4445
0
      break;
4446
    /* fall */
4447
0
  case ND_QUANT:
4448
0
    r = reduce_string_list(ND_BODY(node), enc);
4449
0
    break;
4450
4451
0
  case ND_BAG:
4452
0
    {
4453
0
      BagNode* en = BAG_(node);
4454
4455
0
      r = reduce_string_list(ND_BODY(node), enc);
4456
0
      if (r != 0) return r;
4457
4458
0
      if (en->type == BAG_IF_ELSE) {
4459
0
        if (IS_NOT_NULL(en->te.Then)) {
4460
0
          r = reduce_string_list(en->te.Then, enc);
4461
0
          if (r != 0) return r;
4462
0
        }
4463
0
        if (IS_NOT_NULL(en->te.Else)) {
4464
0
          r = reduce_string_list(en->te.Else, enc);
4465
0
          if (r != 0) return r;
4466
0
        }
4467
0
      }
4468
0
    }
4469
0
    break;
4470
4471
0
  default:
4472
0
    break;
4473
0
  }
4474
4475
0
  return r;
4476
0
}
4477
4478
4479
0
/* Bit flags OR-ed into the `state` argument that the tree-walking passes
   in this file hand down to their children.  As used in the visible code:
   tune_look_behind() adds IN_LOOK_BEHIND (plus IN_NOT for the negative
   form), tune_call() adds IN_ZERO_REPEAT below a quantifier whose upper
   bound is 0, and tune_called_state_call() adds IN_ALT, IN_REAL_REPEAT,
   IN_VAR_REPEAT, IN_NOT and IN_PEEK. */
#define IN_ALT          (1<<0)
4480
0
#define IN_NOT          (1<<1)
4481
0
#define IN_REAL_REPEAT  (1<<2)
4482
0
#define IN_VAR_REPEAT   (1<<3)
4483
0
#define IN_ZERO_REPEAT  (1<<4)
4484
0
#define IN_MULTI_ENTRY  (1<<5)
4485
0
#define IN_PREC_READ    (1<<6)
4486
0
#define IN_LOOK_BEHIND  (1<<7)
4487
0
#define IN_PEEK         (1<<8)
4488
4489
/* divide different length alternatives in look-behind.
4490
  (?<=A|B) ==> (?<=A)|(?<=B)
4491
  (?<!A|B) ==> (?<!A)(?<!B)
4492
*/
4493
static int
4494
divide_look_behind_alternatives(Node* node)
4495
0
{
4496
0
  int r;
4497
0
  int anc_type;
4498
0
  Node *head, *np, *insert_node;
4499
0
  AnchorNode* an;
4500
4501
0
  an = ANCHOR_(node);
4502
0
  anc_type = an->type;
4503
4504
0
  head = ND_ANCHOR_BODY(an);
4505
0
  np = ND_CAR(head);
4506
0
  node_swap(node, head);
4507
0
  ND_CAR(node) = head;
4508
0
  ND_BODY(head) = np;
4509
4510
0
  np = node;
4511
0
  while (IS_NOT_NULL(np = ND_CDR(np))) {
4512
0
    r = onig_node_copy(&insert_node, head);
4513
0
    if (r != 0) return r;
4514
0
    CHECK_NULL_RETURN_MEMERR(insert_node);
4515
0
    ND_BODY(insert_node) = ND_CAR(np);
4516
0
    ND_CAR(np) = insert_node;
4517
0
  }
4518
4519
0
  if (anc_type == ANCR_LOOK_BEHIND_NOT) {
4520
0
    np = node;
4521
0
    do {
4522
0
      ND_SET_TYPE(np, ND_LIST);  /* alt -> list */
4523
0
    } while (IS_NOT_NULL(np = ND_CDR(np)));
4524
0
  }
4525
0
  return 0;
4526
0
}
4527
4528
static int
4529
node_reduce_in_look_behind(Node* node)
4530
0
{
4531
0
  NodeType type;
4532
0
  Node* body;
4533
4534
0
  if (ND_TYPE(node) != ND_QUANT) return 0;
4535
4536
0
  body = ND_BODY(node);
4537
0
  type = ND_TYPE(body);
4538
0
  if (type == ND_STRING || type == ND_CTYPE ||
4539
0
      type == ND_CCLASS || type == ND_BACKREF) {
4540
0
    QuantNode* qn = QUANT_(node);
4541
0
    qn->upper = qn->lower;
4542
0
    if (qn->upper == 0)
4543
0
      return 1; /* removed */
4544
0
  }
4545
4546
0
  return 0;
4547
0
}
4548
4549
static int
4550
list_reduce_in_look_behind(Node* node)
4551
0
{
4552
0
  int r;
4553
4554
0
  switch (ND_TYPE(node)) {
4555
0
  case ND_QUANT:
4556
0
    r = node_reduce_in_look_behind(node);
4557
0
    if (r > 0) r = 0;
4558
0
    break;
4559
4560
0
  case ND_LIST:
4561
0
    do {
4562
0
      r = node_reduce_in_look_behind(ND_CAR(node));
4563
0
      if (r <= 0) break;
4564
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
4565
0
    break;
4566
4567
0
  default:
4568
0
    r = 0;
4569
0
    break;
4570
0
  }
4571
4572
0
  return r;
4573
0
}
4574
4575
static int
4576
alt_reduce_in_look_behind(Node* node, regex_t* reg, ParseEnv* env)
4577
0
{
4578
0
  int r;
4579
4580
0
  switch (ND_TYPE(node)) {
4581
0
  case ND_ALT:
4582
0
    do {
4583
0
      r = list_reduce_in_look_behind(ND_CAR(node));
4584
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
4585
0
    break;
4586
4587
0
  default:
4588
0
    r = list_reduce_in_look_behind(node);
4589
0
    break;
4590
0
  }
4591
4592
0
  return r;
4593
0
}
4594
4595
static int tune_tree(Node* node, regex_t* reg, int state, ParseEnv* env);
4596
4597
static int
4598
tune_look_behind(Node* node, regex_t* reg, int state, ParseEnv* env)
4599
0
{
4600
0
  int r;
4601
0
  int state1;
4602
0
  int used;
4603
0
  MinMaxCharLen ci;
4604
0
  Node* body;
4605
0
  AnchorNode* an = ANCHOR_(node);
4606
4607
0
  used = FALSE;
4608
0
  r = check_node_in_look_behind(ND_ANCHOR_BODY(an),
4609
0
                                an->type == ANCR_LOOK_BEHIND_NOT ? 1 : 0,
4610
0
                                &used);
4611
0
  if (r < 0) return r;
4612
0
  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4613
4614
0
  if (an->type == ANCR_LOOK_BEHIND_NOT)
4615
0
    state1 = state | IN_NOT | IN_LOOK_BEHIND;
4616
0
  else
4617
0
    state1 = state | IN_LOOK_BEHIND;
4618
4619
0
  body = ND_ANCHOR_BODY(an);
4620
  /* Execute tune_tree(body) before call node_char_len().
4621
     Because case-fold expansion must be done before node_char_len().
4622
   */
4623
0
  r = tune_tree(body, reg, state1, env);
4624
0
  if (r != 0) return r;
4625
4626
0
  r = alt_reduce_in_look_behind(body, reg, env);
4627
0
  if (r != 0) return r;
4628
4629
0
  r = node_char_len(body, reg, &ci, env);
4630
0
  if (r >= 0) {
4631
    /* #177: overflow in onigenc_step_back() */
4632
0
    if ((ci.max != INFINITE_LEN && ci.max > LOOK_BEHIND_MAX_CHAR_LEN)
4633
0
      || ci.min > LOOK_BEHIND_MAX_CHAR_LEN) {
4634
0
      return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4635
0
    }
4636
4637
0
    if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) {
4638
0
      if (an->type == ANCR_LOOK_BEHIND_NOT)
4639
0
        r = onig_node_reset_fail(node);
4640
0
      else
4641
0
        r = onig_node_reset_empty(node);
4642
4643
0
      return r;
4644
0
    }
4645
4646
0
    if (r == CHAR_LEN_TOP_ALT_FIXED) {
4647
0
      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) {
4648
0
        r = divide_look_behind_alternatives(node);
4649
0
        if (r == 0)
4650
0
          r = tune_tree(node, reg, state, env);
4651
0
      }
4652
0
      else if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND))
4653
0
        goto normal;
4654
0
      else
4655
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4656
0
    }
4657
0
    else { /* CHAR_LEN_NORMAL */
4658
0
    normal:
4659
0
      if (ci.min == INFINITE_LEN) {
4660
0
        r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4661
0
      }
4662
0
      else {
4663
0
        if (ci.min != ci.max &&
4664
0
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND)) {
4665
0
          r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4666
0
        }
4667
0
        else {
4668
0
          Node* tail;
4669
4670
          /* check lead_node is already set by double call after
4671
             divide_look_behind_alternatives() */
4672
0
          if (IS_NULL(an->lead_node)) {
4673
0
            an->char_min_len = ci.min;
4674
0
            an->char_max_len = ci.max;
4675
0
            r = get_tree_tail_literal(body, &tail, reg, 0);
4676
0
            if (r == GET_VALUE_FOUND) {
4677
0
              r = onig_node_copy(&(an->lead_node), tail);
4678
0
              if (r != 0) return r;
4679
0
            }
4680
0
          }
4681
0
          r = ONIG_NORMAL;
4682
0
        }
4683
0
      }
4684
0
    }
4685
0
  }
4686
4687
0
  return r;
4688
0
}
4689
4690
static int
4691
tune_next(Node* node, Node* next_node, regex_t* reg)
4692
0
{
4693
0
  int called;
4694
0
  NodeType type;
4695
4696
0
  called = FALSE;
4697
4698
0
 retry:
4699
0
  type = ND_TYPE(node);
4700
0
  if (type == ND_QUANT) {
4701
0
    QuantNode* qn = QUANT_(node);
4702
0
    if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
4703
0
#ifdef USE_QUANT_PEEK_NEXT
4704
0
      if (called == FALSE) {
4705
0
        Node* n = get_tree_head_literal(next_node, 1, reg);
4706
        /* '\0': for UTF-16BE etc... */
4707
0
        if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
4708
0
          qn->next_head_exact = n;
4709
0
        }
4710
0
      }
4711
0
#endif
4712
      /* automatic posseivation a*b ==> (?>a*)b */
4713
0
      if (qn->lower <= 1) {
4714
0
        if (is_strict_real_node(ND_BODY(node))) {
4715
0
          Node *x, *y;
4716
0
          x = get_tree_head_literal(ND_BODY(node), 0, reg);
4717
0
          if (IS_NOT_NULL(x)) {
4718
0
            y = get_tree_head_literal(next_node,  0, reg);
4719
0
            if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
4720
0
              Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
4721
0
              CHECK_NULL_RETURN_MEMERR(en);
4722
0
              ND_STATUS_ADD(en, STRICT_REAL_REPEAT);
4723
0
              node_swap(node, en);
4724
0
              ND_BODY(node) = en;
4725
0
            }
4726
0
          }
4727
0
        }
4728
0
      }
4729
0
    }
4730
0
  }
4731
0
  else if (type == ND_BAG) {
4732
0
    BagNode* en = BAG_(node);
4733
0
    if (en->type == BAG_MEMORY) {
4734
0
      if (ND_IS_CALLED(node))
4735
0
        called = TRUE;
4736
0
      node = ND_BODY(node);
4737
0
      goto retry;
4738
0
    }
4739
0
  }
4740
0
  return 0;
4741
0
}
4742
4743
4744
static int
4745
is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])
4746
0
{
4747
0
  int i;
4748
4749
0
  for (i = 0; i < n; i++) {
4750
0
    OnigCaseFoldCodeItem* item = items + i;
4751
0
    if (item->code_len != 1) return 0;
4752
0
  }
4753
4754
0
  return 1;
4755
0
}
4756
4757
static int
4758
get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[],
4759
                                     OnigLen* rmin, OnigLen* rmax)
4760
0
{
4761
0
  int i;
4762
0
  OnigLen len, minlen, maxlen;
4763
4764
0
  minlen = INFINITE_LEN;
4765
0
  maxlen = 0;
4766
0
  for (i = 0; i < n; i++) {
4767
0
    OnigCaseFoldCodeItem* item = items + i;
4768
4769
0
    len = item->byte_len;
4770
0
    if (len < minlen) minlen = len;
4771
0
    if (len > maxlen) maxlen = len;
4772
0
  }
4773
4774
0
  *rmin = minlen;
4775
0
  *rmax = maxlen;
4776
0
  return 0;
4777
0
}
4778
4779
static int
4780
make_code_list_to_string(Node** rnode, OnigEncoding enc,
4781
                         int n, OnigCodePoint codes[])
4782
0
{
4783
0
  int r, i, len;
4784
0
  Node* node;
4785
0
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4786
4787
0
  *rnode = NULL_NODE;
4788
0
  node = onig_node_new_str(NULL, NULL);
4789
0
  CHECK_NULL_RETURN_MEMERR(node);
4790
4791
0
  for (i = 0; i < n; i++) {
4792
0
    len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
4793
0
    if (len < 0) {
4794
0
      r = len;
4795
0
      goto err;
4796
0
    }
4797
4798
0
    r = onig_node_str_cat(node, buf, buf + len);
4799
0
    if (r != 0) goto err;
4800
0
  }
4801
4802
0
  *rnode = node;
4803
0
  return 0;
4804
4805
0
 err:
4806
0
  onig_node_free(node);
4807
0
  return r;
4808
0
}
4809
4810
static int
4811
unravel_cf_node_add(Node** rlist, Node* add)
4812
0
{
4813
0
  Node *list;
4814
4815
0
  list = *rlist;
4816
0
  if (IS_NULL(list)) {
4817
0
    list = onig_node_new_list(add, NULL);
4818
0
    CHECK_NULL_RETURN_MEMERR(list);
4819
0
    *rlist = list;
4820
0
  }
4821
0
  else {
4822
0
    Node* r = node_list_add(list, add);
4823
0
    CHECK_NULL_RETURN_MEMERR(r);
4824
0
  }
4825
4826
0
  return 0;
4827
0
}
4828
4829
static int
4830
unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end,
4831
                      unsigned int flag)
4832
0
{
4833
0
  int r;
4834
0
  Node *sn, *list;
4835
4836
0
  list = *rlist;
4837
0
  sn   = *rsn;
4838
4839
0
  if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) {
4840
0
    r = onig_node_str_cat(sn, s, end);
4841
0
  }
4842
0
  else {
4843
0
    sn = onig_node_new_str(s, end);
4844
0
    CHECK_NULL_RETURN_MEMERR(sn);
4845
4846
0
    STR_(sn)->flag = flag;
4847
0
    r = unravel_cf_node_add(&list, sn);
4848
0
  }
4849
4850
0
  if (r == 0) {
4851
0
    *rlist = list;
4852
0
    *rsn = sn;
4853
0
  }
4854
0
  return r;
4855
0
}
4856
4857
static int
4858
unravel_cf_string_alt_or_cc_add(Node** rlist, int n,
4859
            OnigCaseFoldCodeItem items[], OnigEncoding enc,
4860
            OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
4861
0
{
4862
0
  int r, i;
4863
0
  Node* node;
4864
4865
0
  if (is_all_code_len_1_items(n, items)) {
4866
0
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4867
4868
0
    codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end);
4869
0
    for (i = 0; i < n; i++) {
4870
0
      OnigCaseFoldCodeItem* item = items + i;
4871
0
      codes[i+1] = item->code[0];
4872
0
    }
4873
0
    r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes);
4874
0
    if (r != 0) return r;
4875
0
  }
4876
0
  else {
4877
0
    Node *snode, *alt, *curr;
4878
4879
0
    snode = onig_node_new_str(s, end);
4880
0
    CHECK_NULL_RETURN_MEMERR(snode);
4881
0
    node = curr = onig_node_new_alt(snode, NULL_NODE);
4882
0
    if (IS_NULL(curr)) {
4883
0
      onig_node_free(snode);
4884
0
      return ONIGERR_MEMORY;
4885
0
    }
4886
4887
0
    r = 0;
4888
0
    for (i = 0; i < n; i++) {
4889
0
      OnigCaseFoldCodeItem* item = items + i;
4890
0
      r = make_code_list_to_string(&snode, enc, item->code_len, item->code);
4891
0
      if (r != 0) {
4892
0
        onig_node_free(node);
4893
0
        return r;
4894
0
      }
4895
4896
0
      alt = onig_node_new_alt(snode, NULL_NODE);
4897
0
      if (IS_NULL(alt)) {
4898
0
        onig_node_free(snode);
4899
0
        onig_node_free(node);
4900
0
        return ONIGERR_MEMORY;
4901
0
      }
4902
4903
0
      ND_CDR(curr) = alt;
4904
0
      curr = alt;
4905
0
    }
4906
0
  }
4907
4908
0
  r = unravel_cf_node_add(rlist, node);
4909
0
  if (r != 0) onig_node_free(node);
4910
0
  return r;
4911
0
}
4912
4913
static int
4914
unravel_cf_look_behind_add(Node** rlist, Node** rsn,
4915
                int n, OnigCaseFoldCodeItem items[], OnigEncoding enc,
4916
                UChar* s, OnigLen one_len)
4917
0
{
4918
0
  int r, i, found;
4919
4920
0
  found = FALSE;
4921
0
  for (i = 0; i < n; i++) {
4922
0
    OnigCaseFoldCodeItem* item = items + i;
4923
0
    if (item->byte_len == one_len) {
4924
0
      if (item->code_len == 1) {
4925
0
        found = TRUE;
4926
0
        break;
4927
0
      }
4928
0
    }
4929
0
  }
4930
4931
0
  if (found == FALSE) {
4932
0
    r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */);
4933
0
  }
4934
0
  else {
4935
0
    Node* node;
4936
0
    OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
4937
4938
0
    found = 0;
4939
0
    codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len);
4940
0
    for (i = 0; i < n; i++) {
4941
0
      OnigCaseFoldCodeItem* item = items + i;
4942
0
      if (item->byte_len == one_len) {
4943
0
        if (item->code_len == 1) {
4944
0
          codes[found++] = item->code[0];
4945
0
        }
4946
0
      }
4947
0
    }
4948
0
    r = onig_new_cclass_with_code_list(&node, enc, found, codes);
4949
0
    if (r != 0) return r;
4950
4951
0
    r = unravel_cf_node_add(rlist, node);
4952
0
    if (r != 0) onig_node_free(node);
4953
4954
0
    *rsn = NULL_NODE;
4955
0
  }
4956
4957
0
  return r;
4958
0
}
4959
4960
static int
4961
unravel_case_fold_string(Node* node, regex_t* reg, int state)
4962
0
{
4963
0
  int r, n, in_look_behind;
4964
0
  OnigLen min_len, max_len, one_len;
4965
0
  UChar *start, *end, *p, *q;
4966
0
  StrNode* snode;
4967
0
  Node *sn, *list;
4968
0
  OnigEncoding enc;
4969
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4970
4971
0
  if (ND_STRING_IS_CASE_EXPANDED(node)) return 0;
4972
4973
0
  ND_STATUS_REMOVE(node, IGNORECASE);
4974
0
  snode = STR_(node);
4975
0
  start = snode->s;
4976
0
  end   = snode->end;
4977
0
  if (start >= end) return 0;
4978
4979
0
  in_look_behind = (state & IN_LOOK_BEHIND) != 0;
4980
0
  enc = reg->enc;
4981
4982
0
  list = sn = NULL_NODE;
4983
0
  p = start;
4984
0
  while (p < end) {
4985
0
    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end,
4986
0
                                           items);
4987
0
    if (n < 0) {
4988
0
      r = n;
4989
0
      goto err;
4990
0
    }
4991
4992
0
    one_len = (OnigLen )enclen(enc, p);
4993
0
    if (n == 0) {
4994
0
      q = p + one_len;
4995
0
      if (q > end) q = end;
4996
0
      r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */);
4997
0
      if (r != 0) goto err;
4998
0
    }
4999
0
    else {
5000
0
      if (in_look_behind != 0) {
5001
0
        q = p + one_len;
5002
0
        if (items[0].byte_len != one_len) {
5003
0
          r = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, q,
5004
0
                                                 items);
5005
0
          if (r < 0) goto err;
5006
0
          n = r;
5007
0
        }
5008
0
        r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len);
5009
0
        if (r != 0) goto err;
5010
0
      }
5011
0
      else {
5012
0
        get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len);
5013
0
        if (min_len != max_len) {
5014
0
          r = ONIGERR_PARSER_BUG;
5015
0
          goto err;
5016
0
        }
5017
5018
0
        q = p + max_len;
5019
0
        r = unravel_cf_string_alt_or_cc_add(&list, n, items, enc,
5020
0
                                            reg->case_fold_flag, p, q);
5021
0
        if (r != 0) goto err;
5022
0
        sn = NULL_NODE;
5023
0
      }
5024
0
    }
5025
5026
0
    p = q;
5027
0
  }
5028
5029
0
  if (IS_NOT_NULL(list)) {
5030
0
    if (node_list_len(list) == 1) {
5031
0
      node_swap(node, ND_CAR(list));
5032
0
    }
5033
0
    else {
5034
0
      node_swap(node, list);
5035
0
    }
5036
0
    onig_node_free(list);
5037
0
  }
5038
0
  else {
5039
0
    node_swap(node, sn);
5040
0
    onig_node_free(sn);
5041
0
  }
5042
0
  return 0;
5043
5044
0
 err:
5045
0
  if (IS_NOT_NULL(list))
5046
0
    onig_node_free(list);
5047
0
  else if (IS_NOT_NULL(sn))
5048
0
    onig_node_free(sn);
5049
5050
0
  return r;
5051
0
}
5052
5053
#ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
5054
static enum BodyEmptyType
5055
quantifiers_memory_node_info(Node* node)
5056
0
{
5057
0
  int r = BODY_MAY_BE_EMPTY;
5058
5059
0
  switch (ND_TYPE(node)) {
5060
0
  case ND_LIST:
5061
0
  case ND_ALT:
5062
0
    {
5063
0
      int v;
5064
0
      do {
5065
0
        v = quantifiers_memory_node_info(ND_CAR(node));
5066
0
        if (v > r) r = v;
5067
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
5068
0
    }
5069
0
    break;
5070
5071
0
#ifdef USE_CALL
5072
0
  case ND_CALL:
5073
0
    if (ND_IS_RECURSION(node)) {
5074
0
      return BODY_MAY_BE_EMPTY_REC; /* tiny version */
5075
0
    }
5076
0
    else
5077
0
      r = quantifiers_memory_node_info(ND_BODY(node));
5078
0
    break;
5079
0
#endif
5080
5081
0
  case ND_QUANT:
5082
0
    {
5083
0
      QuantNode* qn = QUANT_(node);
5084
0
      if (qn->upper != 0) {
5085
0
        r = quantifiers_memory_node_info(ND_BODY(node));
5086
0
      }
5087
0
    }
5088
0
    break;
5089
5090
0
  case ND_BAG:
5091
0
    {
5092
0
      BagNode* en = BAG_(node);
5093
0
      switch (en->type) {
5094
0
      case BAG_MEMORY:
5095
0
        if (ND_IS_RECURSION(node)) {
5096
0
          return BODY_MAY_BE_EMPTY_REC;
5097
0
        }
5098
0
        return BODY_MAY_BE_EMPTY_MEM;
5099
0
        break;
5100
5101
0
      case BAG_OPTION:
5102
0
      case BAG_STOP_BACKTRACK:
5103
0
        r = quantifiers_memory_node_info(ND_BODY(node));
5104
0
        break;
5105
0
      case BAG_IF_ELSE:
5106
0
        {
5107
0
          int v;
5108
0
          r = quantifiers_memory_node_info(ND_BODY(node));
5109
0
          if (IS_NOT_NULL(en->te.Then)) {
5110
0
            v = quantifiers_memory_node_info(en->te.Then);
5111
0
            if (v > r) r = v;
5112
0
          }
5113
0
          if (IS_NOT_NULL(en->te.Else)) {
5114
0
            v = quantifiers_memory_node_info(en->te.Else);
5115
0
            if (v > r) r = v;
5116
0
          }
5117
0
        }
5118
0
        break;
5119
0
      }
5120
0
    }
5121
0
    break;
5122
5123
0
  case ND_BACKREF:
5124
0
  case ND_STRING:
5125
0
  case ND_CTYPE:
5126
0
  case ND_CCLASS:
5127
0
  case ND_ANCHOR:
5128
0
  case ND_GIMMICK:
5129
0
  default:
5130
0
    break;
5131
0
  }
5132
5133
0
  return r;
5134
0
}
5135
#endif /* USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT */
5136
5137
5138
#ifdef USE_CALL
5139
5140
#ifdef __GNUC__
5141
__inline
5142
#endif
5143
static int
5144
check_call_reference(CallNode* cn, ParseEnv* env, int state)
5145
0
{
5146
0
  MemEnv* mem_env = PARSEENV_MEMENV(env);
5147
5148
0
  if (cn->by_number != 0) {
5149
0
    int gnum = cn->called_gnum;
5150
5151
0
    if (env->num_named > 0 &&
5152
0
        IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5153
0
        ! OPTON_CAPTURE_GROUP(env->options)) {
5154
0
      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
5155
0
    }
5156
5157
0
    if (gnum > env->num_mem) {
5158
0
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
5159
0
                                     cn->name, cn->name_end);
5160
0
      return ONIGERR_UNDEFINED_GROUP_REFERENCE;
5161
0
    }
5162
5163
0
  set_call_attr:
5164
0
    ND_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node;
5165
0
    if (IS_NULL(ND_CALL_BODY(cn))) {
5166
0
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5167
0
                                     cn->name, cn->name_end);
5168
0
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5169
0
    }
5170
5171
0
    ND_STATUS_ADD(ND_CALL_BODY(cn), REFERENCED);
5172
0
  }
5173
0
  else {
5174
0
    int *refs;
5175
5176
0
    int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
5177
0
    if (n <= 0) {
5178
0
      onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
5179
0
                                     cn->name, cn->name_end);
5180
0
      return ONIGERR_UNDEFINED_NAME_REFERENCE;
5181
0
    }
5182
0
    else if (n > 1) {
5183
0
      onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
5184
0
                                     cn->name, cn->name_end);
5185
0
      return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
5186
0
    }
5187
0
    else {
5188
0
      cn->called_gnum = refs[0];
5189
0
      goto set_call_attr;
5190
0
    }
5191
0
  }
5192
5193
0
  return 0;
5194
0
}
5195
5196
#ifdef USE_WHOLE_OPTIONS
5197
static int
5198
check_whole_options_position(Node* node /* root */)
5199
0
{
5200
0
  int is_list;
5201
5202
0
  is_list = FALSE;
5203
5204
0
 start:
5205
0
  switch (ND_TYPE(node)) {
5206
0
  case ND_LIST:
5207
0
    if (IS_NOT_NULL(ND_CDR(node)))
5208
0
      is_list = TRUE;
5209
5210
0
    node = ND_CAR(node);
5211
0
    goto start;
5212
0
    break;
5213
5214
0
  case ND_BAG:
5215
0
    {
5216
0
      BagNode* en = BAG_(node);
5217
5218
0
      if (en->type == BAG_OPTION) {
5219
0
        if (ND_IS_WHOLE_OPTIONS(node)) {
5220
0
          if (is_list == TRUE && IS_NOT_NULL(ND_BODY(node)))
5221
0
            break;
5222
5223
0
          return 0;
5224
0
        }
5225
0
      }
5226
0
    }
5227
0
    break;
5228
5229
0
  default:
5230
0
    break;
5231
0
  }
5232
5233
0
  return ONIGERR_INVALID_GROUP_OPTION;
5234
0
}
5235
#endif
5236
5237
static void
5238
tune_call2_call(Node* node)
5239
0
{
5240
0
  switch (ND_TYPE(node)) {
5241
0
  case ND_LIST:
5242
0
  case ND_ALT:
5243
0
    do {
5244
0
      tune_call2_call(ND_CAR(node));
5245
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5246
0
    break;
5247
5248
0
  case ND_QUANT:
5249
0
    tune_call2_call(ND_BODY(node));
5250
0
    break;
5251
5252
0
  case ND_ANCHOR:
5253
0
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5254
0
      tune_call2_call(ND_BODY(node));
5255
0
    break;
5256
5257
0
  case ND_BAG:
5258
0
    {
5259
0
      BagNode* en = BAG_(node);
5260
5261
0
      if (en->type == BAG_MEMORY) {
5262
0
        if (! ND_IS_MARK1(node)) {
5263
0
          ND_STATUS_ADD(node, MARK1);
5264
0
          tune_call2_call(ND_BODY(node));
5265
0
          ND_STATUS_REMOVE(node, MARK1);
5266
0
        }
5267
0
      }
5268
0
      else if (en->type == BAG_IF_ELSE) {
5269
0
        tune_call2_call(ND_BODY(node));
5270
0
        if (IS_NOT_NULL(en->te.Then))
5271
0
          tune_call2_call(en->te.Then);
5272
0
        if (IS_NOT_NULL(en->te.Else))
5273
0
          tune_call2_call(en->te.Else);
5274
0
      }
5275
0
      else {
5276
0
        tune_call2_call(ND_BODY(node));
5277
0
      }
5278
0
    }
5279
0
    break;
5280
5281
0
  case ND_CALL:
5282
0
    if (! ND_IS_MARK1(node)) {
5283
0
      ND_STATUS_ADD(node, MARK1);
5284
0
      {
5285
0
        CallNode* cn = CALL_(node);
5286
0
        Node* called = ND_CALL_BODY(cn);
5287
5288
0
        cn->entry_count++;
5289
5290
0
        ND_STATUS_ADD(called, CALLED);
5291
0
        BAG_(called)->m.entry_count++;
5292
0
        tune_call2_call(called);
5293
0
      }
5294
0
      ND_STATUS_REMOVE(node, MARK1);
5295
0
    }
5296
0
    break;
5297
5298
0
  default:
5299
0
    break;
5300
0
  }
5301
0
}
5302
5303
static int
5304
tune_call(Node* node, ParseEnv* env, int state)
5305
0
{
5306
0
  int r;
5307
5308
0
  switch (ND_TYPE(node)) {
5309
0
  case ND_LIST:
5310
0
  case ND_ALT:
5311
0
    do {
5312
0
      r = tune_call(ND_CAR(node), env, state);
5313
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5314
0
    break;
5315
5316
0
  case ND_QUANT:
5317
0
    if (QUANT_(node)->upper == 0)
5318
0
      state |= IN_ZERO_REPEAT;
5319
5320
0
    r = tune_call(ND_BODY(node), env, state);
5321
0
    break;
5322
5323
0
  case ND_ANCHOR:
5324
0
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5325
0
      r = tune_call(ND_BODY(node), env, state);
5326
0
    else
5327
0
      r = 0;
5328
0
    break;
5329
5330
0
  case ND_BAG:
5331
0
    {
5332
0
      BagNode* en = BAG_(node);
5333
5334
0
      if (en->type == BAG_MEMORY) {
5335
0
        if ((state & IN_ZERO_REPEAT) != 0) {
5336
0
          ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5337
0
          BAG_(node)->m.entry_count--;
5338
0
        }
5339
0
        r = tune_call(ND_BODY(node), env, state);
5340
0
      }
5341
0
      else if (en->type == BAG_IF_ELSE) {
5342
0
        r = tune_call(ND_BODY(node), env, state);
5343
0
        if (r != 0) return r;
5344
0
        if (IS_NOT_NULL(en->te.Then)) {
5345
0
          r = tune_call(en->te.Then, env, state);
5346
0
          if (r != 0) return r;
5347
0
        }
5348
0
        if (IS_NOT_NULL(en->te.Else))
5349
0
          r = tune_call(en->te.Else, env, state);
5350
0
      }
5351
0
      else
5352
0
        r = tune_call(ND_BODY(node), env, state);
5353
0
    }
5354
0
    break;
5355
5356
0
  case ND_CALL:
5357
0
    if ((state & IN_ZERO_REPEAT) != 0) {
5358
0
      ND_STATUS_ADD(node, IN_ZERO_REPEAT);
5359
0
      CALL_(node)->entry_count--;
5360
0
    }
5361
5362
0
    r = check_call_reference(CALL_(node), env, state);
5363
0
    break;
5364
5365
0
  default:
5366
0
    r = 0;
5367
0
    break;
5368
0
  }
5369
5370
0
  return r;
5371
0
}
5372
5373
static int
5374
tune_call2(Node* node)
5375
0
{
5376
0
  int r = 0;
5377
5378
0
  switch (ND_TYPE(node)) {
5379
0
  case ND_LIST:
5380
0
  case ND_ALT:
5381
0
    do {
5382
0
      r = tune_call2(ND_CAR(node));
5383
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5384
0
    break;
5385
5386
0
  case ND_QUANT:
5387
0
    if (QUANT_(node)->upper != 0)
5388
0
      r = tune_call2(ND_BODY(node));
5389
0
    break;
5390
5391
0
  case ND_ANCHOR:
5392
0
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
5393
0
      r = tune_call2(ND_BODY(node));
5394
0
    break;
5395
5396
0
  case ND_BAG:
5397
0
    if (! ND_IS_IN_ZERO_REPEAT(node))
5398
0
      r = tune_call2(ND_BODY(node));
5399
5400
0
    {
5401
0
      BagNode* en = BAG_(node);
5402
5403
0
      if (r != 0) return r;
5404
0
      if (en->type == BAG_IF_ELSE) {
5405
0
        if (IS_NOT_NULL(en->te.Then)) {
5406
0
          r = tune_call2(en->te.Then);
5407
0
          if (r != 0) return r;
5408
0
        }
5409
0
        if (IS_NOT_NULL(en->te.Else))
5410
0
          r = tune_call2(en->te.Else);
5411
0
      }
5412
0
    }
5413
0
    break;
5414
5415
0
  case ND_CALL:
5416
0
    if (! ND_IS_IN_ZERO_REPEAT(node)) {
5417
0
      tune_call2_call(node);
5418
0
    }
5419
0
    break;
5420
5421
0
  default:
5422
0
    break;
5423
0
  }
5424
5425
0
  return r;
5426
0
}
5427
5428
5429
static void
5430
tune_called_state_call(Node* node, int state)
5431
0
{
5432
0
  switch (ND_TYPE(node)) {
5433
0
  case ND_ALT:
5434
0
    state |= IN_ALT;
5435
    /* fall */
5436
0
  case ND_LIST:
5437
0
    do {
5438
0
      tune_called_state_call(ND_CAR(node), state);
5439
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5440
0
    break;
5441
5442
0
  case ND_QUANT:
5443
0
    {
5444
0
      QuantNode* qn = QUANT_(node);
5445
5446
0
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5447
0
        state |= IN_REAL_REPEAT;
5448
0
      if (qn->lower != qn->upper)
5449
0
        state |= IN_VAR_REPEAT;
5450
0
      if ((state & IN_PEEK) != 0)
5451
0
        ND_STATUS_ADD(node, INPEEK);
5452
5453
0
      tune_called_state_call(ND_QUANT_BODY(qn), state);
5454
0
    }
5455
0
    break;
5456
5457
0
  case ND_ANCHOR:
5458
0
    {
5459
0
      AnchorNode* an = ANCHOR_(node);
5460
5461
0
      switch (an->type) {
5462
0
      case ANCR_PREC_READ_NOT:
5463
0
      case ANCR_LOOK_BEHIND_NOT:
5464
0
        state |= (IN_NOT | IN_PEEK);
5465
0
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5466
0
        break;
5467
0
      case ANCR_PREC_READ:
5468
0
      case ANCR_LOOK_BEHIND:
5469
0
        state |= IN_PEEK;
5470
0
        tune_called_state_call(ND_ANCHOR_BODY(an), state);
5471
0
        break;
5472
0
      default:
5473
0
        break;
5474
0
      }
5475
0
    }
5476
0
    break;
5477
5478
0
  case ND_BAG:
5479
0
    {
5480
0
      BagNode* en = BAG_(node);
5481
5482
0
      if (en->type == BAG_MEMORY) {
5483
0
        if (ND_IS_MARK1(node)) {
5484
0
          if ((~en->m.called_state & state) != 0) {
5485
0
            en->m.called_state |= state;
5486
0
            tune_called_state_call(ND_BODY(node), state);
5487
0
          }
5488
0
        }
5489
0
        else {
5490
0
          ND_STATUS_ADD(node, MARK1);
5491
0
          en->m.called_state |= state;
5492
0
          tune_called_state_call(ND_BODY(node), state);
5493
0
          ND_STATUS_REMOVE(node, MARK1);
5494
0
        }
5495
0
      }
5496
0
      else if (en->type == BAG_IF_ELSE) {
5497
0
        state |= IN_ALT;
5498
0
        tune_called_state_call(ND_BODY(node), state);
5499
0
        if (IS_NOT_NULL(en->te.Then)) {
5500
0
          tune_called_state_call(en->te.Then, state);
5501
0
        }
5502
0
        if (IS_NOT_NULL(en->te.Else))
5503
0
          tune_called_state_call(en->te.Else, state);
5504
0
      }
5505
0
      else {
5506
0
        tune_called_state_call(ND_BODY(node), state);
5507
0
      }
5508
0
    }
5509
0
    break;
5510
5511
0
  case ND_CALL:
5512
0
    if ((state & IN_PEEK) != 0)
5513
0
      ND_STATUS_ADD(node, INPEEK);
5514
0
    if ((state & IN_REAL_REPEAT) != 0)
5515
0
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5516
5517
0
    tune_called_state_call(ND_BODY(node), state);
5518
0
    break;
5519
5520
0
  default:
5521
0
    break;
5522
0
  }
5523
0
}
5524
5525
static void
5526
tune_called_state(Node* node, int state)
5527
0
{
5528
0
  switch (ND_TYPE(node)) {
5529
0
  case ND_ALT:
5530
0
    state |= IN_ALT;
5531
    /* fall */
5532
0
  case ND_LIST:
5533
0
    do {
5534
0
      tune_called_state(ND_CAR(node), state);
5535
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
5536
0
    break;
5537
5538
0
#ifdef USE_CALL
5539
0
  case ND_CALL:
5540
0
    if ((state & IN_PEEK) != 0)
5541
0
      ND_STATUS_ADD(node, INPEEK);
5542
0
    if ((state & IN_REAL_REPEAT) != 0)
5543
0
      ND_STATUS_ADD(node, IN_REAL_REPEAT);
5544
5545
0
    tune_called_state_call(node, state);
5546
0
    break;
5547
0
#endif
5548
5549
0
  case ND_BAG:
5550
0
    {
5551
0
      BagNode* en = BAG_(node);
5552
5553
0
      switch (en->type) {
5554
0
      case BAG_MEMORY:
5555
0
        if (en->m.entry_count > 1)
5556
0
          state |= IN_MULTI_ENTRY;
5557
5558
0
        en->m.called_state |= state;
5559
        /* fall */
5560
0
      case BAG_OPTION:
5561
0
      case BAG_STOP_BACKTRACK:
5562
0
        tune_called_state(ND_BODY(node), state);
5563
0
        break;
5564
0
      case BAG_IF_ELSE:
5565
0
        state |= IN_ALT;
5566
0
        tune_called_state(ND_BODY(node), state);
5567
0
        if (IS_NOT_NULL(en->te.Then))
5568
0
          tune_called_state(en->te.Then, state);
5569
0
        if (IS_NOT_NULL(en->te.Else))
5570
0
          tune_called_state(en->te.Else, state);
5571
0
        break;
5572
0
      }
5573
0
    }
5574
0
    break;
5575
5576
0
  case ND_QUANT:
5577
0
    {
5578
0
      QuantNode* qn = QUANT_(node);
5579
5580
0
      if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5581
0
        state |= IN_REAL_REPEAT;
5582
0
      if (qn->lower != qn->upper)
5583
0
        state |= IN_VAR_REPEAT;
5584
0
      if ((state & IN_PEEK) != 0)
5585
0
        ND_STATUS_ADD(node, INPEEK);
5586
5587
0
      tune_called_state(ND_QUANT_BODY(qn), state);
5588
0
    }
5589
0
    break;
5590
5591
0
  case ND_ANCHOR:
5592
0
    {
5593
0
      AnchorNode* an = ANCHOR_(node);
5594
5595
0
      switch (an->type) {
5596
0
      case ANCR_PREC_READ_NOT:
5597
0
      case ANCR_LOOK_BEHIND_NOT:
5598
0
        state |= (IN_NOT | IN_PEEK);
5599
0
        tune_called_state(ND_ANCHOR_BODY(an), state);
5600
0
        break;
5601
0
      case ANCR_PREC_READ:
5602
0
      case ANCR_LOOK_BEHIND:
5603
0
        state |= IN_PEEK;
5604
0
        tune_called_state(ND_ANCHOR_BODY(an), state);
5605
0
        break;
5606
0
      default:
5607
0
        break;
5608
0
      }
5609
0
    }
5610
0
    break;
5611
5612
0
  case ND_BACKREF:
5613
0
  case ND_STRING:
5614
0
  case ND_CTYPE:
5615
0
  case ND_CCLASS:
5616
0
  case ND_GIMMICK:
5617
0
  default:
5618
0
    break;
5619
0
  }
5620
0
}
5621
5622
#endif  /* USE_CALL */
5623
5624
5625
#ifdef __GNUC__
5626
__inline
5627
#endif
5628
static int
5629
tune_anchor(Node* node, regex_t* reg, int state, ParseEnv* env)
5630
0
{
5631
0
  int r;
5632
0
  AnchorNode* an = ANCHOR_(node);
5633
5634
0
  switch (an->type) {
5635
0
  case ANCR_PREC_READ:
5636
0
    r = tune_tree(ND_ANCHOR_BODY(an), reg, (state | IN_PREC_READ), env);
5637
0
    break;
5638
0
  case ANCR_PREC_READ_NOT:
5639
0
    r = tune_tree(ND_ANCHOR_BODY(an), reg, (state | IN_PREC_READ | IN_NOT),
5640
0
                  env);
5641
0
    break;
5642
5643
0
  case ANCR_LOOK_BEHIND:
5644
0
  case ANCR_LOOK_BEHIND_NOT:
5645
0
    r = tune_look_behind(node, reg, state, env);
5646
0
    break;
5647
5648
0
  default:
5649
0
    r = 0;
5650
0
    break;
5651
0
  }
5652
5653
0
  return r;
5654
0
}
5655
5656
#ifdef __GNUC__
5657
__inline
5658
#endif
5659
static int
5660
tune_quant(Node* node, regex_t* reg, int state, ParseEnv* env)
5661
0
{
5662
0
  int r;
5663
0
  QuantNode* qn = QUANT_(node);
5664
0
  Node* body = ND_BODY(node);
5665
5666
0
  if ((state & IN_REAL_REPEAT) != 0) {
5667
0
    ND_STATUS_ADD(node, IN_REAL_REPEAT);
5668
0
  }
5669
0
  if ((state & IN_MULTI_ENTRY) != 0) {
5670
0
    ND_STATUS_ADD(node, IN_MULTI_ENTRY);
5671
0
  }
5672
5673
0
  if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
5674
0
    OnigLen d = node_min_byte_len(body, env);
5675
0
    if (d == 0) {
5676
0
#ifdef USE_RIGID_CHECK_CAPTURES_IN_EMPTY_REPEAT
5677
0
      qn->emptiness = quantifiers_memory_node_info(body);
5678
#else
5679
      qn->emptiness = BODY_MAY_BE_EMPTY;
5680
#endif
5681
0
    }
5682
0
  }
5683
5684
0
  if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
5685
0
    state |= IN_REAL_REPEAT;
5686
0
  if (qn->lower != qn->upper)
5687
0
    state |= IN_VAR_REPEAT;
5688
5689
0
  r = tune_tree(body, reg, state, env);
5690
0
  if (r != 0) return r;
5691
5692
  /* expand string */
5693
0
#define EXPAND_STRING_MAX_LENGTH  100
5694
0
  if (ND_TYPE(body) == ND_STRING) {
5695
0
    if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
5696
0
        qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
5697
0
      int len = ND_STRING_LEN(body);
5698
5699
0
      if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
5700
0
        int i, n = qn->lower;
5701
0
        node_conv_to_str_node(node, body);
5702
0
        for (i = 0; i < n; i++) {
5703
0
          r = node_str_node_cat(node, body);
5704
0
          if (r != 0) return r;
5705
0
        }
5706
0
        onig_node_free(body);
5707
0
        return r;
5708
0
      }
5709
0
    }
5710
0
  }
5711
5712
0
  if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
5713
0
    if (ND_TYPE(body) == ND_QUANT) {
5714
0
      QuantNode* tqn = QUANT_(body);
5715
0
      if (IS_NOT_NULL(tqn->head_exact)) {
5716
0
        qn->head_exact  = tqn->head_exact;
5717
0
        tqn->head_exact = NULL;
5718
0
      }
5719
0
    }
5720
0
    else {
5721
0
      qn->head_exact = get_tree_head_literal(ND_BODY(node), 1, reg);
5722
0
    }
5723
0
  }
5724
5725
0
  return r;
5726
0
}
5727
5728
/* tune_tree does the following work.
5729
 1. check empty loop. (set qn->emptiness)
5730
 2. expand ignore-case in char class.
5731
 3. set memory status bit flags. (reg->mem_stats)
5732
 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
5733
 5. find invalid patterns in look-behind.
5734
 6. expand repeated string.
5735
 */
5736
static int
5737
tune_tree(Node* node, regex_t* reg, int state, ParseEnv* env)
5738
0
{
5739
0
  int r = 0;
5740
5741
0
  switch (ND_TYPE(node)) {
5742
0
  case ND_LIST:
5743
0
    {
5744
0
      Node* prev = NULL_NODE;
5745
0
      do {
5746
0
        r = tune_tree(ND_CAR(node), reg, state, env);
5747
0
        if (IS_NOT_NULL(prev) && r == 0) {
5748
0
          r = tune_next(prev, ND_CAR(node), reg);
5749
0
        }
5750
0
        prev = ND_CAR(node);
5751
0
      } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5752
0
    }
5753
0
    break;
5754
5755
0
  case ND_ALT:
5756
0
    do {
5757
0
      r = tune_tree(ND_CAR(node), reg, (state | IN_ALT), env);
5758
0
    } while (r == 0 && IS_NOT_NULL(node = ND_CDR(node)));
5759
0
    break;
5760
5761
0
  case ND_STRING:
5762
0
    if (ND_IS_REAL_IGNORECASE(node)) {
5763
0
      r = unravel_case_fold_string(node, reg, state);
5764
0
    }
5765
0
    break;
5766
5767
0
  case ND_BACKREF:
5768
0
    {
5769
0
      int i;
5770
0
      int* p;
5771
0
      BackRefNode* br = BACKREF_(node);
5772
0
      p = BACKREFS_P(br);
5773
0
      for (i = 0; i < br->back_num; i++) {
5774
0
        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
5775
0
        MEM_STATUS_ON(env->backrefed_mem, p[i]);
5776
#if 0
5777
#ifdef USE_BACKREF_WITH_LEVEL
5778
        if (ND_IS_NEST_LEVEL(node)) {
5779
          MEM_STATUS_ON(env->backtrack_mem, p[i]);
5780
        }
5781
#endif
5782
#else
5783
        /* More precisely, it should be checked whether alt/repeat exists before
5784
           the subject capture node, and then this backreference position
5785
           exists before (or in) the capture node. */
5786
0
        MEM_STATUS_ON(env->backtrack_mem, p[i]);
5787
0
#endif
5788
0
      }
5789
0
    }
5790
0
    break;
5791
5792
0
  case ND_BAG:
5793
0
    {
5794
0
      BagNode* en = BAG_(node);
5795
5796
0
      switch (en->type) {
5797
0
      case BAG_OPTION:
5798
0
        {
5799
0
          OnigOptionType options = reg->options;
5800
0
          reg->options = BAG_(node)->o.options;
5801
0
          r = tune_tree(ND_BODY(node), reg, state, env);
5802
0
          reg->options = options;
5803
0
        }
5804
0
        break;
5805
5806
0
      case BAG_MEMORY:
5807
0
#ifdef USE_CALL
5808
0
        state |= en->m.called_state;
5809
0
#endif
5810
5811
0
        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
5812
0
            || ND_IS_RECURSION(node)) {
5813
0
          MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
5814
0
        }
5815
0
        r = tune_tree(ND_BODY(node), reg, state, env);
5816
0
        break;
5817
5818
0
      case BAG_STOP_BACKTRACK:
5819
0
        {
5820
0
          Node* target = ND_BODY(node);
5821
0
          r = tune_tree(target, reg, state, env);
5822
0
          if (ND_TYPE(target) == ND_QUANT) {
5823
0
            QuantNode* tqn = QUANT_(target);
5824
0
            if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
5825
0
                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
5826
0
              if (is_strict_real_node(ND_BODY(target)))
5827
0
                ND_STATUS_ADD(node, STRICT_REAL_REPEAT);
5828
0
            }
5829
0
          }
5830
0
        }
5831
0
        break;
5832
5833
0
      case BAG_IF_ELSE:
5834
0
        r = tune_tree(ND_BODY(node), reg, (state | IN_ALT), env);
5835
0
        if (r != 0) return r;
5836
0
        if (IS_NOT_NULL(en->te.Then)) {
5837
0
          r = tune_tree(en->te.Then, reg, (state | IN_ALT), env);
5838
0
          if (r != 0) return r;
5839
0
        }
5840
0
        if (IS_NOT_NULL(en->te.Else))
5841
0
          r = tune_tree(en->te.Else, reg, (state | IN_ALT), env);
5842
0
        break;
5843
0
      }
5844
0
    }
5845
0
    break;
5846
5847
0
  case ND_QUANT:
5848
0
    if ((state & (IN_PREC_READ | IN_LOOK_BEHIND)) != 0)
5849
0
      ND_STATUS_ADD(node, INPEEK);
5850
5851
0
    r = tune_quant(node, reg, state, env);
5852
0
    break;
5853
5854
0
  case ND_ANCHOR:
5855
0
    r = tune_anchor(node, reg, state, env);
5856
0
    break;
5857
5858
0
#ifdef USE_CALL
5859
0
  case ND_CALL:
5860
0
#endif
5861
0
  case ND_CTYPE:
5862
0
  case ND_CCLASS:
5863
0
  case ND_GIMMICK:
5864
0
  default:
5865
0
    break;
5866
0
  }
5867
5868
0
  return r;
5869
0
}
5870
5871
#ifndef ONIG_DONT_OPTIMIZE
5872
static int
5873
set_sunday_quick_search_or_bmh_skip_table(regex_t* reg, int case_expand,
5874
                                          UChar* s, UChar* end,
5875
                                          UChar skip[], int* roffset)
5876
0
{
5877
0
  int i, j, k, len, offset;
5878
0
  int n, clen;
5879
0
  UChar* p;
5880
0
  OnigEncoding enc;
5881
0
  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
5882
0
  UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
5883
5884
0
  enc = reg->enc;
5885
0
  offset = ENC_GET_SKIP_OFFSET(enc);
5886
0
  if (offset == ENC_SKIP_OFFSET_1_OR_0) {
5887
0
    UChar* p = s;
5888
0
    while (1) {
5889
0
      len = enclen(enc, p);
5890
0
      if (p + len >= end) {
5891
0
        if (len == 1) offset = 1;
5892
0
        else          offset = 0;
5893
0
        break;
5894
0
      }
5895
0
      p += len;
5896
0
    }
5897
0
  }
5898
5899
0
  len = (int )(end - s);
5900
0
  if (len + offset >= UCHAR_MAX)
5901
0
    return ONIGERR_PARSER_BUG;
5902
5903
0
  *roffset = offset;
5904
5905
0
  for (i = 0; i < CHAR_MAP_SIZE; i++) {
5906
0
    skip[i] = (UChar )(len + offset);
5907
0
  }
5908
5909
0
  for (p = s; p < end; ) {
5910
0
    int z;
5911
5912
0
    clen = enclen(enc, p);
5913
0
    if (p + clen > end) clen = (int )(end - p);
5914
5915
0
    len = (int )(end - p);
5916
0
    for (j = 0; j < clen; j++) {
5917
0
      z = len - j + (offset - 1);
5918
0
      if (z <= 0) break;
5919
0
      skip[p[j]] = z;
5920
0
    }
5921
5922
0
    if (case_expand != 0) {
5923
0
      n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
5924
0
                                             p, end, items);
5925
0
      for (k = 0; k < n; k++) {
5926
0
        ONIGENC_CODE_TO_MBC(enc, items[k].code[0], buf);
5927
0
        for (j = 0; j < clen; j++) {
5928
0
          z = len - j + (offset - 1);
5929
0
          if (z <= 0) break;
5930
0
          if (skip[buf[j]] > z)
5931
0
            skip[buf[j]] = z;
5932
0
        }
5933
0
      }
5934
0
    }
5935
5936
0
    p += clen;
5937
0
  }
5938
5939
0
  return 0;
5940
0
}
5941
#endif
5942
5943
5944
0
#define OPT_EXACT_MAXLEN   24
5945
5946
#if OPT_EXACT_MAXLEN >= UCHAR_MAX
5947
#error Too big OPT_EXACT_MAXLEN
5948
#endif
5949
5950
typedef struct {
5951
  MinMaxLen        mm;
5952
  OnigEncoding     enc;
5953
  OnigCaseFoldType case_fold_flag;
5954
  ParseEnv*        scan_env;
5955
} OptEnv;
5956
5957
typedef struct {
5958
  int left;
5959
  int right;
5960
} OptAnc;
5961
5962
typedef struct {
5963
  MinMaxLen  mm;   /* position */
5964
  OptAnc     anc;
5965
  int        reach_end;
5966
  int        len;
5967
  UChar      s[OPT_EXACT_MAXLEN];
5968
} OptStr;
5969
5970
typedef struct {
5971
  MinMaxLen mm;     /* position */
5972
  OptAnc    anc;
5973
  int       value;  /* weighted value */
5974
  UChar     map[CHAR_MAP_SIZE];
5975
} OptMap;
5976
5977
typedef struct {
5978
  MinMaxLen len;
5979
  OptAnc  anc;
5980
  OptStr  sb;     /* boundary */
5981
  OptStr  sm;     /* middle */
5982
  OptStr  spr;    /* prec read (?=...) */
5983
  OptMap  map;    /* boundary */
5984
} OptNode;
5985
5986
5987
#ifndef ONIG_DONT_OPTIMIZE
5988
5989
static int
5990
map_position_value(OnigEncoding enc, int i)
5991
0
{
5992
0
  static const short int Vals[] = {
5993
0
     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
5994
0
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
5995
0
    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
5996
0
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
5997
0
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
5998
0
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
5999
0
     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
6000
0
     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
6001
0
  };
6002
6003
0
  if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) {
6004
0
    if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
6005
0
      return 20;
6006
0
    else
6007
0
      return (int )Vals[i];
6008
0
  }
6009
0
  else
6010
0
    return 4;   /* Take it easy. */
6011
0
}
6012
6013
static int
6014
distance_value(MinMaxLen* mm)
6015
0
{
6016
  /* 1000 / (min-max-dist + 1) */
6017
0
  static const short int dist_vals[] = {
6018
0
    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
6019
0
      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
6020
0
      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
6021
0
      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
6022
0
      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
6023
0
      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
6024
0
      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
6025
0
      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
6026
0
      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
6027
0
      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
6028
0
  };
6029
6030
0
  OnigLen d;
6031
6032
0
  if (mm->max == INFINITE_LEN) return 0;
6033
6034
0
  d = mm->max - mm->min;
6035
0
  if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0])))
6036
    /* return dist_vals[d] * 16 / (mm->min + 12); */
6037
0
    return (int )dist_vals[d];
6038
0
  else
6039
0
    return 1;
6040
0
}
6041
6042
static int
6043
comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
6044
0
{
6045
0
  if (v2 <= 0) return -1;
6046
0
  if (v1 <= 0) return  1;
6047
6048
0
  v1 *= distance_value(d1);
6049
0
  v2 *= distance_value(d2);
6050
6051
0
  if (v2 > v1) return  1;
6052
0
  if (v2 < v1) return -1;
6053
6054
0
  if (d2->min < d1->min) return  1;
6055
0
  if (d2->min > d1->min) return -1;
6056
0
  return 0;
6057
0
}
6058
6059
static void
6060
copy_opt_env(OptEnv* to, OptEnv* from)
6061
0
{
6062
0
  *to = *from;
6063
0
}
6064
6065
static void
6066
clear_opt_anc_info(OptAnc* a)
6067
0
{
6068
0
  a->left  = 0;
6069
0
  a->right = 0;
6070
0
}
6071
6072
static void
6073
copy_opt_anc_info(OptAnc* to, OptAnc* from)
6074
0
{
6075
0
  *to = *from;
6076
0
}
6077
6078
static void
6079
concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right,
6080
                    OnigLen left_len, OnigLen right_len)
6081
0
{
6082
0
  clear_opt_anc_info(to);
6083
6084
0
  to->left = left->left;
6085
0
  if (left_len == 0) {
6086
0
    to->left |= right->left;
6087
0
  }
6088
6089
0
  to->right = right->right;
6090
0
  if (right_len == 0) {
6091
0
    to->right |= left->right;
6092
0
  }
6093
0
  else {
6094
0
    to->right |= (left->right & ANCR_PREC_READ_NOT);
6095
0
  }
6096
0
}
6097
6098
static int
6099
is_left(int a)
6100
0
{
6101
0
  if (a == ANCR_END_BUF  || a == ANCR_SEMI_END_BUF ||
6102
0
      a == ANCR_END_LINE || a == ANCR_PREC_READ || a == ANCR_PREC_READ_NOT)
6103
0
    return 0;
6104
6105
0
  return 1;
6106
0
}
6107
6108
static int
6109
is_set_opt_anc_info(OptAnc* to, int anc)
6110
0
{
6111
0
  if ((to->left & anc) != 0) return 1;
6112
6113
0
  return ((to->right & anc) != 0 ? 1 : 0);
6114
0
}
6115
6116
static void
6117
add_opt_anc_info(OptAnc* to, int anc)
6118
0
{
6119
0
  if (is_left(anc))
6120
0
    to->left |= anc;
6121
0
  else
6122
0
    to->right |= anc;
6123
0
}
6124
6125
static void
6126
remove_opt_anc_info(OptAnc* to, int anc)
6127
0
{
6128
0
  if (is_left(anc))
6129
0
    to->left &= ~anc;
6130
0
  else
6131
0
    to->right &= ~anc;
6132
0
}
6133
6134
static void
6135
alt_merge_opt_anc_info(OptAnc* to, OptAnc* add)
6136
0
{
6137
0
  to->left  &= add->left;
6138
0
  to->right &= add->right;
6139
0
}
6140
6141
static int
6142
is_full_opt_exact(OptStr* e)
6143
0
{
6144
0
  return e->len >= OPT_EXACT_MAXLEN;
6145
0
}
6146
6147
static void
6148
clear_opt_exact(OptStr* e)
6149
0
{
6150
0
  mml_clear(&e->mm);
6151
0
  clear_opt_anc_info(&e->anc);
6152
0
  e->reach_end = 0;
6153
0
  e->len       = 0;
6154
0
  e->s[0]      = '\0';
6155
0
}
6156
6157
static void
6158
copy_opt_exact(OptStr* to, OptStr* from)
6159
0
{
6160
0
  *to = *from;
6161
0
}
6162
6163
static int
6164
concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
6165
0
{
6166
0
  int i, j, len, r;
6167
0
  UChar *p, *end;
6168
0
  OptAnc tanc;
6169
6170
0
  r = 0;
6171
0
  p = add->s;
6172
0
  end = p + add->len;
6173
0
  for (i = to->len; p < end; ) {
6174
0
    len = enclen(enc, p);
6175
0
    if (i + len > OPT_EXACT_MAXLEN) {
6176
0
      r = 1; /* 1:full */
6177
0
      break;
6178
0
    }
6179
0
    for (j = 0; j < len && p < end; j++) {
6180
      /* coverity[overrun-local] */
6181
0
      to->s[i++] = *p++;
6182
0
    }
6183
0
  }
6184
6185
0
  to->len = i;
6186
0
  to->reach_end = (p == end ? add->reach_end : 0);
6187
6188
0
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
6189
0
  if (! to->reach_end) tanc.right = 0;
6190
0
  copy_opt_anc_info(&to->anc, &tanc);
6191
6192
0
  return r;
6193
0
}
6194
6195
static void
6196
concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
6197
0
{
6198
0
  int i, j, len;
6199
0
  UChar *p;
6200
6201
0
  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
6202
0
    len = enclen(enc, p);
6203
0
    if (i + len > OPT_EXACT_MAXLEN) break;
6204
0
    for (j = 0; j < len && p < end; j++) {
6205
      /* coverity[overrun-local] */
6206
0
      to->s[i++] = *p++;
6207
0
    }
6208
0
  }
6209
6210
0
  to->len = i;
6211
6212
0
  if (p >= end)
6213
0
    to->reach_end = 1;
6214
0
}
6215
6216
static void
6217
alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
6218
0
{
6219
0
  int i, j, len;
6220
6221
0
  if (add->len == 0 || to->len == 0) {
6222
0
    clear_opt_exact(to);
6223
0
    return ;
6224
0
  }
6225
6226
0
  if (! mml_is_equal(&to->mm, &add->mm)) {
6227
0
    clear_opt_exact(to);
6228
0
    return ;
6229
0
  }
6230
6231
0
  for (i = 0; i < to->len && i < add->len; ) {
6232
0
    if (to->s[i] != add->s[i]) break;
6233
0
    len = enclen(env->enc, to->s + i);
6234
6235
0
    for (j = 1; j < len; j++) {
6236
0
      if (to->s[i+j] != add->s[i+j]) break;
6237
0
    }
6238
0
    if (j < len) break;
6239
0
    i += len;
6240
0
  }
6241
6242
0
  if (! add->reach_end || i < add->len || i < to->len) {
6243
0
    to->reach_end = 0;
6244
0
  }
6245
0
  to->len = i;
6246
6247
0
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6248
0
  if (! to->reach_end) to->anc.right = 0;
6249
0
}
6250
6251
static void
6252
select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
6253
0
{
6254
0
  int vn, va;
6255
6256
0
  vn = now->len;
6257
0
  va = alt->len;
6258
6259
0
  if (va == 0) {
6260
0
    return ;
6261
0
  }
6262
0
  else if (vn == 0) {
6263
0
    copy_opt_exact(now, alt);
6264
0
    return ;
6265
0
  }
6266
0
  else if (vn <= 2 && va <= 2) {
6267
    /* ByteValTable[x] is big value --> low price */
6268
0
    va = map_position_value(enc, now->s[0]);
6269
0
    vn = map_position_value(enc, alt->s[0]);
6270
6271
0
    if (now->len > 1) vn += 5;
6272
0
    if (alt->len > 1) va += 5;
6273
0
  }
6274
6275
0
  vn *= 2;
6276
0
  va *= 2;
6277
6278
0
  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
6279
0
    copy_opt_exact(now, alt);
6280
0
}
6281
6282
static void
6283
clear_opt_map(OptMap* map)
6284
0
{
6285
0
  static const OptMap clean_info = {
6286
0
    {0, 0}, {0, 0}, 0,
6287
0
    {
6288
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6289
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6290
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6291
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6292
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6293
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6294
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6295
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6296
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6297
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6298
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6299
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6300
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6301
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6302
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6303
0
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6304
0
    }
6305
0
  };
6306
6307
0
  xmemcpy(map, &clean_info, sizeof(OptMap));
6308
0
}
6309
6310
static void
6311
copy_opt_map(OptMap* to, OptMap* from)
6312
0
{
6313
0
  *to = *from;
6314
0
}
6315
6316
static void
6317
add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc)
6318
0
{
6319
0
  if (m->map[c] == 0) {
6320
0
    m->map[c] = 1;
6321
0
    m->value += map_position_value(enc, c);
6322
0
  }
6323
0
}
6324
6325
static void
6326
select_opt_map(OptMap* now, OptMap* alt)
6327
0
{
6328
0
  static int z = 1<<15; /* 32768: something big value */
6329
6330
0
  int vn, va;
6331
6332
0
  if (alt->value == 0) return ;
6333
0
  if (now->value == 0) {
6334
0
    copy_opt_map(now, alt);
6335
0
    return ;
6336
0
  }
6337
6338
0
  vn = z / now->value;
6339
0
  va = z / alt->value;
6340
0
  if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
6341
0
    copy_opt_map(now, alt);
6342
0
}
6343
6344
static int
6345
comp_opt_exact_or_map(OptStr* e, OptMap* m)
6346
0
{
6347
0
#define COMP_EM_BASE  20
6348
0
  int ae, am;
6349
0
  int case_value;
6350
6351
0
  if (m->value <= 0) return -1;
6352
6353
0
  case_value = 3;
6354
0
  ae = COMP_EM_BASE * e->len * case_value;
6355
0
  am = COMP_EM_BASE * 5 * 2 / m->value;
6356
0
  return comp_distance_value(&e->mm, &m->mm, ae, am);
6357
0
}
6358
6359
static void
6360
alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
6361
0
{
6362
0
  int i, val;
6363
6364
  /* if (! mml_is_equal(&to->mm, &add->mm)) return ; */
6365
0
  if (to->value == 0) return ;
6366
0
  if (add->value == 0 || to->mm.max < add->mm.min) {
6367
0
    clear_opt_map(to);
6368
0
    return ;
6369
0
  }
6370
6371
0
  mml_alt_merge(&to->mm, &add->mm);
6372
6373
0
  val = 0;
6374
0
  for (i = 0; i < CHAR_MAP_SIZE; i++) {
6375
0
    if (add->map[i])
6376
0
      to->map[i] = 1;
6377
6378
0
    if (to->map[i])
6379
0
      val += map_position_value(enc, i);
6380
0
  }
6381
0
  to->value = val;
6382
6383
0
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6384
0
}
6385
6386
static void
6387
set_bound_node_opt_info(OptNode* opt, MinMaxLen* plen)
6388
0
{
6389
0
  mml_copy(&(opt->sb.mm),  plen);
6390
0
  mml_copy(&(opt->spr.mm), plen);
6391
0
  mml_copy(&(opt->map.mm), plen);
6392
0
}
6393
6394
static void
6395
clear_node_opt_info(OptNode* opt)
6396
0
{
6397
0
  mml_clear(&opt->len);
6398
0
  clear_opt_anc_info(&opt->anc);
6399
0
  clear_opt_exact(&opt->sb);
6400
0
  clear_opt_exact(&opt->sm);
6401
0
  clear_opt_exact(&opt->spr);
6402
0
  clear_opt_map(&opt->map);
6403
0
}
6404
6405
static void
6406
copy_node_opt_info(OptNode* to, OptNode* from)
6407
0
{
6408
0
  *to = *from;
6409
0
}
6410
6411
static void
6412
concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
6413
0
{
6414
0
  int sb_reach, sm_reach;
6415
0
  OptAnc tanc;
6416
6417
0
  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
6418
0
  copy_opt_anc_info(&to->anc, &tanc);
6419
6420
0
  if (add->sb.len > 0 && to->len.max == 0) {
6421
0
    concat_opt_anc_info(&tanc, &to->anc, &add->sb.anc, to->len.max, add->len.max);
6422
0
    copy_opt_anc_info(&add->sb.anc, &tanc);
6423
0
  }
6424
6425
0
  if (add->map.value > 0 && to->len.max == 0) {
6426
0
    if (add->map.mm.max == 0)
6427
0
      add->map.anc.left |= to->anc.left;
6428
0
  }
6429
6430
0
  sb_reach = to->sb.reach_end;
6431
0
  sm_reach = to->sm.reach_end;
6432
6433
0
  if (add->len.max != 0)
6434
0
    to->sb.reach_end = to->sm.reach_end = 0;
6435
6436
0
  if (add->sb.len > 0) {
6437
0
    if (sb_reach) {
6438
0
      concat_opt_exact(&to->sb, &add->sb, enc);
6439
0
      clear_opt_exact(&add->sb);
6440
0
    }
6441
0
    else if (sm_reach) {
6442
0
      concat_opt_exact(&to->sm, &add->sb, enc);
6443
0
      clear_opt_exact(&add->sb);
6444
0
    }
6445
0
  }
6446
0
  select_opt_exact(enc, &to->sm, &add->sb);
6447
0
  select_opt_exact(enc, &to->sm, &add->sm);
6448
6449
0
  if (to->spr.len > 0) {
6450
0
    if (add->len.max > 0) {
6451
0
      if (to->spr.mm.max == 0)
6452
0
        select_opt_exact(enc, &to->sb, &to->spr);
6453
0
      else
6454
0
        select_opt_exact(enc, &to->sm, &to->spr);
6455
0
    }
6456
0
  }
6457
0
  else if (add->spr.len > 0) {
6458
0
    copy_opt_exact(&to->spr, &add->spr);
6459
0
  }
6460
6461
0
  select_opt_map(&to->map, &add->map);
6462
0
  mml_add(&to->len, &add->len);
6463
0
}
6464
6465
static void
6466
alt_merge_node_opt_info(OptNode* to, OptNode* add, OptEnv* env)
6467
0
{
6468
0
  alt_merge_opt_anc_info(&to->anc, &add->anc);
6469
0
  alt_merge_opt_exact(&to->sb,  &add->sb, env);
6470
0
  alt_merge_opt_exact(&to->sm,  &add->sm, env);
6471
0
  alt_merge_opt_exact(&to->spr, &add->spr, env);
6472
0
  alt_merge_opt_map(env->enc, &to->map, &add->map);
6473
6474
0
  mml_alt_merge(&to->len, &add->len);
6475
0
}
6476
6477
static OnigLen
6478
node_max_byte_len(Node* node, ParseEnv* env)
6479
0
{
6480
0
  OnigLen len;
6481
0
  OnigLen tmax;
6482
6483
0
  len = 0;
6484
0
  switch (ND_TYPE(node)) {
6485
0
  case ND_LIST:
6486
0
    do {
6487
0
      tmax = node_max_byte_len(ND_CAR(node), env);
6488
0
      len = distance_add(len, tmax);
6489
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
6490
0
    break;
6491
6492
0
  case ND_ALT:
6493
0
    do {
6494
0
      tmax = node_max_byte_len(ND_CAR(node), env);
6495
0
      if (len < tmax) len = tmax;
6496
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
6497
0
    break;
6498
6499
0
  case ND_STRING:
6500
0
    {
6501
0
      StrNode* sn = STR_(node);
6502
0
      len = (OnigLen )(sn->end - sn->s);
6503
0
    }
6504
0
    break;
6505
6506
0
  case ND_CTYPE:
6507
0
  case ND_CCLASS:
6508
0
    len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
6509
0
    break;
6510
6511
0
  case ND_BACKREF:
6512
0
    if (! ND_IS_CHECKER(node)) {
6513
0
      int i;
6514
0
      int* backs;
6515
0
      MemEnv* mem_env = PARSEENV_MEMENV(env);
6516
0
      BackRefNode* br = BACKREF_(node);
6517
0
      if (ND_IS_RECURSION(node)) {
6518
0
#ifdef USE_BACKREF_WITH_LEVEL
6519
0
        if (ND_IS_NEST_LEVEL(node)) {
6520
0
          len = INFINITE_LEN;
6521
0
        }
6522
0
#endif
6523
0
        break;
6524
0
      }
6525
0
      backs = BACKREFS_P(br);
6526
0
      for (i = 0; i < br->back_num; i++) {
6527
0
        tmax = node_max_byte_len(mem_env[backs[i]].mem_node, env);
6528
0
        if (len < tmax) len = tmax;
6529
0
      }
6530
0
    }
6531
0
    break;
6532
6533
0
#ifdef USE_CALL
6534
0
  case ND_CALL:
6535
0
    if (! ND_IS_RECURSION(node))
6536
0
      len = node_max_byte_len(ND_BODY(node), env);
6537
0
    else
6538
0
      len = INFINITE_LEN;
6539
0
    break;
6540
0
#endif
6541
6542
0
  case ND_QUANT:
6543
0
    {
6544
0
      QuantNode* qn = QUANT_(node);
6545
6546
0
      if (qn->upper != 0) {
6547
0
        len = node_max_byte_len(ND_BODY(node), env);
6548
0
        if (len != 0) {
6549
0
          if (! IS_INFINITE_REPEAT(qn->upper))
6550
0
            len = distance_multiply(len, qn->upper);
6551
0
          else
6552
0
            len = INFINITE_LEN;
6553
0
        }
6554
0
      }
6555
0
    }
6556
0
    break;
6557
6558
0
  case ND_BAG:
6559
0
    {
6560
0
      BagNode* en = BAG_(node);
6561
0
      switch (en->type) {
6562
0
      case BAG_MEMORY:
6563
0
        if (ND_IS_FIXED_MAX(node))
6564
0
          len = en->max_len;
6565
0
        else {
6566
0
          if (ND_IS_MARK1(node))
6567
0
            len = INFINITE_LEN;
6568
0
          else {
6569
0
            ND_STATUS_ADD(node, MARK1);
6570
0
            len = node_max_byte_len(ND_BODY(node), env);
6571
0
            ND_STATUS_REMOVE(node, MARK1);
6572
6573
0
            en->max_len = len;
6574
0
            ND_STATUS_ADD(node, FIXED_MAX);
6575
0
          }
6576
0
        }
6577
0
        break;
6578
6579
0
      case BAG_OPTION:
6580
0
      case BAG_STOP_BACKTRACK:
6581
0
        len = node_max_byte_len(ND_BODY(node), env);
6582
0
        break;
6583
0
      case BAG_IF_ELSE:
6584
0
        {
6585
0
          OnigLen tlen, elen;
6586
6587
0
          len = node_max_byte_len(ND_BODY(node), env);
6588
0
          if (IS_NOT_NULL(en->te.Then)) {
6589
0
            tlen = node_max_byte_len(en->te.Then, env);
6590
0
            len = distance_add(len, tlen);
6591
0
          }
6592
0
          if (IS_NOT_NULL(en->te.Else))
6593
0
            elen = node_max_byte_len(en->te.Else, env);
6594
0
          else elen = 0;
6595
6596
0
          if (elen > len) len = elen;
6597
0
        }
6598
0
        break;
6599
0
      }
6600
0
    }
6601
0
    break;
6602
6603
0
  case ND_ANCHOR:
6604
0
  case ND_GIMMICK:
6605
0
  default:
6606
0
    break;
6607
0
  }
6608
6609
0
  return len;
6610
0
}
6611
6612
0
#define MAX_ND_OPT_INFO_REF_COUNT    5
6613
6614
static int
6615
optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
6616
0
{
6617
0
  int i;
6618
0
  int r;
6619
0
  OptNode xo;
6620
0
  OnigEncoding enc;
6621
6622
0
  r = 0;
6623
0
  enc = env->enc;
6624
0
  clear_node_opt_info(opt);
6625
0
  set_bound_node_opt_info(opt, &env->mm);
6626
6627
0
  switch (ND_TYPE(node)) {
6628
0
  case ND_LIST:
6629
0
    {
6630
0
      OptEnv nenv;
6631
0
      Node* nd = node;
6632
6633
0
      copy_opt_env(&nenv, env);
6634
0
      do {
6635
0
        r = optimize_nodes(ND_CAR(nd), &xo, &nenv);
6636
0
        if (r == 0) {
6637
0
          mml_add(&nenv.mm, &xo.len);
6638
0
          concat_left_node_opt_info(enc, opt, &xo);
6639
0
        }
6640
0
      } while (r == 0 && IS_NOT_NULL(nd = ND_CDR(nd)));
6641
0
    }
6642
0
    break;
6643
6644
0
  case ND_ALT:
6645
0
    {
6646
0
      Node* nd = node;
6647
6648
0
      do {
6649
0
        r = optimize_nodes(ND_CAR(nd), &xo, env);
6650
0
        if (r == 0) {
6651
0
          if (nd == node) copy_node_opt_info(opt, &xo);
6652
0
          else            alt_merge_node_opt_info(opt, &xo, env);
6653
0
        }
6654
0
      } while ((r == 0) && IS_NOT_NULL(nd = ND_CDR(nd)));
6655
0
    }
6656
0
    break;
6657
6658
0
  case ND_STRING:
6659
0
    {
6660
0
      StrNode* sn = STR_(node);
6661
0
      int slen = (int )(sn->end - sn->s);
6662
6663
0
      concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
6664
0
      if (slen > 0) {
6665
0
        add_char_opt_map(&opt->map, *(sn->s), enc);
6666
0
      }
6667
0
      mml_set_min_max(&opt->len, slen, slen);
6668
0
    }
6669
0
    break;
6670
6671
0
  case ND_CCLASS:
6672
0
    {
6673
0
      int z;
6674
0
      CClassNode* cc = CCLASS_(node);
6675
6676
      /* no need to check ignore case. (set in tune_tree()) */
6677
6678
0
      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
6679
0
        OnigLen min = ONIGENC_MBC_MINLEN(enc);
6680
0
        OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc);
6681
6682
0
        mml_set_min_max(&opt->len, min, max);
6683
0
      }
6684
0
      else {
6685
0
        for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
6686
0
          z = BITSET_AT(cc->bs, i);
6687
0
          if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) {
6688
0
            add_char_opt_map(&opt->map, (UChar )i, enc);
6689
0
          }
6690
0
        }
6691
0
        mml_set_min_max(&opt->len, 1, 1);
6692
0
      }
6693
0
    }
6694
0
    break;
6695
6696
0
  case ND_CTYPE:
6697
0
    {
6698
0
      int min, max;
6699
0
      int range;
6700
6701
0
      max = ONIGENC_MBC_MAXLEN_DIST(enc);
6702
6703
0
      if (max == 1) {
6704
0
        min = 1;
6705
6706
0
        switch (CTYPE_(node)->ctype) {
6707
0
        case CTYPE_ANYCHAR:
6708
0
          break;
6709
6710
0
        case ONIGENC_CTYPE_WORD:
6711
0
          range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE;
6712
0
          if (CTYPE_(node)->not != 0) {
6713
0
            for (i = 0; i < range; i++) {
6714
0
              if (! ONIGENC_IS_CODE_WORD(enc, i)) {
6715
0
                add_char_opt_map(&opt->map, (UChar )i, enc);
6716
0
              }
6717
0
            }
6718
0
            for (i = range; i < SINGLE_BYTE_SIZE; i++) {
6719
0
              add_char_opt_map(&opt->map, (UChar )i, enc);
6720
0
            }
6721
0
          }
6722
0
          else {
6723
0
            for (i = 0; i < range; i++) {
6724
0
              if (ONIGENC_IS_CODE_WORD(enc, i)) {
6725
0
                add_char_opt_map(&opt->map, (UChar )i, enc);
6726
0
              }
6727
0
            }
6728
0
          }
6729
0
          break;
6730
0
        }
6731
0
      }
6732
0
      else {
6733
0
        min = ONIGENC_MBC_MINLEN(enc);
6734
0
      }
6735
0
      mml_set_min_max(&opt->len, min, max);
6736
0
    }
6737
0
    break;
6738
6739
0
  case ND_ANCHOR:
6740
0
    switch (ANCHOR_(node)->type) {
6741
0
    case ANCR_BEGIN_BUF:
6742
0
    case ANCR_BEGIN_POSITION:
6743
0
    case ANCR_BEGIN_LINE:
6744
0
    case ANCR_END_BUF:
6745
0
    case ANCR_SEMI_END_BUF:
6746
0
    case ANCR_END_LINE:
6747
0
    case ANCR_PREC_READ_NOT:
6748
0
    case ANCR_LOOK_BEHIND:
6749
0
      add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
6750
0
      break;
6751
6752
0
    case ANCR_PREC_READ:
6753
0
      {
6754
0
        r = optimize_nodes(ND_BODY(node), &xo, env);
6755
0
        if (r == 0) {
6756
0
          if (xo.sb.len > 0)
6757
0
            copy_opt_exact(&opt->spr, &xo.sb);
6758
0
          else if (xo.sm.len > 0)
6759
0
            copy_opt_exact(&opt->spr, &xo.sm);
6760
6761
0
          opt->spr.reach_end = 0;
6762
6763
0
          if (xo.map.value > 0)
6764
0
            copy_opt_map(&opt->map, &xo.map);
6765
0
        }
6766
0
      }
6767
0
      break;
6768
6769
0
    case ANCR_LOOK_BEHIND_NOT:
6770
0
      break;
6771
0
    }
6772
0
    break;
6773
6774
0
  case ND_BACKREF:
6775
0
    if (! ND_IS_CHECKER(node)) {
6776
0
      OnigLen min, max;
6777
6778
0
      min = node_min_byte_len(node, env->scan_env);
6779
0
      max = node_max_byte_len(node, env->scan_env);
6780
0
      mml_set_min_max(&opt->len, min, max);
6781
0
    }
6782
0
    break;
6783
6784
0
#ifdef USE_CALL
6785
0
  case ND_CALL:
6786
0
    if (ND_IS_RECURSION(node))
6787
0
      mml_set_min_max(&opt->len, 0, INFINITE_LEN);
6788
0
    else {
6789
0
      r = optimize_nodes(ND_BODY(node), opt, env);
6790
0
    }
6791
0
    break;
6792
0
#endif
6793
6794
0
  case ND_QUANT:
6795
0
    {
6796
0
      OnigLen min, max;
6797
0
      QuantNode* qn = QUANT_(node);
6798
6799
      /* Issue #175
6800
         ex. /\g<1>{0}(?<=|())/
6801
6802
         Empty and unused nodes in look-behind is removed in
6803
         tune_look_behind().
6804
         Called group nodes are assigned to be not called if the caller side is
6805
         inside of zero-repetition.
6806
         As a result, the nodes are considered unused.
6807
       */
6808
0
      if (qn->upper == 0) {
6809
0
        mml_set_min_max(&opt->len, 0, 0);
6810
0
        break;
6811
0
      }
6812
6813
0
      r = optimize_nodes(ND_BODY(node), &xo, env);
6814
0
      if (r != 0) break;
6815
6816
0
      if (qn->lower > 0) {
6817
0
        copy_node_opt_info(opt, &xo);
6818
0
        if (xo.sb.len > 0) {
6819
0
          if (xo.sb.reach_end) {
6820
0
            for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->sb); i++) {
6821
0
              int rc = concat_opt_exact(&opt->sb, &xo.sb, enc);
6822
0
              if (rc > 0) break;
6823
0
            }
6824
0
            if (i < qn->lower) opt->sb.reach_end = 0;
6825
0
          }
6826
0
        }
6827
6828
0
        if (qn->lower != qn->upper) {
6829
0
          opt->sb.reach_end = 0;
6830
0
          opt->sm.reach_end = 0;
6831
0
        }
6832
0
        if (qn->lower > 1)
6833
0
          opt->sm.reach_end = 0;
6834
0
      }
6835
6836
0
      if (IS_INFINITE_REPEAT(qn->upper)) {
6837
0
        if (env->mm.max == 0 &&
6838
0
            ND_IS_ANYCHAR(ND_BODY(node)) && qn->greedy != 0) {
6839
0
          if (ND_IS_MULTILINE(ND_QUANT_BODY(qn)))
6840
0
            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
6841
0
          else
6842
0
            add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF);
6843
0
        }
6844
6845
0
        max = (xo.len.max > 0 ? INFINITE_LEN : 0);
6846
0
      }
6847
0
      else {
6848
0
        max = distance_multiply(xo.len.max, qn->upper);
6849
0
      }
6850
6851
0
      min = distance_multiply(xo.len.min, qn->lower);
6852
0
      mml_set_min_max(&opt->len, min, max);
6853
0
    }
6854
0
    break;
6855
6856
0
  case ND_BAG:
6857
0
    {
6858
0
      BagNode* en = BAG_(node);
6859
6860
0
      switch (en->type) {
6861
0
      case BAG_STOP_BACKTRACK:
6862
0
      case BAG_OPTION:
6863
0
        r = optimize_nodes(ND_BODY(node), opt, env);
6864
0
        break;
6865
6866
0
      case BAG_MEMORY:
6867
0
#ifdef USE_CALL
6868
0
        en->opt_count++;
6869
0
        if (en->opt_count > MAX_ND_OPT_INFO_REF_COUNT) {
6870
0
          OnigLen min, max;
6871
6872
0
          min = 0;
6873
0
          max = INFINITE_LEN;
6874
0
          if (ND_IS_FIXED_MIN(node)) min = en->min_len;
6875
0
          if (ND_IS_FIXED_MAX(node)) max = en->max_len;
6876
0
          mml_set_min_max(&opt->len, min, max);
6877
0
        }
6878
0
        else
6879
0
#endif
6880
0
          {
6881
0
            r = optimize_nodes(ND_BODY(node), opt, env);
6882
0
            if (is_set_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK)) {
6883
0
              if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
6884
0
                remove_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_MASK);
6885
0
            }
6886
0
          }
6887
0
        break;
6888
6889
0
      case BAG_IF_ELSE:
6890
0
        {
6891
0
          OptEnv nenv;
6892
6893
0
          if (IS_NOT_NULL(en->te.Else)) {
6894
0
            copy_opt_env(&nenv, env);
6895
0
            r = optimize_nodes(ND_BAG_BODY(en), &xo, &nenv);
6896
0
            if (r == 0) {
6897
0
              mml_add(&nenv.mm, &xo.len);
6898
0
              concat_left_node_opt_info(enc, opt, &xo);
6899
0
              if (IS_NOT_NULL(en->te.Then)) {
6900
0
                r = optimize_nodes(en->te.Then, &xo, &nenv);
6901
0
                if (r == 0) {
6902
0
                  concat_left_node_opt_info(enc, opt, &xo);
6903
0
                }
6904
0
              }
6905
6906
0
                r = optimize_nodes(en->te.Else, &xo, env);
6907
0
                if (r == 0)
6908
0
                  alt_merge_node_opt_info(opt, &xo, env);
6909
0
            }
6910
0
          }
6911
0
        }
6912
0
        break;
6913
0
      }
6914
0
    }
6915
0
    break;
6916
6917
0
  case ND_GIMMICK:
6918
0
    break;
6919
6920
0
  default:
6921
#ifdef ONIG_DEBUG
6922
    fprintf(DBGFP, "optimize_nodes: undefined node type %d\n", ND_TYPE(node));
6923
#endif
6924
0
    r = ONIGERR_TYPE_BUG;
6925
0
    break;
6926
0
  }
6927
6928
0
  return r;
6929
0
}
6930
6931
static int
6932
set_optimize_exact(regex_t* reg, OptStr* e)
6933
0
{
6934
0
  int r;
6935
0
  int allow_reverse;
6936
6937
0
  if (e->len == 0) return 0;
6938
6939
0
  reg->exact = (UChar* )xmalloc(e->len);
6940
0
  CHECK_NULL_RETURN_MEMERR(reg->exact);
6941
0
  xmemcpy(reg->exact, e->s, e->len);
6942
0
  reg->exact_end = reg->exact + e->len;
6943
6944
0
  allow_reverse =
6945
0
    ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
6946
6947
0
  if (e->len >= 2 || (e->len >= 1 && allow_reverse)) {
6948
0
    r = set_sunday_quick_search_or_bmh_skip_table(reg, 0,
6949
0
                                                  reg->exact, reg->exact_end,
6950
0
                                                  reg->map, &(reg->map_offset));
6951
0
    if (r != 0) return r;
6952
6953
0
    reg->optimize = (allow_reverse != 0
6954
0
                     ? OPTIMIZE_STR_FAST
6955
0
                     : OPTIMIZE_STR_FAST_STEP_FORWARD);
6956
0
  }
6957
0
  else {
6958
0
    reg->optimize = OPTIMIZE_STR;
6959
0
  }
6960
6961
0
  reg->dist_min = e->mm.min;
6962
0
  reg->dist_max = e->mm.max;
6963
6964
0
  if (reg->dist_min != INFINITE_LEN) {
6965
0
    int n = (int )(reg->exact_end - reg->exact);
6966
0
    reg->threshold_len = reg->dist_min + n;
6967
0
  }
6968
6969
0
  return 0;
6970
0
}
6971
6972
static void
6973
set_optimize_map(regex_t* reg, OptMap* m)
6974
0
{
6975
0
  int i;
6976
6977
0
  for (i = 0; i < CHAR_MAP_SIZE; i++)
6978
0
    reg->map[i] = m->map[i];
6979
6980
0
  reg->optimize   = OPTIMIZE_MAP;
6981
0
  reg->dist_min   = m->mm.min;
6982
0
  reg->dist_max   = m->mm.max;
6983
6984
0
  if (reg->dist_min != INFINITE_LEN) {
6985
0
    reg->threshold_len = reg->dist_min + ONIGENC_MBC_MINLEN(reg->enc);
6986
0
  }
6987
0
}
6988
6989
static void
6990
set_sub_anchor(regex_t* reg, OptAnc* anc)
6991
0
{
6992
0
  reg->sub_anchor |= anc->left  & ANCR_BEGIN_LINE;
6993
0
  reg->sub_anchor |= anc->right & ANCR_END_LINE;
6994
0
}
6995
6996
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
6997
static void print_optimize_info(FILE* f, regex_t* reg);
6998
#endif
6999
7000
static int
7001
set_optimize_info_from_tree(Node* node, regex_t* reg, ParseEnv* scan_env)
7002
0
{
7003
0
  int r;
7004
0
  OptNode opt;
7005
0
  OptEnv env;
7006
7007
0
  env.enc            = reg->enc;
7008
0
  env.case_fold_flag = reg->case_fold_flag;
7009
0
  env.scan_env       = scan_env;
7010
0
  mml_clear(&env.mm);
7011
7012
0
  r = optimize_nodes(node, &opt, &env);
7013
0
  if (r != 0) return r;
7014
7015
0
  reg->anchor = opt.anc.left & (ANCR_BEGIN_BUF |
7016
0
        ANCR_BEGIN_POSITION | ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML |
7017
0
        ANCR_LOOK_BEHIND);
7018
7019
0
  if ((opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0)
7020
0
    reg->anchor &= ~ANCR_ANYCHAR_INF_ML;
7021
7022
0
  reg->anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF |
7023
0
                                  ANCR_PREC_READ_NOT);
7024
7025
0
  if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
7026
0
    reg->anc_dist_min = opt.len.min;
7027
0
    reg->anc_dist_max = opt.len.max;
7028
0
  }
7029
7030
0
  if (opt.sb.len > 0 || opt.sm.len > 0) {
7031
0
    select_opt_exact(reg->enc, &opt.sb, &opt.sm);
7032
0
    if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0) {
7033
0
      goto set_map;
7034
0
    }
7035
0
    else {
7036
0
      r = set_optimize_exact(reg, &opt.sb);
7037
0
      set_sub_anchor(reg, &opt.sb.anc);
7038
0
    }
7039
0
  }
7040
0
  else if (opt.map.value > 0) {
7041
0
  set_map:
7042
0
    set_optimize_map(reg, &opt.map);
7043
0
    set_sub_anchor(reg, &opt.map.anc);
7044
0
  }
7045
0
  else {
7046
0
    reg->sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
7047
0
    if (opt.len.max == 0)
7048
0
      reg->sub_anchor |= opt.anc.right & ANCR_END_LINE;
7049
0
  }
7050
7051
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
7052
  print_optimize_info(DBGFP, reg);
7053
#endif
7054
0
  return r;
7055
0
}
7056
#endif /* ONIG_DONT_OPTIMIZE */
7057
7058
static void
7059
clear_optimize_info(regex_t* reg)
7060
0
{
7061
0
  reg->optimize      = OPTIMIZE_NONE;
7062
0
  reg->anchor        = 0;
7063
0
  reg->anc_dist_min  = 0;
7064
0
  reg->anc_dist_max  = 0;
7065
0
  reg->sub_anchor    = 0;
7066
0
  reg->exact_end     = (UChar* )NULL;
7067
0
  reg->map_offset    = 0;
7068
0
  reg->threshold_len = 0;
7069
0
  if (IS_NOT_NULL(reg->exact)) {
7070
0
    xfree(reg->exact);
7071
0
    reg->exact = (UChar* )NULL;
7072
0
  }
7073
0
}
7074
7075
#if defined(ONIG_DEBUG_PARSE)  || defined(ONIG_DEBUG_MATCH) || \
7076
    defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE)
7077
7078
static void
7079
print_enc_string(FILE* fp, OnigEncoding enc,
7080
                 const UChar *s, const UChar *end)
7081
{
7082
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
7083
    const UChar *p;
7084
    OnigCodePoint code;
7085
7086
    p = s;
7087
    while (p < end) {
7088
      code = ONIGENC_MBC_TO_CODE(enc, p, end);
7089
      if (code >= 0x80) {
7090
        fprintf(fp, " 0x%04x ", (int )code);
7091
      }
7092
      else {
7093
        fputc((int )code, fp);
7094
      }
7095
7096
      p += enclen(enc, p);
7097
    }
7098
  }
7099
  else {
7100
    while (s < end) {
7101
      if (ONIGENC_MBC_MAXLEN(enc) == 1) {
7102
        if (*s >= 0x80) {
7103
          fprintf(fp, "\\x%02x", (unsigned int )*s);
7104
        }
7105
        else {
7106
          fputc((int )*s, fp);
7107
        }
7108
      }
7109
      else { /* for UTF-8 */
7110
        fputc((int )*s, fp);
7111
      }
7112
      s++;
7113
    }
7114
  }
7115
}
7116
7117
static void
7118
print_options(FILE* fp, OnigOptionType o)
7119
{
7120
  if ((o & ONIG_OPTION_IGNORECASE) != 0)      fprintf(fp, " IGNORECASE");
7121
  if ((o & ONIG_OPTION_EXTEND) != 0)          fprintf(fp, " EXTEND");
7122
  if ((o & ONIG_OPTION_MULTILINE) != 0)       fprintf(fp, " MULTILINE");
7123
  if ((o & ONIG_OPTION_SINGLELINE) != 0)      fprintf(fp, " SINGLELINE");
7124
  if ((o & ONIG_OPTION_FIND_LONGEST) != 0)    fprintf(fp, " FIND_LONGEST");
7125
  if ((o & ONIG_OPTION_FIND_NOT_EMPTY) != 0)  fprintf(fp, " FIND_NOT_EMPTY");
7126
  if ((o & ONIG_OPTION_NEGATE_SINGLELINE) != 0)  fprintf(fp, " NEGATE_SINGLELINE");
7127
  if ((o & ONIG_OPTION_DONT_CAPTURE_GROUP) != 0) fprintf(fp, " DONT_CAPTURE_GROUP");
7128
  if ((o & ONIG_OPTION_CAPTURE_GROUP) != 0)   fprintf(fp, " CAPTURE_GROUP");
7129
  if ((o & ONIG_OPTION_NOTBOL) != 0)          fprintf(fp, " NOTBOL");
7130
  if ((o & ONIG_OPTION_NOTEOL) != 0)          fprintf(fp, " NOTEOL");
7131
  if ((o & ONIG_OPTION_POSIX_REGION) != 0)    fprintf(fp, " POSIX_REGION");
7132
  if ((o & ONIG_OPTION_CHECK_VALIDITY_OF_STRING) != 0) fprintf(fp, " CHECK_VALIDITY_OF_STRING");
7133
  if ((o & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) fprintf(fp, " IGNORECASE_IS_ASCII");
7134
  if ((o & ONIG_OPTION_WORD_IS_ASCII) != 0)   fprintf(fp, " WORD_IS_ASCII");
7135
  if ((o & ONIG_OPTION_DIGIT_IS_ASCII) != 0)  fprintf(fp, " DIGIT_IS_ASCII");
7136
  if ((o & ONIG_OPTION_SPACE_IS_ASCII) != 0)  fprintf(fp, " SPACE_IS_ASCII");
7137
  if ((o & ONIG_OPTION_POSIX_IS_ASCII) != 0)  fprintf(fp, " POSIX_IS_ASCII");
7138
  if ((o & ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER) != 0) fprintf(fp, " TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER");
7139
  if ((o & ONIG_OPTION_TEXT_SEGMENT_WORD) != 0) fprintf(fp, " TEXT_SEGMENT_WORD");
7140
  if ((o & ONIG_OPTION_NOT_BEGIN_STRING) != 0) fprintf(fp, " NOT_BIGIN_STRING");
7141
  if ((o & ONIG_OPTION_NOT_END_STRING) != 0)   fprintf(fp, " NOT_END_STRING");
7142
  if ((o & ONIG_OPTION_NOT_BEGIN_POSITION) != 0) fprintf(fp, " NOT_BEGIN_POSITION");
7143
  if ((o & ONIG_OPTION_CALLBACK_EACH_MATCH) != 0) fprintf(fp, " CALLBACK_EACH_MATCH");
7144
}
7145
7146
#endif
7147
7148
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
7149
7150
#ifndef ONIG_DONT_OPTIMIZE
7151
7152
static void
7153
print_distance_range(FILE* f, OnigLen a, OnigLen b)
7154
{
7155
  if (a == INFINITE_LEN)
7156
    fputs("inf", f);
7157
  else
7158
    fprintf(f, "(%u)", a);
7159
7160
  fputs("-", f);
7161
7162
  if (b == INFINITE_LEN)
7163
    fputs("inf", f);
7164
  else
7165
    fprintf(f, "(%u)", b);
7166
}
7167
7168
static void
7169
print_anchor(FILE* f, int anchor)
7170
{
7171
  int q = 0;
7172
7173
  fprintf(f, "[");
7174
7175
  if (anchor & ANCR_BEGIN_BUF) {
7176
    fprintf(f, "begin-buf");
7177
    q = 1;
7178
  }
7179
  if (anchor & ANCR_BEGIN_LINE) {
7180
    if (q) fprintf(f, ", ");
7181
    q = 1;
7182
    fprintf(f, "begin-line");
7183
  }
7184
  if (anchor & ANCR_BEGIN_POSITION) {
7185
    if (q) fprintf(f, ", ");
7186
    q = 1;
7187
    fprintf(f, "begin-pos");
7188
  }
7189
  if (anchor & ANCR_END_BUF) {
7190
    if (q) fprintf(f, ", ");
7191
    q = 1;
7192
    fprintf(f, "end-buf");
7193
  }
7194
  if (anchor & ANCR_SEMI_END_BUF) {
7195
    if (q) fprintf(f, ", ");
7196
    q = 1;
7197
    fprintf(f, "semi-end-buf");
7198
  }
7199
  if (anchor & ANCR_END_LINE) {
7200
    if (q) fprintf(f, ", ");
7201
    q = 1;
7202
    fprintf(f, "end-line");
7203
  }
7204
  if (anchor & ANCR_ANYCHAR_INF) {
7205
    if (q) fprintf(f, ", ");
7206
    q = 1;
7207
    fprintf(f, "anychar-inf");
7208
  }
7209
  if (anchor & ANCR_ANYCHAR_INF_ML) {
7210
    if (q) fprintf(f, ", ");
7211
    fprintf(f, "anychar-inf-ml");
7212
  }
7213
7214
  fprintf(f, "]");
7215
}
7216
7217
static void
7218
print_optimize_info(FILE* f, regex_t* reg)
7219
{
7220
  static const char* on[] =
7221
    { "NONE", "STR", "STR_FAST", "STR_FAST_STEP_FORWARD", "MAP" };
7222
7223
  fprintf(f, "optimize: %s\n", on[reg->optimize]);
7224
  fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
7225
  if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
7226
    print_distance_range(f, reg->anc_dist_min, reg->anc_dist_max);
7227
  fprintf(f, "\n");
7228
7229
  if (reg->optimize) {
7230
    fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
7231
    fprintf(f, "\n");
7232
  }
7233
  fprintf(f, "\n");
7234
7235
  if (reg->exact) {
7236
    UChar *p;
7237
    fprintf(f, "exact: [");
7238
    for (p = reg->exact; p < reg->exact_end; p++) {
7239
      fputc(*p, f);
7240
    }
7241
    fprintf(f, "]: length: %ld, dmin: %u, ",
7242
            (reg->exact_end - reg->exact), reg->dist_min);
7243
    if (reg->dist_max == INFINITE_LEN)
7244
      fprintf(f, "dmax: inf.\n");
7245
    else
7246
      fprintf(f, "dmax: %u\n", reg->dist_max);
7247
  }
7248
  else if (reg->optimize & OPTIMIZE_MAP) {
7249
    int c, i, n = 0;
7250
7251
    for (i = 0; i < CHAR_MAP_SIZE; i++)
7252
      if (reg->map[i]) n++;
7253
7254
    fprintf(f, "map: n=%d, dmin: %u, dmax: %u\n",
7255
            n, reg->dist_min, reg->dist_max);
7256
    if (n > 0) {
7257
      c = 0;
7258
      fputc('[', f);
7259
      for (i = 0; i < CHAR_MAP_SIZE; i++) {
7260
        if (reg->map[i] != 0) {
7261
          if (c > 0)  fputs(", ", f);
7262
          c++;
7263
          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
7264
              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
7265
            fputc(i, f);
7266
          else
7267
            fprintf(f, "0x%02x", i);
7268
        }
7269
      }
7270
      fprintf(f, "]\n");
7271
    }
7272
  }
7273
}
7274
#endif /* ONIG_DONT_OPTIMIZE */
7275
#endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
7276
7277
7278
extern RegexExt*
7279
onig_get_regex_ext(regex_t* reg)
7280
0
{
7281
0
  if (IS_NULL(reg->extp)) {
7282
0
    RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext));
7283
0
    if (IS_NULL(ext)) return 0;
7284
7285
0
    ext->pattern      = 0;
7286
0
    ext->pattern_end  = 0;
7287
0
#ifdef USE_CALLOUT
7288
0
    ext->tag_table    = 0;
7289
0
    ext->callout_num  = 0;
7290
0
    ext->callout_list_alloc = 0;
7291
0
    ext->callout_list = 0;
7292
0
#endif
7293
7294
0
    reg->extp = ext;
7295
0
  }
7296
7297
0
  return reg->extp;
7298
0
}
7299
7300
static void
7301
free_regex_ext(RegexExt* ext)
7302
0
{
7303
0
  if (IS_NOT_NULL(ext)) {
7304
0
    if (IS_NOT_NULL(ext->pattern))
7305
0
      xfree((void* )ext->pattern);
7306
7307
0
#ifdef USE_CALLOUT
7308
0
    if (IS_NOT_NULL(ext->tag_table))
7309
0
      onig_callout_tag_table_free(ext->tag_table);
7310
7311
0
    if (IS_NOT_NULL(ext->callout_list))
7312
0
      onig_free_reg_callout_list(ext->callout_num, ext->callout_list);
7313
0
#endif
7314
7315
0
    xfree(ext);
7316
0
  }
7317
0
}
7318
7319
extern int
7320
onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end)
7321
0
{
7322
0
  RegexExt* ext;
7323
0
  UChar* s;
7324
7325
0
  ext = onig_get_regex_ext(reg);
7326
0
  CHECK_NULL_RETURN_MEMERR(ext);
7327
7328
0
  s = onigenc_strdup(reg->enc, pattern, pattern_end);
7329
0
  CHECK_NULL_RETURN_MEMERR(s);
7330
7331
0
  ext->pattern     = s;
7332
0
  ext->pattern_end = s + (pattern_end - pattern);
7333
7334
0
  return ONIG_NORMAL;
7335
0
}
7336
7337
extern void
7338
onig_free_body(regex_t* reg)
7339
0
{
7340
0
  if (IS_NOT_NULL(reg)) {
7341
0
    ops_free(reg);
7342
0
    if (IS_NOT_NULL(reg->string_pool)) {
7343
0
      xfree(reg->string_pool);
7344
0
      reg->string_pool_end = reg->string_pool = 0;
7345
0
    }
7346
0
    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
7347
0
    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
7348
0
    if (IS_NOT_NULL(reg->extp)) {
7349
0
      free_regex_ext(reg->extp);
7350
0
      reg->extp = 0;
7351
0
    }
7352
7353
0
    onig_names_free(reg);
7354
0
  }
7355
0
}
7356
7357
extern void
7358
onig_free(regex_t* reg)
7359
0
{
7360
0
  if (IS_NOT_NULL(reg)) {
7361
0
    onig_free_body(reg);
7362
0
    xfree(reg);
7363
0
  }
7364
0
}
7365
7366
7367
#ifdef ONIG_DEBUG_PARSE
7368
static void print_tree P_((FILE* f, Node* node));
7369
#endif
7370
7371
extern int onig_init_for_match_at(regex_t* reg);
7372
7373
static int parse_and_tune(regex_t* reg, const UChar* pattern,
7374
  const UChar* pattern_end, ParseEnv *scan_env, Node** rroot,
7375
  OnigErrorInfo* einfo
7376
#ifdef USE_CALL
7377
  , UnsetAddrList* uslist
7378
#endif
7379
)
7380
0
{
7381
0
  int r;
7382
0
  Node* root;
7383
7384
0
  root = NULL_NODE;
7385
0
  if (IS_NOT_NULL(einfo)) {
7386
0
    einfo->enc = reg->enc;
7387
0
    einfo->par = (UChar* )NULL;
7388
0
  }
7389
7390
0
  r = onig_parse_tree(&root, pattern, pattern_end, reg, scan_env);
7391
0
  if (r != 0) goto err;
7392
7393
0
#ifdef USE_WHOLE_OPTIONS
7394
0
  if ((scan_env->flags & PE_FLAG_HAS_WHOLE_OPTIONS) != 0) {
7395
0
    r = check_whole_options_position(root);
7396
0
    if (r != 0) goto err;
7397
0
  }
7398
0
#endif
7399
7400
0
  r = reduce_string_list(root, reg->enc);
7401
0
  if (r != 0) goto err;
7402
7403
  /* mixed use named group and no-named group */
7404
0
  if (scan_env->num_named > 0 &&
7405
0
      IS_SYNTAX_BV(scan_env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
7406
0
      ! OPTON_CAPTURE_GROUP(reg->options)) {
7407
0
    if (scan_env->num_named != scan_env->num_mem)
7408
0
      r = disable_noname_group_capture(&root, reg, scan_env);
7409
0
    else
7410
0
      r = numbered_ref_check(root);
7411
7412
0
    if (r != 0) goto err;
7413
0
  }
7414
7415
0
  r = check_backrefs(root, scan_env);
7416
0
  if (r != 0) goto err;
7417
7418
0
#ifdef USE_CALL
7419
0
  if (scan_env->num_call > 0) {
7420
0
    r = unset_addr_list_init(uslist, scan_env->num_call);
7421
0
    if (r != 0) goto err;
7422
0
    scan_env->unset_addr_list = uslist;
7423
0
    r = tune_call(root, scan_env, 0);
7424
0
    if (r != 0) goto err_unset;
7425
0
    r = tune_call2(root);
7426
0
    if (r != 0) goto err_unset;
7427
0
    r = recursive_call_check_trav(root, scan_env, 0);
7428
0
    if (r  < 0) goto err_unset;
7429
0
    r = infinite_recursive_call_check_trav(root, scan_env);
7430
0
    if (r != 0) goto err_unset;
7431
7432
0
    tune_called_state(root, 0);
7433
0
  }
7434
7435
0
  reg->num_call = scan_env->num_call;
7436
0
#endif
7437
7438
#ifdef ONIG_DEBUG_PARSE
7439
  fprintf(DBGFP, "MAX PARSE DEPTH: %d\n", scan_env->max_parse_depth);
7440
#endif
7441
7442
0
  r = tune_tree(root, reg, 0, scan_env);
7443
0
  if (r != 0) {
7444
#ifdef ONIG_DEBUG_PARSE
7445
    fprintf(DBGFP, "TREE (error in tune)\n");
7446
    print_tree(DBGFP, root);
7447
    fprintf(DBGFP, "\n");
7448
#endif
7449
0
    goto err_unset;
7450
0
  }
7451
7452
0
  if (scan_env->backref_num != 0) {
7453
0
    set_parent_node_trav(root, NULL_NODE);
7454
0
    r = set_empty_repeat_node_trav(root, NULL_NODE, scan_env);
7455
0
    if (r != 0) goto err_unset;
7456
0
    set_empty_status_check_trav(root, scan_env);
7457
0
  }
7458
7459
0
  *rroot = root;
7460
0
  return r;
7461
7462
0
 err_unset:
7463
0
#ifdef USE_CALL
7464
0
  if (scan_env->num_call > 0) {
7465
0
    unset_addr_list_end(uslist);
7466
0
  }
7467
0
#endif
7468
0
 err:
7469
0
  if (IS_NOT_NULL(scan_env->error)) {
7470
0
    if (IS_NOT_NULL(einfo)) {
7471
0
      einfo->par     = scan_env->error;
7472
0
      einfo->par_end = scan_env->error_end;
7473
0
    }
7474
0
  }
7475
7476
0
  onig_node_free(root);
7477
0
  if (IS_NOT_NULL(scan_env->mem_env_dynamic))
7478
0
    xfree(scan_env->mem_env_dynamic);
7479
7480
0
  *rroot = NULL_NODE;
7481
0
  return r;
7482
0
}
7483
7484
extern int
7485
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
7486
             OnigErrorInfo* einfo)
7487
0
{
7488
0
  int r;
7489
0
  Node* root;
7490
0
  ParseEnv scan_env;
7491
0
#ifdef USE_CALL
7492
0
  UnsetAddrList uslist = {0};
7493
0
#endif
7494
7495
#if defined(ONIG_DEBUG_PARSE)  || defined(ONIG_DEBUG_MATCH) || \
7496
    defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE)
7497
  fprintf(DBGFP, "\nPATTERN: /");
7498
  print_enc_string(DBGFP, reg->enc, pattern, pattern_end);
7499
  fprintf(DBGFP, "/\n");
7500
  fprintf(DBGFP, "OPTIONS:");
7501
  print_options(DBGFP, reg->options);
7502
  fprintf(DBGFP, "\n");
7503
#endif
7504
7505
0
  if (reg->ops_alloc == 0) {
7506
0
    r = ops_init(reg, OPS_INIT_SIZE);
7507
0
    if (r != 0) {
7508
0
      if (IS_NOT_NULL(einfo)) {
7509
0
        einfo->enc = reg->enc;
7510
0
        einfo->par = (UChar* )NULL;
7511
0
      }
7512
0
      return r;
7513
0
    }
7514
0
  }
7515
0
  else
7516
0
    reg->ops_used = 0;
7517
7518
0
  r = parse_and_tune(reg, pattern, pattern_end, &scan_env, &root, einfo
7519
0
#ifdef USE_CALL
7520
0
                     , &uslist
7521
0
#endif
7522
0
                    );
7523
0
  if (r != 0) return r;
7524
7525
#ifdef ONIG_DEBUG_PARSE
7526
  fprintf(DBGFP, "TREE (after tune)\n");
7527
  print_tree(DBGFP, root);
7528
  fprintf(DBGFP, "\n");
7529
#endif
7530
7531
0
  reg->capture_history = scan_env.cap_history;
7532
0
  reg->push_mem_start  = scan_env.backtrack_mem | scan_env.cap_history;
7533
7534
0
#ifdef USE_CALLOUT
7535
0
  if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) {
7536
0
    reg->push_mem_end = reg->push_mem_start;
7537
0
  }
7538
0
  else {
7539
0
    if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
7540
0
      reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
7541
0
    else
7542
0
      reg->push_mem_end = reg->push_mem_start &
7543
0
                        (scan_env.backrefed_mem | scan_env.cap_history);
7544
0
  }
7545
#else
7546
  if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
7547
    reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
7548
  else
7549
    reg->push_mem_end = reg->push_mem_start &
7550
                      (scan_env.backrefed_mem | scan_env.cap_history);
7551
#endif
7552
7553
0
  clear_optimize_info(reg);
7554
0
#ifndef ONIG_DONT_OPTIMIZE
7555
0
  r = set_optimize_info_from_tree(root, reg, &scan_env);
7556
0
  if (r != 0)  {
7557
0
#ifdef USE_CALL
7558
0
    if (scan_env.num_call > 0) {
7559
0
      unset_addr_list_end(&uslist);
7560
0
    }
7561
0
#endif
7562
0
    goto err;
7563
0
  }
7564
0
#endif
7565
7566
0
  if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
7567
0
    xfree(scan_env.mem_env_dynamic);
7568
0
    scan_env.mem_env_dynamic = (MemEnv* )NULL;
7569
0
  }
7570
7571
0
  r = compile_tree(root, reg, &scan_env);
7572
0
  if (r == 0) {
7573
0
    if (scan_env.keep_num > 0) {
7574
0
      r = add_op(reg, OP_UPDATE_VAR);
7575
0
      if (r != 0) goto err;
7576
7577
0
      COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;
7578
0
      COP(reg)->update_var.id   = 0; /* not used */
7579
0
      COP(reg)->update_var.clear = FALSE;
7580
0
    }
7581
7582
0
    r = add_op(reg, OP_END);
7583
0
    if (r != 0) goto err;
7584
7585
0
#ifdef USE_CALL
7586
0
    if (scan_env.num_call > 0) {
7587
0
      r = fix_unset_addr_list(&uslist, reg);
7588
0
      unset_addr_list_end(&uslist);
7589
0
      if (r != 0) goto err;
7590
0
    }
7591
0
#endif
7592
7593
0
    r = ops_resize(reg, reg->ops_used);
7594
0
    if (r != ONIG_NORMAL) goto err;
7595
7596
0
    set_addr_in_repeat_range(reg);
7597
7598
0
    if ((reg->push_mem_end != 0)
7599
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
7600
        || (reg->num_repeat      != 0)
7601
        || (reg->num_empty_check != 0)
7602
#endif
7603
0
#ifdef USE_CALLOUT
7604
0
        || (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
7605
0
#endif
7606
0
#ifdef USE_CALL
7607
0
        || scan_env.num_call > 0
7608
0
#endif
7609
0
        )
7610
0
      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
7611
0
    else {
7612
0
      if (reg->push_mem_start != 0)
7613
0
        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
7614
0
      else
7615
0
        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
7616
0
    }
7617
7618
0
    r = ops_make_string_pool(reg);
7619
0
    if (r != 0) goto err;
7620
0
  }
7621
0
#ifdef USE_CALL
7622
0
  else if (scan_env.num_call > 0) {
7623
0
    unset_addr_list_end(&uslist);
7624
0
  }
7625
0
#endif
7626
0
  onig_node_free(root);
7627
7628
#ifdef ONIG_DEBUG_COMPILE
7629
  onig_print_names(DBGFP, reg);
7630
  onig_print_compiled_byte_code_list(DBGFP, reg);
7631
#endif
7632
7633
0
#ifdef USE_DIRECT_THREADED_CODE
7634
  /* opcode -> opaddr */
7635
0
  onig_init_for_match_at(reg);
7636
0
#endif
7637
7638
0
  return r;
7639
7640
0
 err:
7641
0
  if (IS_NOT_NULL(scan_env.error)) {
7642
0
    if (IS_NOT_NULL(einfo)) {
7643
0
      einfo->par     = scan_env.error;
7644
0
      einfo->par_end = scan_env.error_end;
7645
0
    }
7646
0
  }
7647
7648
0
  onig_node_free(root);
7649
0
  if (IS_NOT_NULL(scan_env.mem_env_dynamic))
7650
0
      xfree(scan_env.mem_env_dynamic);
7651
0
  return r;
7652
0
}
7653
7654
7655
/* Non-zero once onig_initialize() has run; onig_end() resets it to 0. */
static int onig_inited = 0;
7656
7657
extern int
7658
onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag,
7659
              OnigEncoding enc, OnigSyntaxType* syntax)
7660
0
{
7661
0
  int r;
7662
7663
0
  xmemset(reg, 0, sizeof(*reg));
7664
7665
0
  if (onig_inited == 0) {
7666
#if 0
7667
    return ONIGERR_LIBRARY_IS_NOT_INITIALIZED;
7668
#else
7669
0
    r = onig_initialize(&enc, 1);
7670
0
    if (r != 0)
7671
0
      return ONIGERR_FAIL_TO_INITIALIZE;
7672
7673
0
    onig_warning("You didn't call onig_initialize() explicitly");
7674
0
#endif
7675
0
  }
7676
7677
0
  if (IS_NULL(reg))
7678
0
    return ONIGERR_INVALID_ARGUMENT;
7679
7680
0
  if (ONIGENC_IS_UNDEF(enc))
7681
0
    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
7682
7683
0
  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
7684
0
      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
7685
0
    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
7686
0
  }
7687
7688
0
  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
7689
0
    option |= syntax->options;
7690
0
    option &= ~ONIG_OPTION_SINGLELINE;
7691
0
  }
7692
0
  else
7693
0
    option |= syntax->options;
7694
7695
0
  if ((option & ONIG_OPTION_IGNORECASE_IS_ASCII) != 0) {
7696
0
    case_fold_flag &= ~(INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR |
7697
0
                        ONIGENC_CASE_FOLD_TURKISH_AZERI);
7698
0
    case_fold_flag |= ONIGENC_CASE_FOLD_ASCII_ONLY;
7699
0
  }
7700
7701
0
  (reg)->enc            = enc;
7702
0
  (reg)->options        = option;
7703
0
  (reg)->syntax         = syntax;
7704
0
  (reg)->optimize       = 0;
7705
0
  (reg)->exact          = (UChar* )NULL;
7706
0
  (reg)->extp           = (RegexExt* )NULL;
7707
0
  (reg)->ops            = (Operation* )NULL;
7708
0
  (reg)->ops_curr       = (Operation* )NULL;
7709
0
  (reg)->ops_used       = 0;
7710
0
  (reg)->ops_alloc      = 0;
7711
0
  (reg)->name_table     = (void* )NULL;
7712
0
  (reg)->case_fold_flag = case_fold_flag;
7713
0
  return 0;
7714
0
}
7715
7716
extern int
7717
onig_new_without_alloc(regex_t* reg,
7718
                       const UChar* pattern, const UChar* pattern_end,
7719
                       OnigOptionType option, OnigEncoding enc,
7720
                       OnigSyntaxType* syntax, OnigErrorInfo* einfo)
7721
0
{
7722
0
  int r;
7723
7724
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
7725
0
  if (r != 0) return r;
7726
7727
0
  r = onig_compile(reg, pattern, pattern_end, einfo);
7728
0
  return r;
7729
0
}
7730
7731
extern int
7732
onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
7733
         OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
7734
         OnigErrorInfo* einfo)
7735
0
{
7736
0
  int r;
7737
7738
0
  *reg = (regex_t* )xmalloc(sizeof(regex_t));
7739
0
  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
7740
7741
0
  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
7742
0
  if (r != 0) {
7743
0
    xfree(*reg);
7744
0
    *reg = NULL;
7745
0
    return r;
7746
0
  }
7747
7748
0
  r = onig_compile(*reg, pattern, pattern_end, einfo);
7749
0
  if (r != 0) {
7750
0
    onig_free(*reg);
7751
0
    *reg = NULL;
7752
0
  }
7753
0
  return r;
7754
0
}
7755
7756
extern int
7757
onig_initialize(OnigEncoding encodings[], int n)
7758
0
{
7759
0
  int i;
7760
0
  int r;
7761
7762
0
  if (onig_inited != 0)
7763
0
    return 0;
7764
7765
0
  onigenc_init();
7766
7767
0
  onig_inited = 1;
7768
7769
0
  for (i = 0; i < n; i++) {
7770
0
    OnigEncoding enc = encodings[i];
7771
0
    r = onig_initialize_encoding(enc);
7772
0
    if (r != 0)
7773
0
      return r;
7774
0
  }
7775
7776
0
  return ONIG_NORMAL;
7777
0
}
7778
7779
/* Node of the singly linked list of functions to be run by onig_end(). */
struct EndCallListItem {
  struct EndCallListItem* next;  /* following node, 0 at the end of the list */
  void (*func)(void);            /* function registered by onig_add_end_call() */
};

typedef struct EndCallListItem EndCallListItemType;

/* Head of the list; onig_add_end_call() pushes new nodes at the front. */
static EndCallListItemType* EndCallTop;
7785
7786
extern void onig_add_end_call(void (*func)(void))
7787
0
{
7788
0
  EndCallListItemType* item;
7789
7790
0
  item = (EndCallListItemType* )xmalloc(sizeof(*item));
7791
0
  if (item == 0) return ;
7792
7793
0
  item->next = EndCallTop;
7794
0
  item->func = func;
7795
7796
0
  EndCallTop = item;
7797
0
}
7798
7799
static void
7800
exec_end_call_list(void)
7801
0
{
7802
0
  EndCallListItemType* prev;
7803
0
  void (*func)(void);
7804
7805
0
  while (EndCallTop != 0) {
7806
0
    func = EndCallTop->func;
7807
0
    (*func)();
7808
7809
0
    prev = EndCallTop;
7810
0
    EndCallTop = EndCallTop->next;
7811
0
    xfree(prev);
7812
0
  }
7813
0
}
7814
7815
extern int
7816
onig_end(void)
7817
0
{
7818
0
  exec_end_call_list();
7819
7820
0
#ifdef USE_CALLOUT
7821
0
  onig_global_callout_names_free();
7822
0
#endif
7823
7824
0
  onigenc_end();
7825
7826
0
  onig_inited = 0;
7827
7828
0
  return 0;
7829
0
}
7830
7831
extern int
7832
onig_is_in_code_range(const UChar* p, OnigCodePoint code)
7833
0
{
7834
0
  OnigCodePoint n, *data;
7835
0
  OnigCodePoint low, high, x;
7836
7837
0
  GET_CODE_POINT(n, p);
7838
0
  data = (OnigCodePoint* )p;
7839
0
  data++;
7840
7841
0
  for (low = 0, high = n; low < high; ) {
7842
0
    x = (low + high) >> 1;
7843
0
    if (code > data[x * 2 + 1])
7844
0
      low = x + 1;
7845
0
    else
7846
0
      high = x;
7847
0
  }
7848
7849
0
  return ((low < n && code >= data[low * 2]) ? 1 : 0);
7850
0
}
7851
7852
extern int
7853
onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
7854
0
{
7855
0
  int found;
7856
0
  CClassNode* cc = (CClassNode* )cc_arg;
7857
7858
0
  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
7859
0
    if (IS_NULL(cc->mbuf)) {
7860
0
      found = 0;
7861
0
    }
7862
0
    else {
7863
0
      found = onig_is_in_code_range(cc->mbuf->p, code) != 0;
7864
0
    }
7865
0
  }
7866
0
  else {
7867
0
    found = BITSET_AT(cc->bs, code) != 0;
7868
0
  }
7869
7870
0
  if (IS_NCCLASS_NOT(cc))
7871
0
    return !found;
7872
0
  else
7873
0
    return found;
7874
0
}
7875
7876
extern int
7877
onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
7878
0
{
7879
0
  int len;
7880
7881
0
  if (ONIGENC_MBC_MINLEN(enc) > 1) {
7882
0
    len = 2;
7883
0
  }
7884
0
  else {
7885
0
    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
7886
0
    if (len < 0) return 0;
7887
0
  }
7888
0
  return onig_is_code_in_cc_len(len, code, cc);
7889
0
}
7890
7891
7892
0
/* Largest finite upper bound that mostly_just_anychar() still treats as a
   small greedy repeat. */
#define MANY_REPEAT_OF_ANYCHAR   20

/* Result of mostly_just_anychar(). */
typedef enum {
  MJ_NO     = 0,  /* contains something other than any-char */
  MJ_YES    = 1,  /* any-char found */
  MJ_IGNORE = 2   /* element does not influence the decision */
} MJ_RESULT;
7899
7900
static MJ_RESULT
7901
mostly_just_anychar(Node* node, int in_reluctant)
7902
0
{
7903
0
  MJ_RESULT r;
7904
7905
0
  r = MJ_NO;
7906
0
  switch (ND_TYPE(node)) {
7907
0
  case ND_LIST:
7908
0
    {
7909
0
      int found = FALSE;
7910
0
      do {
7911
0
        r = mostly_just_anychar(ND_CAR(node), in_reluctant);
7912
0
        if (r == MJ_NO) break;
7913
0
        if (r == MJ_YES) found = TRUE;
7914
0
      } while (IS_NOT_NULL(node = ND_CDR(node)));
7915
0
      if (r == MJ_IGNORE) {
7916
0
        if (found == TRUE) r = MJ_YES;
7917
0
      }
7918
0
    }
7919
0
    break;
7920
7921
0
  case ND_ALT:
7922
0
    r = MJ_IGNORE;
7923
0
    do {
7924
0
      r = mostly_just_anychar(ND_CAR(node), in_reluctant);
7925
0
      if (r == MJ_YES) break;
7926
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
7927
0
    break;
7928
7929
0
  case ND_QUANT:
7930
0
    {
7931
0
      QuantNode* qn = QUANT_(node);
7932
7933
0
      if (qn->upper == 0)
7934
0
        r = MJ_IGNORE;
7935
0
      else {
7936
0
        if (in_reluctant == FALSE) {
7937
0
          if (qn->greedy != 0 &&
7938
0
              (! IS_INFINITE_REPEAT(qn->upper) &&
7939
0
               qn->upper <= MANY_REPEAT_OF_ANYCHAR)) {
7940
0
            in_reluctant = TRUE;
7941
0
          }
7942
0
        }
7943
0
        r = mostly_just_anychar(ND_BODY(node), in_reluctant);
7944
0
      }
7945
0
    }
7946
0
    break;
7947
7948
0
  case ND_ANCHOR:
7949
0
    switch (ANCHOR_(node)->type) {
7950
0
    case ANCR_PREC_READ:
7951
0
    case ANCR_PREC_READ_NOT:
7952
0
    case ANCR_LOOK_BEHIND:
7953
0
    case ANCR_LOOK_BEHIND_NOT:
7954
0
    case ANCR_TEXT_SEGMENT_BOUNDARY: /* \y */
7955
0
      r = MJ_IGNORE;
7956
0
      break;
7957
0
    default:
7958
0
      break;
7959
0
    }
7960
0
    break;
7961
7962
0
  case ND_BAG:
7963
0
    {
7964
0
      BagNode* en = BAG_(node);
7965
7966
0
      if (en->type == BAG_IF_ELSE) {
7967
0
        if (IS_NOT_NULL(en->te.Then)) {
7968
0
          r = mostly_just_anychar(en->te.Then, in_reluctant);
7969
0
          if (r == MJ_YES) break;
7970
0
        }
7971
0
        if (IS_NOT_NULL(en->te.Else)) {
7972
0
          r = mostly_just_anychar(en->te.Else, in_reluctant);
7973
0
        }
7974
0
      }
7975
0
      else {
7976
0
        r = mostly_just_anychar(ND_BODY(node), in_reluctant);
7977
0
      }
7978
0
    }
7979
0
    break;
7980
7981
0
  case ND_CTYPE:
7982
0
    if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
7983
0
      r = MJ_YES;
7984
0
    else
7985
0
      r = MJ_NO;
7986
0
    break;
7987
7988
0
  case ND_STRING:
7989
0
    if (ND_STRING_LEN(node) == 0) {
7990
0
      r = MJ_IGNORE;
7991
0
      break;
7992
0
    }
7993
    /* fall */
7994
0
  case ND_CCLASS:
7995
0
    r = MJ_NO;
7996
0
    break;
7997
7998
0
#ifdef USE_CALL
7999
0
  case ND_CALL:
8000
    /* ignore call */
8001
0
#endif
8002
0
  case ND_BACKREF:
8003
0
  case ND_GIMMICK:
8004
0
    r = MJ_IGNORE;
8005
0
    break;
8006
8007
0
  default:
8008
0
    break;
8009
0
  }
8010
8011
0
  return r;
8012
0
}
8013
8014
0
/* Capacity of the call-group array handed to detect_can_be_slow(). */
#define MAX_CALLS_IN_DETECT   10

/* Counters collected by detect_can_be_slow(). */
typedef struct {
  int prec_read;                   /* look-ahead anchors (positive/negative) */
  int look_behind;                 /* look-behind anchors (positive/negative) */
  int backref;                     /* back references without nest level */
  int backref_with_level;          /* only zeroed and summed in the code shown */
  int call;                        /* call nodes */
  int is_keep;                     /* TRUE once a save-keep gimmick was seen */
  int anychar_reluctant_many;      /* repeats judged MJ_YES by mostly_just_anychar() */
  int empty_check_nest_level;      /* current depth of repeats with possibly empty body */
  int max_empty_check_nest_level;  /* greatest depth reached */
  int heavy_element;               /* weight of constructs counted as heavy */
} SlowElementCount;
8028
8029
static int
8030
detect_can_be_slow(Node* node, SlowElementCount* ct, int ncall, int calls[])
8031
0
{
8032
0
  int r;
8033
8034
0
  r = 0;
8035
0
  switch (ND_TYPE(node)) {
8036
0
  case ND_LIST:
8037
0
  case ND_ALT:
8038
0
    do {
8039
0
      r = detect_can_be_slow(ND_CAR(node), ct, ncall, calls);
8040
0
      if (r != 0) return r;
8041
0
    } while (IS_NOT_NULL(node = ND_CDR(node)));
8042
0
    break;
8043
8044
0
  case ND_QUANT:
8045
0
    {
8046
0
      int prev_heavy_element;
8047
0
      QuantNode* qn;
8048
0
      Node* body;
8049
8050
0
      qn = QUANT_(node);
8051
0
      body = ND_BODY(node);
8052
8053
0
      if (qn->emptiness != BODY_IS_NOT_EMPTY) {
8054
0
        prev_heavy_element = ct->heavy_element;
8055
0
        ct->empty_check_nest_level++;
8056
0
        if (ct->empty_check_nest_level > ct->max_empty_check_nest_level)
8057
0
          ct->max_empty_check_nest_level = ct->empty_check_nest_level;
8058
0
      }
8059
0
      else if (IS_INFINITE_REPEAT(qn->upper) || qn->upper > 0) {
8060
0
        MJ_RESULT mr = mostly_just_anychar(body, (qn->greedy == 0));
8061
0
        if (mr == MJ_YES)
8062
0
          ct->anychar_reluctant_many++;
8063
0
      }
8064
8065
0
      r = detect_can_be_slow(body, ct, ncall, calls);
8066
8067
0
      if (qn->emptiness != BODY_IS_NOT_EMPTY) {
8068
0
        if (ND_IS_INPEEK(node)) {
8069
0
          if (ct->empty_check_nest_level > 2) {
8070
0
            if (prev_heavy_element == ct->heavy_element)
8071
0
              ct->heavy_element++;
8072
0
          }
8073
0
        }
8074
0
        ct->empty_check_nest_level--;
8075
0
      }
8076
0
    }
8077
0
    break;
8078
8079
0
  case ND_ANCHOR:
8080
0
    switch (ANCHOR_(node)->type) {
8081
0
    case ANCR_PREC_READ:
8082
0
    case ANCR_PREC_READ_NOT:
8083
0
      ct->prec_read++;
8084
0
      break;
8085
0
    case ANCR_LOOK_BEHIND:
8086
0
    case ANCR_LOOK_BEHIND_NOT:
8087
0
      ct->look_behind++;
8088
0
      break;
8089
0
    default:
8090
0
      break;
8091
0
    }
8092
8093
0
    if (ANCHOR_HAS_BODY(ANCHOR_(node)))
8094
0
      r = detect_can_be_slow(ND_BODY(node), ct, ncall, calls);
8095
0
    break;
8096
8097
0
  case ND_BAG:
8098
0
    {
8099
0
      BagNode* en = BAG_(node);
8100
8101
0
      r = detect_can_be_slow(ND_BODY(node), ct, ncall, calls);
8102
0
      if (r != 0) return r;
8103
8104
0
      if (en->type == BAG_IF_ELSE) {
8105
0
        if (IS_NOT_NULL(en->te.Then)) {
8106
0
          r = detect_can_be_slow(en->te.Then, ct, ncall, calls);
8107
0
          if (r != 0) return r;
8108
0
        }
8109
0
        if (IS_NOT_NULL(en->te.Else)) {
8110
0
          r = detect_can_be_slow(en->te.Else, ct, ncall, calls);
8111
0
          if (r != 0) return r;
8112
0
        }
8113
0
      }
8114
0
    }
8115
0
    break;
8116
8117
0
#ifdef USE_BACKREF_WITH_LEVEL
8118
0
  case ND_BACKREF:
8119
0
    if (ND_IS_NEST_LEVEL(node))
8120
0
      ct->heavy_element++;
8121
0
    else
8122
0
      ct->backref++;
8123
0
    break;
8124
0
#endif
8125
8126
0
#ifdef USE_CALL
8127
0
  case ND_CALL:
8128
0
    {
8129
0
      int i;
8130
0
      int found;
8131
0
      int gnum;
8132
8133
0
      gnum = CALL_(node)->called_gnum;
8134
0
      ct->call++;
8135
8136
0
      if (ND_IS_RECURSION(node) && ND_IS_INPEEK(node) &&
8137
0
          ND_IS_IN_REAL_REPEAT(node)) {
8138
0
         ct->heavy_element += 10;
8139
0
      }
8140
8141
0
      found = FALSE;
8142
0
      for (i = 0; i < ncall; i++) {
8143
0
        if (gnum == calls[i]) {
8144
0
          found = TRUE;
8145
0
          break;
8146
0
        }
8147
0
      }
8148
8149
0
      if (! found) {
8150
0
        if (ncall + 1 < MAX_CALLS_IN_DETECT) {
8151
0
          calls[ncall] = gnum;
8152
0
          r = detect_can_be_slow(ND_BODY(node), ct, ncall + 1, calls);
8153
0
        }
8154
0
        else {
8155
0
          ct->heavy_element++;
8156
0
        }
8157
0
      }
8158
0
    }
8159
0
    break;
8160
0
#endif
8161
0
  case ND_GIMMICK:
8162
0
    {
8163
0
      GimmickNode* g = GIMMICK_(node);
8164
0
      if (g->type == GIMMICK_SAVE && g->detail_type == SAVE_KEEP)
8165
0
        ct->is_keep = TRUE;
8166
0
    }
8167
0
    break;
8168
8169
0
  default:
8170
0
    break;
8171
0
  }
8172
8173
0
  return r;
8174
0
}
8175
8176
extern int
8177
onig_detect_can_be_slow_pattern(const UChar* pattern,
8178
  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
8179
  OnigSyntaxType* syntax)
8180
0
{
8181
0
  int r;
8182
0
  regex_t* reg;
8183
0
  Node* root;
8184
0
  ParseEnv scan_env;
8185
0
  SlowElementCount count;
8186
0
  int calls[MAX_CALLS_IN_DETECT];
8187
0
#ifdef USE_CALL
8188
0
  UnsetAddrList  uslist = {0};
8189
0
#endif
8190
8191
0
  reg = (regex_t* )xmalloc(sizeof(regex_t));
8192
0
  if (IS_NULL(reg)) return ONIGERR_MEMORY;
8193
8194
0
  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
8195
0
  if (r != 0) {
8196
0
    xfree(reg);
8197
0
    return r;
8198
0
  }
8199
8200
0
  r = parse_and_tune(reg, pattern, pattern_end, &scan_env, &root, NULL
8201
0
#ifdef USE_CALL
8202
0
                     , &uslist
8203
0
#endif
8204
0
                    );
8205
0
  if (r != 0) goto err;
8206
8207
0
#ifdef USE_CALL
8208
0
  if (scan_env.num_call > 0) {
8209
0
    unset_addr_list_end(&uslist);
8210
0
  }
8211
0
#endif
8212
8213
0
  count.prec_read          = 0;
8214
0
  count.look_behind        = 0;
8215
0
  count.backref            = 0;
8216
0
  count.backref_with_level = 0;
8217
0
  count.call               = 0;
8218
0
  count.is_keep            = FALSE;
8219
0
  count.anychar_reluctant_many     = 0;
8220
0
  count.empty_check_nest_level     = 0;
8221
0
  count.max_empty_check_nest_level = 0;
8222
0
  count.heavy_element = 0;
8223
8224
0
  r = detect_can_be_slow(root, &count, 0, calls);
8225
0
  if (r == 0) {
8226
0
    int n;
8227
8228
0
    n = count.prec_read + count.look_behind
8229
0
      + count.backref + count.backref_with_level + count.call
8230
0
      + count.anychar_reluctant_many;
8231
8232
0
    if (count.is_keep) count.max_empty_check_nest_level++;
8233
8234
0
    if (count.max_empty_check_nest_level > 2)
8235
0
      n += count.max_empty_check_nest_level - 2;
8236
0
    if (count.heavy_element != 0) {
8237
0
      if (count.heavy_element < 0x10000)
8238
0
        n += count.heavy_element << 8;
8239
0
      else
8240
0
        n += count.heavy_element;
8241
0
    }
8242
8243
0
    r = n;
8244
8245
#ifdef ONIG_DEBUG_PARSE
8246
    fprintf(DBGFP, "-- detect can be slow --\n");
8247
    fprintf(DBGFP, "  prec_read:          %d\n", count.prec_read);
8248
    fprintf(DBGFP, "  look_behind:        %d\n", count.look_behind);
8249
    fprintf(DBGFP, "  backref:            %d\n", count.backref);
8250
    fprintf(DBGFP, "  backref_with_level: %d\n", count.backref_with_level);
8251
    fprintf(DBGFP, "  call:               %d\n", count.call);
8252
    fprintf(DBGFP, "  is_keep:            %d\n", count.is_keep);
8253
    fprintf(DBGFP, "  any_reluctant_many: %d\n", count.anychar_reluctant_many);
8254
    fprintf(DBGFP, "  max_empty_check_nest_level: %d\n", count.max_empty_check_nest_level);
8255
    fprintf(DBGFP, "  heavy_element:      %d\n", count.heavy_element);
8256
    fprintf(DBGFP, "  r:                  %d\n", r);
8257
    fprintf(DBGFP, "\n");
8258
#endif
8259
0
  }
8260
8261
0
  if (IS_NOT_NULL(scan_env.mem_env_dynamic))
8262
0
    xfree(scan_env.mem_env_dynamic);
8263
8264
0
 err:
8265
0
  onig_node_free(root);
8266
0
  onig_free(reg);
8267
0
  return r;
8268
0
}
8269
8270
8271
#ifdef ONIG_DEBUG_PARSE
8272
8273
#ifdef USE_CALL
8274
static void
8275
p_string(FILE* f, int len, UChar* s)
8276
{
8277
  fputs(":", f);
8278
  while (len-- > 0) { fputc(*s++, f); }
8279
}
8280
#endif
8281
8282
/* Write indent space characters to f; nothing is written for indent <= 0. */
static void
Indent(FILE* f, int indent)
{
  for ( ; indent > 0; indent--)
    putc(' ', f);
}
8288
8289
static void
8290
print_indent_tree(FILE* f, Node* node, int indent)
8291
{
8292
  static char* emptiness_name[] = { "", " empty", " empty_mem", " empty_rec" };
8293
8294
  int i;
8295
  NodeType type;
8296
  UChar* p;
8297
  int add = 3;
8298
8299
  Indent(f, indent);
8300
  if (IS_NULL(node)) {
8301
    fprintf(f, "ERROR: null node!!!\n");
8302
    exit(0);
8303
  }
8304
8305
  type = ND_TYPE(node);
8306
  switch (type) {
8307
  case ND_LIST:
8308
  case ND_ALT:
8309
    if (type == ND_LIST)
8310
      fprintf(f, "<list:%p>\n", node);
8311
    else
8312
      fprintf(f, "<alt:%p>\n", node);
8313
8314
    print_indent_tree(f, ND_CAR(node), indent + add);
8315
    while (IS_NOT_NULL(node = ND_CDR(node))) {
8316
      if (ND_TYPE(node) != type) {
8317
        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", ND_TYPE(node));
8318
        exit(0);
8319
      }
8320
      print_indent_tree(f, ND_CAR(node), indent + add);
8321
    }
8322
    break;
8323
8324
  case ND_STRING:
8325
    {
8326
      char* str;
8327
      char* mode;
8328
8329
      if (ND_STRING_IS_CRUDE(node))
8330
        mode = "-crude";
8331
      else if (ND_IS_IGNORECASE(node))
8332
        mode = "-ignorecase";
8333
      else
8334
        mode = "";
8335
8336
      if (STR_(node)->s == STR_(node)->end)
8337
        str = "empty-string";
8338
      else
8339
        str = "string";
8340
8341
      fprintf(f, "<%s%s:%p>", str, mode, node);
8342
      for (p = STR_(node)->s; p < STR_(node)->end; p++) {
8343
        if (*p >= 0x20 && *p < 0x7f)
8344
          fputc(*p, f);
8345
        else {
8346
          fprintf(f, " 0x%02x", *p);
8347
        }
8348
      }
8349
    }
8350
    break;
8351
8352
  case ND_CCLASS:
8353
#define CCLASS_MBUF_MAX_OUTPUT_NUM   10
8354
8355
    fprintf(f, "<cclass:%p>", node);
8356
    if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
8357
    if (CCLASS_(node)->mbuf) {
8358
      BBuf* bbuf = CCLASS_(node)->mbuf;
8359
      fprintf(f, " mbuf(%u) ", bbuf->used);
8360
      for (i = 0; i < bbuf->used && i < CCLASS_MBUF_MAX_OUTPUT_NUM; i++) {
8361
        if (i > 0) fprintf(f, ",");
8362
        fprintf(f, "%0x", bbuf->p[i]);
8363
      }
8364
      if (i < bbuf->used) fprintf(f, "...");
8365
    }
8366
    break;
8367
8368
  case ND_CTYPE:
8369
    fprintf(f, "<ctype:%p> ", node);
8370
    switch (CTYPE_(node)->ctype) {
8371
    case CTYPE_ANYCHAR:
8372
      fprintf(f, "anychar");
8373
      break;
8374
8375
    case ONIGENC_CTYPE_WORD:
8376
      if (CTYPE_(node)->not != 0)
8377
        fputs("not word", f);
8378
      else
8379
        fputs("word",     f);
8380
8381
      if (CTYPE_(node)->ascii_mode != 0)
8382
        fputs(" (ascii)", f);
8383
8384
      break;
8385
8386
    default:
8387
      fprintf(f, "ERROR: undefined ctype.\n");
8388
      exit(0);
8389
    }
8390
    break;
8391
8392
  case ND_ANCHOR:
8393
    fprintf(f, "<anchor:%p> ", node);
8394
    switch (ANCHOR_(node)->type) {
8395
    case ANCR_BEGIN_BUF:        fputs("begin buf",      f); break;
8396
    case ANCR_END_BUF:          fputs("end buf",        f); break;
8397
    case ANCR_BEGIN_LINE:       fputs("begin line",     f); break;
8398
    case ANCR_END_LINE:         fputs("end line",       f); break;
8399
    case ANCR_SEMI_END_BUF:     fputs("semi end buf",   f); break;
8400
    case ANCR_BEGIN_POSITION:   fputs("begin position", f); break;
8401
8402
    case ANCR_WORD_BOUNDARY:    fputs("word boundary",     f); break;
8403
    case ANCR_NO_WORD_BOUNDARY: fputs("not word boundary", f); break;
8404
#ifdef USE_WORD_BEGIN_END
8405
    case ANCR_WORD_BEGIN:       fputs("word begin", f);     break;
8406
    case ANCR_WORD_END:         fputs("word end", f);       break;
8407
#endif
8408
    case ANCR_TEXT_SEGMENT_BOUNDARY:
8409
      fputs("text-segment boundary", f); break;
8410
    case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
8411
      fputs("no text-segment boundary", f); break;
8412
    case ANCR_PREC_READ:
8413
      fprintf(f, "prec read\n");
8414
      print_indent_tree(f, ND_BODY(node), indent + add);
8415
      break;
8416
    case ANCR_PREC_READ_NOT:
8417
      fprintf(f, "prec read not\n");
8418
      print_indent_tree(f, ND_BODY(node), indent + add);
8419
      break;
8420
    case ANCR_LOOK_BEHIND:
8421
      fprintf(f, "look behind\n");
8422
      print_indent_tree(f, ND_BODY(node), indent + add);
8423
      break;
8424
    case ANCR_LOOK_BEHIND_NOT:
8425
      fprintf(f, "look behind not\n");
8426
      print_indent_tree(f, ND_BODY(node), indent + add);
8427
      break;
8428
8429
    default:
8430
      fprintf(f, "ERROR: undefined anchor type.\n");
8431
      break;
8432
    }
8433
    break;
8434
8435
  case ND_BACKREF:
8436
    {
8437
      int* p;
8438
      BackRefNode* br = BACKREF_(node);
8439
      p = BACKREFS_P(br);
8440
      fprintf(f, "<backref%s:%p>", ND_IS_CHECKER(node) ? "-checker" : "", node);
8441
      for (i = 0; i < br->back_num; i++) {
8442
        if (i > 0) fputs(", ", f);
8443
        fprintf(f, "%d", p[i]);
8444
      }
8445
#ifdef USE_BACKREF_WITH_LEVEL
8446
      if (ND_IS_NEST_LEVEL(node)) {
8447
        fprintf(f, ", level: %d", br->nest_level);
8448
      }
8449
#endif
8450
    }
8451
    break;
8452
8453
#ifdef USE_CALL
8454
  case ND_CALL:
8455
    {
8456
      CallNode* cn = CALL_(node);
8457
      fprintf(f, "<call:%p>", node);
8458
      fprintf(f, " num: %d, name", cn->called_gnum);
8459
      p_string(f, cn->name_end - cn->name, cn->name);
8460
      if (ND_IS_RECURSION(node)) fprintf(f, ", recursion");
8461
      if (ND_IS_INPEEK(node))    fprintf(f, ", in-peek");
8462
      if (ND_IS_IN_REAL_REPEAT(node)) fprintf(f, ", in-real-repeat");
8463
    }
8464
    break;
8465
#endif
8466
8467
  case ND_QUANT:
8468
    {
8469
      fprintf(f, "<quantifier:%p>{%d,%d}%s%s%s", node,
8470
              QUANT_(node)->lower, QUANT_(node)->upper,
8471
              (QUANT_(node)->greedy ? "" : "?"),
8472
              QUANT_(node)->include_referred == 0 ? "" : " referred",
8473
              emptiness_name[QUANT_(node)->emptiness]);
8474
      if (ND_IS_INPEEK(node)) fprintf(f, ", in-peek");
8475
      fprintf(f, "\n");
8476
      print_indent_tree(f, ND_BODY(node), indent + add);
8477
    }
8478
    break;
8479
8480
  case ND_BAG:
8481
    {
8482
      BagNode* bn = BAG_(node);
8483
      fprintf(f, "<bag:%p> ", node);
8484
      if (bn->type == BAG_IF_ELSE) {
8485
        Node* Then;
8486
        Node* Else;
8487
8488
        fprintf(f, "if-else\n");
8489
        print_indent_tree(f, ND_BODY(node), indent + add);
8490
8491
        Then = bn->te.Then;
8492
        Else = bn->te.Else;
8493
        if (IS_NULL(Then)) {
8494
          Indent(f, indent + add);
8495
          fprintf(f, "THEN empty\n");
8496
        }
8497
        else
8498
          print_indent_tree(f, Then, indent + add);
8499
8500
        if (IS_NULL(Else)) {
8501
          Indent(f, indent + add);
8502
          fprintf(f, "ELSE empty\n");
8503
        }
8504
        else
8505
          print_indent_tree(f, Else, indent + add);
8506
      }
8507
      else {
8508
        switch (bn->type) {
8509
        case BAG_OPTION:
8510
          fprintf(f, "option:%d", bn->o.options);
8511
          break;
8512
        case BAG_MEMORY:
8513
          fprintf(f, "memory:%d", bn->m.regnum);
8514
          if (ND_IS_CALLED(node)) {
8515
            fprintf(f, ", called");
8516
            if (ND_IS_RECURSION(node))
8517
              fprintf(f, ", recursion");
8518
          }
8519
          else if (ND_IS_REFERENCED(node))
8520
            fprintf(f, ", referenced");
8521
8522
          if (ND_IS_FIXED_ADDR(node))
8523
            fprintf(f, ", fixed-addr");
8524
          if ((bn->m.called_state & IN_PEEK) != 0)
8525
            fprintf(f, ", in-peek");
8526
          break;
8527
        case BAG_STOP_BACKTRACK:
8528
          fprintf(f, "stop-bt");
8529
          break;
8530
        default:
8531
          break;
8532
        }
8533
        fprintf(f, "\n");
8534
        print_indent_tree(f, ND_BODY(node), indent + add);
8535
      }
8536
    }
8537
    break;
8538
8539
  case ND_GIMMICK:
8540
    fprintf(f, "<gimmick:%p> ", node);
8541
    switch (GIMMICK_(node)->type) {
8542
    case GIMMICK_FAIL:
8543
      fprintf(f, "fail");
8544
      break;
8545
    case GIMMICK_SAVE:
8546
      fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
8547
      break;
8548
    case GIMMICK_UPDATE_VAR:
8549
      fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
8550
      break;
8551
#ifdef USE_CALLOUT
8552
    case GIMMICK_CALLOUT:
8553
      switch (GIMMICK_(node)->detail_type) {
8554
      case ONIG_CALLOUT_OF_CONTENTS:
8555
        fprintf(f, "callout:contents:%d", GIMMICK_(node)->num);
8556
        break;
8557
      case ONIG_CALLOUT_OF_NAME:
8558
        fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num);
8559
        break;
8560
      }
8561
#endif
8562
    }
8563
    break;
8564
8565
  default:
8566
    fprintf(f, "print_indent_tree: undefined node type %d\n", ND_TYPE(node));
8567
    break;
8568
  }
8569
8570
  if (type != ND_LIST && type != ND_ALT && type != ND_QUANT &&
8571
      type != ND_BAG)
8572
    fprintf(f, "\n");
8573
  fflush(f);
8574
}
8575
8576
static void
8577
print_tree(FILE* f, Node* node)
8578
{
8579
  print_indent_tree(f, node, 0);
8580
}
8581
#endif