Coverage Report

Created: 2026-03-31 07:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ruby/regexec.c
Line
Count
Source
1
/**********************************************************************
2
  regexec.c -  Onigmo (Oniguruma-mod) (regular expression library)
3
**********************************************************************/
4
/*-
5
 * Copyright (c) 2002-2018  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6
 * Copyright (c) 2011-2019  K.Takata  <kentkt AT csc DOT jp>
7
 * All rights reserved.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 */
30
31
#include "regint.h"
32
33
#ifdef RUBY
34
# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35
#else
36
# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37
#endif
38
39
#ifndef USE_TOKEN_THREADED_VM
40
# ifdef __GNUC__
41
#  define USE_TOKEN_THREADED_VM 1
42
# else
43
#  define USE_TOKEN_THREADED_VM 0
44
# endif
45
#endif
46
47
#ifdef RUBY
48
48.9M
# define ENC_DUMMY_FLAG (1<<24)
49
static inline int
50
rb_enc_asciicompat(OnigEncoding enc)
51
48.9M
{
52
48.9M
  return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53
48.9M
}
54
/* Ruby-specific override of ONIGENC_IS_MBC_ASCII_WORD.
 * When rb_enc_asciicompat(enc) holds, the first byte at `s` is tested
 * directly (alphanumeric or '_'); otherwise the character at `s` is decoded
 * and checked against ONIGENC_CTYPE_WORD via onigenc_ascii_is_code_ctype().
 * `s` is parenthesized so that a compound argument (e.g. `p + 1`) is
 * dereferenced as a whole. */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
   onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,(s),(end)),ONIGENC_CTYPE_WORD,enc))
59
#endif /* RUBY */
60
61
#ifdef USE_CRNL_AS_LINE_TERMINATOR
62
/* True when the character at `p` is CR (13) and the character immediately
 * following it is LF (10).  `p` is parenthesized so that compound pointer
 * expressions are advanced as a whole. */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,(p),(end)) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,((p)+enclen(enc,(p),(end))),(end)) == 10)
65
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66
611M
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67
static int
68
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69
                  const UChar *end, OnigOptionType option, int check_prev)
70
611M
{
71
611M
  if (IS_NEWLINE_CRLF(option)) {
72
0
    if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73
0
      if (check_prev) {
74
0
        const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75
0
        if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76
0
          return 0;
77
0
        else
78
0
          return 1;
79
0
      }
80
0
      else
81
0
        return 1;
82
0
    }
83
0
    else {
84
0
      const UChar *pnext = p + enclen(enc, p, end);
85
0
      if (pnext < end &&
86
0
          ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87
0
          ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88
0
        return 1;
89
0
      if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90
0
        return 1;
91
0
      return 0;
92
0
    }
93
0
  }
94
611M
  else {
95
611M
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96
611M
  }
97
611M
}
98
#else /* USE_CRNL_AS_LINE_TERMINATOR */
99
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101
#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103
#ifdef USE_CAPTURE_HISTORY
104
static void history_tree_free(OnigCaptureTreeNode* node);
105
106
static void
107
history_tree_clear(OnigCaptureTreeNode* node)
108
{
109
  int i;
110
111
  if (IS_NOT_NULL(node)) {
112
    for (i = 0; i < node->num_childs; i++) {
113
      if (IS_NOT_NULL(node->childs[i])) {
114
        history_tree_free(node->childs[i]);
115
      }
116
    }
117
    for (i = 0; i < node->allocated; i++) {
118
      node->childs[i] = (OnigCaptureTreeNode* )0;
119
    }
120
    node->num_childs = 0;
121
    node->beg = ONIG_REGION_NOTPOS;
122
    node->end = ONIG_REGION_NOTPOS;
123
    node->group = -1;
124
    xfree(node->childs);
125
    node->childs = (OnigCaptureTreeNode** )0;
126
  }
127
}
128
129
static void
130
history_tree_free(OnigCaptureTreeNode* node)
131
{
132
  history_tree_clear(node);
133
  xfree(node);
134
}
135
136
static void
137
history_root_free(OnigRegion* r)
138
{
139
  if (IS_NOT_NULL(r->history_root)) {
140
    history_tree_free(r->history_root);
141
    r->history_root = (OnigCaptureTreeNode* )0;
142
  }
143
}
144
145
static OnigCaptureTreeNode*
146
history_node_new(void)
147
{
148
  OnigCaptureTreeNode* node;
149
150
  node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151
  CHECK_NULL_RETURN(node);
152
  node->childs     = (OnigCaptureTreeNode** )0;
153
  node->allocated  = 0;
154
  node->num_childs = 0;
155
  node->group      = -1;
156
  node->beg        = ONIG_REGION_NOTPOS;
157
  node->end        = ONIG_REGION_NOTPOS;
158
159
  return node;
160
}
161
162
static int
163
history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164
{
165
# define HISTORY_TREE_INIT_ALLOC_SIZE  8
166
167
  if (parent->num_childs >= parent->allocated) {
168
    int n, i;
169
170
    if (IS_NULL(parent->childs)) {
171
      n = HISTORY_TREE_INIT_ALLOC_SIZE;
172
      parent->childs =
173
        (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174
      CHECK_NULL_RETURN_MEMERR(parent->childs);
175
    }
176
    else {
177
      OnigCaptureTreeNode** tmp;
178
      n = parent->allocated * 2;
179
      tmp =
180
        (OnigCaptureTreeNode** )xrealloc(parent->childs,
181
                                         sizeof(OnigCaptureTreeNode*) * n);
182
      if (tmp == 0) {
183
        history_tree_clear(parent);
184
        return ONIGERR_MEMORY;
185
      }
186
      parent->childs = tmp;
187
    }
188
    for (i = parent->allocated; i < n; i++) {
189
      parent->childs[i] = (OnigCaptureTreeNode* )0;
190
    }
191
    parent->allocated = n;
192
  }
193
194
  parent->childs[parent->num_childs] = child;
195
  parent->num_childs++;
196
  return 0;
197
}
198
199
/*
 * Deep-copy the capture-history subtree rooted at `node`.
 *
 * Returns the new root, or NULL when any allocation fails (everything that
 * was built so far is released before returning).  `node` must not be NULL.
 */
static OnigCaptureTreeNode*
history_tree_clone(OnigCaptureTreeNode* node)
{
  int i, r;
  OnigCaptureTreeNode *clone, *child;

  clone = history_node_new();
  CHECK_NULL_RETURN(clone);

  /* history_node_new() leaves group at -1; copy it explicitly so the clone
   * keeps the group of the node it was copied from. */
  clone->group = node->group;
  clone->beg = node->beg;
  clone->end = node->end;
  for (i = 0; i < node->num_childs; i++) {
    child = history_tree_clone(node->childs[i]);
    if (IS_NULL(child)) {
      history_tree_free(clone);
      return (OnigCaptureTreeNode* )0;
    }
    r = history_tree_add_child(clone, child);
    if (r != 0) {
      /* child was not attached to clone, so both must be released here. */
      history_tree_free(child);
      history_tree_free(clone);
      return (OnigCaptureTreeNode* )0;
    }
  }

  return clone;
}
226
227
extern  OnigCaptureTreeNode*
228
onig_get_capture_tree(OnigRegion* region)
229
{
230
  return region->history_root;
231
}
232
#endif /* USE_CAPTURE_HISTORY */
233
234
#ifdef USE_MATCH_CACHE
235
236
/*
237
Glossary for "match cache"
238
239
"match cache" or "match cache optimization"
240
The `Regexp#match` optimization by using a cache.
241
242
"cache opcode"
243
A cacheable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc).
244
It corresponds to one or more cache points.
245
246
"cache point"
247
A cacheable point on matching.
248
Usually, a cache opcode corresponds one-to-one to a cache point,
249
but cache opcodes between `OP_REPEAT` and `OP_REPEAT_INC` have some corresponding
250
cache points depending on repetition counts.
251
252
"match cache point"
253
A pair of a cache point and a position on an input string.
254
We encode a match cache point to an integer value by the following equation:
255
"match cache point" = "position on input string" * "total number of cache points" + "cache point"
256
257
"match cache buffer"
258
A bit-array for memoizing (recording) match cache points once backtracked.
259
*/
260
261
static OnigPosition count_num_cache_opcodes_inner(
262
    const regex_t* reg,
263
    MemNumType current_repeat_mem, int lookaround_nesting,
264
    UChar** pp, long* num_cache_opcodes_ptr
265
)
266
362k
{
267
362k
  UChar* p = *pp;
268
362k
  UChar* pend = reg->p + reg->used;
269
362k
  LengthType len;
270
362k
  MemNumType repeat_mem;
271
362k
  OnigEncoding enc = reg->enc;
272
362k
  long num_cache_opcodes = *num_cache_opcodes_ptr;
273
362k
  OnigPosition result;
274
275
4.42M
  while (p < pend) {
276
4.34M
    switch (*p++) {
277
0
      case OP_FINISH:
278
77.1k
      case OP_END:
279
77.1k
        break;
280
281
342k
      case OP_EXACT1: p++; break;
282
124k
      case OP_EXACT2: p += 2; break;
283
50.6k
      case OP_EXACT3: p += 3; break;
284
56.2k
      case OP_EXACT4: p += 4; break;
285
29.5k
      case OP_EXACT5: p += 5; break;
286
227k
      case OP_EXACTN:
287
227k
        GET_LENGTH_INC(len, p); p += len; break;
288
3.57k
      case OP_EXACTMB2N1: p += 2; break;
289
384
      case OP_EXACTMB2N2: p += 4; break;
290
1.27k
      case OP_EXACTMB2N3: p += 6; break;
291
141
      case OP_EXACTMB2N:
292
141
        GET_LENGTH_INC(len, p); p += len * 2; break;
293
4.24k
      case OP_EXACTMB3N:
294
4.24k
        GET_LENGTH_INC(len, p); p += len * 3; break;
295
429
      case OP_EXACTMBN:
296
429
        {
297
429
          int mb_len;
298
429
          GET_LENGTH_INC(mb_len, p);
299
429
          GET_LENGTH_INC(len, p);
300
429
          p += mb_len * len;
301
429
        }
302
429
        break;
303
304
13.6k
      case OP_EXACT1_IC:
305
13.6k
        len = enclen(enc, p, pend); p += len; break;
306
19.1k
      case OP_EXACTN_IC:
307
19.1k
        GET_LENGTH_INC(len, p); p += len; break;
308
309
4.00k
      case OP_CCLASS:
310
21.8k
      case OP_CCLASS_NOT:
311
21.8k
        p += SIZE_BITSET; break;
312
33.7k
      case OP_CCLASS_MB:
313
41.6k
      case OP_CCLASS_MB_NOT:
314
41.6k
        GET_LENGTH_INC(len, p); p += len; break;
315
2.78k
      case OP_CCLASS_MIX:
316
10.4k
      case OP_CCLASS_MIX_NOT:
317
10.4k
        p += SIZE_BITSET;
318
10.4k
        GET_LENGTH_INC(len, p);
319
10.4k
        p += len;
320
10.4k
        break;
321
322
172k
      case OP_ANYCHAR:
323
607k
      case OP_ANYCHAR_ML:
324
607k
        break;
325
87.0k
      case OP_ANYCHAR_STAR:
326
107k
      case OP_ANYCHAR_ML_STAR:
327
107k
        num_cache_opcodes++; break;
328
71.1k
      case OP_ANYCHAR_STAR_PEEK_NEXT:
329
103k
      case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
330
103k
        p++; num_cache_opcodes++; break;
331
332
3.00k
      case OP_WORD:
333
4.60k
      case OP_NOT_WORD:
334
36.4k
      case OP_WORD_BOUND:
335
44.1k
      case OP_NOT_WORD_BOUND:
336
44.1k
      case OP_WORD_BEGIN:
337
44.1k
      case OP_WORD_END:
338
44.1k
        break;
339
340
6.04k
      case OP_ASCII_WORD:
341
12.4k
      case OP_NOT_ASCII_WORD:
342
17.5k
      case OP_ASCII_WORD_BOUND:
343
21.6k
      case OP_NOT_ASCII_WORD_BOUND:
344
21.6k
      case OP_ASCII_WORD_BEGIN:
345
21.6k
      case OP_ASCII_WORD_END:
346
21.6k
        break;
347
348
4.92k
      case OP_BEGIN_BUF:
349
5.88k
      case OP_END_BUF:
350
21.6k
      case OP_BEGIN_LINE:
351
41.9k
      case OP_END_LINE:
352
42.6k
      case OP_SEMI_END_BUF:
353
44.9k
      case OP_BEGIN_POSITION:
354
44.9k
        break;
355
356
322
      case OP_BACKREF1:
357
707
      case OP_BACKREF2:
358
1.25k
      case OP_BACKREFN:
359
1.70k
      case OP_BACKREFN_IC:
360
2.30k
      case OP_BACKREF_MULTI:
361
2.62k
      case OP_BACKREF_MULTI_IC:
362
3.01k
      case OP_BACKREF_WITH_LEVEL:
363
3.01k
        goto impossible;
364
365
3.69k
      case OP_MEMORY_START:
366
60.8k
      case OP_MEMORY_START_PUSH:
367
69.2k
      case OP_MEMORY_END_PUSH:
368
69.2k
      case OP_MEMORY_END_PUSH_REC:
369
118k
      case OP_MEMORY_END:
370
118k
      case OP_MEMORY_END_REC:
371
118k
        p += SIZE_MEMNUM;
372
        // A memory (capture) in look-around is found.
373
118k
        if (lookaround_nesting != 0) {
374
157
          goto impossible;
375
157
        }
376
118k
        break;
377
378
118k
      case OP_KEEP:
379
223
        break;
380
381
0
      case OP_FAIL:
382
0
        break;
383
763k
      case OP_JUMP:
384
763k
        p += SIZE_RELADDR;
385
763k
        break;
386
785k
      case OP_PUSH:
387
785k
        p += SIZE_RELADDR;
388
785k
        num_cache_opcodes++;
389
785k
        break;
390
0
      case OP_POP:
391
0
        break;
392
0
      case OP_PUSH_OR_JUMP_EXACT1:
393
81.8k
      case OP_PUSH_IF_PEEK_NEXT:
394
81.8k
        p += SIZE_RELADDR + 1; num_cache_opcodes++; break;
395
18.5k
      case OP_REPEAT:
396
41.3k
      case OP_REPEAT_NG:
397
41.3k
        if (current_repeat_mem != -1) {
398
          // A nested OP_REPEAT is not yet supported.
399
5.19k
          goto impossible;
400
5.19k
        }
401
36.1k
        GET_MEMNUM_INC(repeat_mem, p);
402
36.1k
        p += SIZE_RELADDR;
403
36.1k
        if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
404
5.02k
          long dummy_num_cache_opcodes = 0;
405
5.02k
          result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &dummy_num_cache_opcodes);
406
5.02k
          if (result < 0 || dummy_num_cache_opcodes < 0) {
407
748
            goto fail;
408
748
          }
409
31.0k
        } else {
410
31.0k
          if (reg->repeat_range[repeat_mem].lower == 0) {
411
19.8k
            num_cache_opcodes++;
412
19.8k
          }
413
31.0k
          result = count_num_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &p, &num_cache_opcodes);
414
31.0k
          if (result < 0 || num_cache_opcodes < 0) {
415
6.72k
            goto fail;
416
6.72k
          }
417
24.3k
          OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
418
24.3k
          if (repeat_range->lower < repeat_range->upper) {
419
21.8k
            num_cache_opcodes++;
420
21.8k
          }
421
24.3k
        }
422
28.6k
        break;
423
28.6k
      case OP_REPEAT_INC:
424
28.6k
      case OP_REPEAT_INC_NG:
425
28.6k
        GET_MEMNUM_INC(repeat_mem, p);
426
28.6k
        if (repeat_mem != current_repeat_mem) {
427
          // A lone or invalid OP_REPEAT_INC is found.
428
0
          goto impossible;
429
0
        }
430
28.6k
        goto exit;
431
28.6k
      case OP_REPEAT_INC_SG:
432
644
      case OP_REPEAT_INC_NG_SG:
433
644
        goto impossible;
434
75.9k
      case OP_NULL_CHECK_START:
435
75.9k
        p += SIZE_MEMNUM;
436
75.9k
        break;
437
50.2k
      case OP_NULL_CHECK_END:
438
50.2k
      case OP_NULL_CHECK_END_MEMST_PUSH:
439
50.2k
        p += SIZE_MEMNUM;
440
50.2k
        break;
441
5.26k
      case OP_NULL_CHECK_END_MEMST:
442
5.26k
        p += SIZE_MEMNUM;
443
5.26k
        break;
444
445
47.1k
      case OP_PUSH_POS:
446
47.1k
        if (lookaround_nesting < 0) {
447
          // A look-around nested in a atomic grouping is found.
448
131
          goto impossible;
449
131
        }
450
47.0k
        result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
451
47.0k
        if (result < 0 || num_cache_opcodes < 0) {
452
140
          goto fail;
453
140
        }
454
46.9k
        break;
455
46.9k
      case OP_PUSH_POS_NOT:
456
24.0k
        if (lookaround_nesting < 0) {
457
          // A look-around nested in a atomic grouping is found.
458
11
          goto impossible;
459
11
        }
460
24.0k
        p += SIZE_RELADDR;
461
24.0k
        result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
462
24.0k
        if (result < 0 || num_cache_opcodes < 0) {
463
179
          goto fail;
464
179
        }
465
23.8k
        break;
466
23.8k
      case OP_PUSH_LOOK_BEHIND_NOT:
467
3.48k
        if (lookaround_nesting < 0) {
468
          // A look-around nested in a atomic grouping is found.
469
239
          goto impossible;
470
239
        }
471
3.24k
        p += SIZE_RELADDR;
472
3.24k
        p += SIZE_LENGTH;
473
3.24k
        result = count_num_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &p, &num_cache_opcodes);
474
3.24k
        if (result < 0 || num_cache_opcodes < 0) {
475
1.20k
          goto fail;
476
1.20k
        }
477
2.03k
        break;
478
145k
      case OP_PUSH_STOP_BT:
479
145k
        if (lookaround_nesting != 0) {
480
          // A nested atomic grouping is found.
481
12.6k
          goto impossible;
482
12.6k
        }
483
132k
        result = count_num_cache_opcodes_inner(reg, current_repeat_mem, -1, &p, &num_cache_opcodes);
484
132k
        if (result < 0 || num_cache_opcodes < 0) {
485
15.9k
          goto fail;
486
15.9k
        }
487
116k
        break;
488
116k
      case OP_POP_POS:
489
70.6k
      case OP_FAIL_POS:
490
72.7k
      case OP_FAIL_LOOK_BEHIND_NOT:
491
189k
      case OP_POP_STOP_BT:
492
189k
        goto exit;
493
4.93k
      case OP_LOOK_BEHIND:
494
4.93k
        p += SIZE_LENGTH;
495
4.93k
        break;
496
497
487
      case OP_PUSH_ABSENT_POS:
498
487
      case OP_ABSENT_END:
499
487
      case OP_ABSENT:
500
487
        goto impossible;
501
502
19.5k
      case OP_CALL:
503
19.5k
      case OP_RETURN:
504
19.5k
        goto impossible;
505
506
164
      case OP_CONDITION:
507
164
        goto impossible;
508
509
0
      case OP_STATE_CHECK_PUSH:
510
0
      case OP_STATE_CHECK_PUSH_OR_JUMP:
511
0
      case OP_STATE_CHECK:
512
0
      case OP_STATE_CHECK_ANYCHAR_STAR:
513
0
      case OP_STATE_CHECK_ANYCHAR_ML_STAR:
514
0
        goto impossible;
515
516
0
      case OP_SET_OPTION_PUSH:
517
0
      case OP_SET_OPTION:
518
0
        p += SIZE_OPTION;
519
0
        break;
520
521
21
      default:
522
21
        goto bytecode_error;
523
4.34M
    }
524
4.34M
  }
525
526
295k
exit:
527
295k
  *pp = p;
528
295k
  *num_cache_opcodes_ptr = num_cache_opcodes;
529
295k
  return 0;
530
531
24.9k
fail:
532
24.9k
  *num_cache_opcodes_ptr = num_cache_opcodes;
533
24.9k
  return result;
534
535
42.2k
impossible:
536
42.2k
  *num_cache_opcodes_ptr = NUM_CACHE_OPCODES_IMPOSSIBLE;
537
42.2k
  return 0;
538
539
21
bytecode_error:
540
21
  return ONIGERR_UNDEFINED_BYTECODE;
541
362k
}
542
543
/* count the total number of cache opcodes for allocating a match cache buffer. */
544
static OnigPosition
545
count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr)
546
119k
{
547
119k
  UChar* p = reg->p;
548
119k
  *num_cache_opcodes_ptr = 0;
549
119k
  OnigPosition result = count_num_cache_opcodes_inner(reg, -1, 0, &p, num_cache_opcodes_ptr);
550
119k
  if (result == 0 && *num_cache_opcodes_ptr >= 0 && p != reg->p + reg->used) {
551
674
    return ONIGERR_UNDEFINED_BYTECODE;
552
674
  }
553
554
118k
  return result;
555
119k
}
556
557
static OnigPosition
558
init_cache_opcodes_inner(
559
    const regex_t* reg,
560
    MemNumType current_repeat_mem, int lookaround_nesting,
561
    OnigCacheOpcode** cache_opcodes_ptr, UChar** pp, long* num_cache_points_ptr
562
)
563
129k
{
564
129k
  UChar* p = *pp;
565
129k
  UChar* pend = reg->p + reg->used;
566
129k
  UChar* pbegin;
567
129k
  LengthType len;
568
129k
  MemNumType repeat_mem;
569
129k
  OnigEncoding enc = reg->enc;
570
129k
  long cache_point = *num_cache_points_ptr;
571
129k
  OnigCacheOpcode *cache_opcodes = *cache_opcodes_ptr;
572
129k
  OnigPosition result;
573
574
493k
# define INC_CACHE_OPCODES if (cache_opcodes != NULL) {\
575
490k
    cache_opcodes->addr = pbegin;\
576
490k
    cache_opcodes->cache_point = cache_point;\
577
490k
    cache_opcodes->outer_repeat_mem = current_repeat_mem;\
578
490k
    cache_opcodes->num_cache_points_at_outer_repeat = 0;\
579
490k
    cache_opcodes->num_cache_points_in_outer_repeat = 0;\
580
490k
    cache_opcodes->lookaround_nesting = lookaround_nesting;\
581
490k
    cache_opcodes->match_addr = NULL;\
582
490k
    cache_point += lookaround_nesting != 0 ? 2 : 1;\
583
490k
    cache_opcodes++;\
584
490k
  }
585
586
1.76M
  while (p < pend) {
587
1.72M
    pbegin = p;
588
1.72M
    switch (*p++) {
589
0
      case OP_FINISH:
590
42.4k
      case OP_END:
591
42.4k
        break;
592
593
145k
      case OP_EXACT1: p++; break;
594
64.9k
      case OP_EXACT2: p += 2; break;
595
12.8k
      case OP_EXACT3: p += 3; break;
596
26.2k
      case OP_EXACT4: p += 4; break;
597
12.3k
      case OP_EXACT5: p += 5; break;
598
75.8k
      case OP_EXACTN:
599
75.8k
        GET_LENGTH_INC(len, p); p += len; break;
600
2.63k
      case OP_EXACTMB2N1: p += 2; break;
601
294
      case OP_EXACTMB2N2: p += 4; break;
602
1.24k
      case OP_EXACTMB2N3: p += 6; break;
603
141
      case OP_EXACTMB2N:
604
141
        GET_LENGTH_INC(len, p); p += len * 2; break;
605
2.00k
      case OP_EXACTMB3N:
606
2.00k
        GET_LENGTH_INC(len, p); p += len * 3; break;
607
397
      case OP_EXACTMBN:
608
397
        {
609
397
          int mb_len;
610
397
          GET_LENGTH_INC(mb_len, p);
611
397
          GET_LENGTH_INC(len, p);
612
397
          p += mb_len * len;
613
397
        }
614
397
        break;
615
616
2.91k
      case OP_EXACT1_IC:
617
2.91k
        len = enclen(enc, p, pend); p += len; break;
618
5.54k
      case OP_EXACTN_IC:
619
5.54k
        GET_LENGTH_INC(len, p); p += len; break;
620
621
1.85k
      case OP_CCLASS:
622
8.75k
      case OP_CCLASS_NOT:
623
8.75k
        p += SIZE_BITSET; break;
624
283
      case OP_CCLASS_MB:
625
2.51k
      case OP_CCLASS_MB_NOT:
626
2.51k
        GET_LENGTH_INC(len, p); p += len; break;
627
513
      case OP_CCLASS_MIX:
628
6.58k
      case OP_CCLASS_MIX_NOT:
629
6.58k
        p += SIZE_BITSET;
630
6.58k
        GET_LENGTH_INC(len, p);
631
6.58k
        p += len;
632
6.58k
        break;
633
634
148k
      case OP_ANYCHAR:
635
189k
      case OP_ANYCHAR_ML:
636
189k
        break;
637
49.9k
      case OP_ANYCHAR_STAR:
638
59.3k
      case OP_ANYCHAR_ML_STAR:
639
59.3k
        INC_CACHE_OPCODES;
640
59.3k
        break;
641
30.5k
      case OP_ANYCHAR_STAR_PEEK_NEXT:
642
46.3k
      case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
643
46.3k
        p++;
644
46.3k
        INC_CACHE_OPCODES;
645
46.3k
        break;
646
647
810
      case OP_WORD:
648
2.19k
      case OP_NOT_WORD:
649
7.02k
      case OP_WORD_BOUND:
650
13.3k
      case OP_NOT_WORD_BOUND:
651
13.3k
      case OP_WORD_BEGIN:
652
13.3k
      case OP_WORD_END:
653
13.3k
        break;
654
655
5.23k
      case OP_ASCII_WORD:
656
6.24k
      case OP_NOT_ASCII_WORD:
657
7.66k
      case OP_ASCII_WORD_BOUND:
658
8.75k
      case OP_NOT_ASCII_WORD_BOUND:
659
8.75k
      case OP_ASCII_WORD_BEGIN:
660
8.75k
      case OP_ASCII_WORD_END:
661
8.75k
        break;
662
663
1.89k
      case OP_BEGIN_BUF:
664
2.35k
      case OP_END_BUF:
665
4.83k
      case OP_BEGIN_LINE:
666
11.3k
      case OP_END_LINE:
667
11.6k
      case OP_SEMI_END_BUF:
668
12.9k
      case OP_BEGIN_POSITION:
669
12.9k
        break;
670
671
0
      case OP_BACKREF1:
672
0
      case OP_BACKREF2:
673
0
      case OP_BACKREFN:
674
0
      case OP_BACKREFN_IC:
675
0
      case OP_BACKREF_MULTI:
676
0
      case OP_BACKREF_MULTI_IC:
677
0
      case OP_BACKREF_WITH_LEVEL:
678
0
        goto unexpected_bytecode_error;
679
680
1.43k
      case OP_MEMORY_START:
681
13.5k
      case OP_MEMORY_START_PUSH:
682
13.6k
      case OP_MEMORY_END_PUSH:
683
13.6k
      case OP_MEMORY_END_PUSH_REC:
684
27.0k
      case OP_MEMORY_END:
685
27.0k
      case OP_MEMORY_END_REC:
686
27.0k
        p += SIZE_MEMNUM;
687
27.0k
        if (lookaround_nesting != 0) {
688
0
          goto unexpected_bytecode_error;
689
0
        }
690
27.0k
        break;
691
692
27.0k
      case OP_KEEP:
693
70
        break;
694
695
0
      case OP_FAIL:
696
0
        break;
697
352k
      case OP_JUMP:
698
352k
        p += SIZE_RELADDR;
699
352k
        break;
700
345k
      case OP_PUSH:
701
345k
        p += SIZE_RELADDR;
702
345k
        INC_CACHE_OPCODES;
703
345k
        break;
704
0
      case OP_POP:
705
0
        break;
706
0
      case OP_PUSH_OR_JUMP_EXACT1:
707
29.3k
      case OP_PUSH_IF_PEEK_NEXT:
708
29.3k
        p += SIZE_RELADDR + 1;
709
29.3k
        INC_CACHE_OPCODES;
710
29.3k
        break;
711
4.64k
      case OP_REPEAT:
712
11.3k
      case OP_REPEAT_NG:
713
11.3k
        GET_MEMNUM_INC(repeat_mem, p);
714
11.3k
        p += SIZE_RELADDR;
715
11.3k
        if (reg->repeat_range[repeat_mem].lower == 0 && reg->repeat_range[repeat_mem].upper == 0) {
716
2.65k
          long dummy_num_cache_points = 0;
717
2.65k
          OnigCacheOpcode* dummy_cache_opcodes = NULL;
718
2.65k
          result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &dummy_cache_opcodes, &p, &dummy_num_cache_points);
719
2.65k
          if (result != 0) {
720
0
            goto fail;
721
0
          }
722
8.66k
        } else {
723
8.66k
          if (reg->repeat_range[repeat_mem].lower == 0) {
724
5.47k
            INC_CACHE_OPCODES;
725
5.47k
          }
726
8.66k
          {
727
8.66k
            long num_cache_points_in_repeat = 0;
728
8.66k
            long num_cache_points_at_repeat = cache_point;
729
8.66k
            OnigCacheOpcode* cache_opcodes_in_repeat = cache_opcodes;
730
8.66k
            result = init_cache_opcodes_inner(reg, repeat_mem, lookaround_nesting, &cache_opcodes, &p, &num_cache_points_in_repeat);
731
8.66k
            if (result != 0) {
732
0
              goto fail;
733
0
            }
734
8.66k
            OnigRepeatRange *repeat_range = &reg->repeat_range[repeat_mem];
735
8.66k
            if (repeat_range->lower < repeat_range->upper) {
736
7.54k
              INC_CACHE_OPCODES;
737
7.54k
              cache_point -= lookaround_nesting != 0 ? 2 : 1;
738
7.54k
            }
739
8.66k
            int repeat_bounds = repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower;
740
8.66k
            cache_point += num_cache_points_in_repeat * repeat_range->lower + (num_cache_points_in_repeat + (lookaround_nesting != 0 ? 2 : 1)) * repeat_bounds;
741
26.8k
            for (; cache_opcodes_in_repeat < cache_opcodes; cache_opcodes_in_repeat++) {
742
18.1k
              cache_opcodes_in_repeat->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;
743
18.1k
              cache_opcodes_in_repeat->num_cache_points_in_outer_repeat = num_cache_points_in_repeat;
744
18.1k
            }
745
8.66k
          }
746
8.66k
        }
747
11.3k
        break;
748
11.3k
      case OP_REPEAT_INC:
749
11.3k
      case OP_REPEAT_INC_NG:
750
11.3k
        p += SIZE_MEMNUM;
751
11.3k
        goto exit;
752
0
      case OP_REPEAT_INC_SG:
753
0
      case OP_REPEAT_INC_NG_SG:
754
0
        goto unexpected_bytecode_error;
755
21.2k
      case OP_NULL_CHECK_START:
756
21.2k
        p += SIZE_MEMNUM;
757
21.2k
        break;
758
17.7k
      case OP_NULL_CHECK_END:
759
17.7k
      case OP_NULL_CHECK_END_MEMST_PUSH:
760
17.7k
        p += SIZE_MEMNUM;
761
17.7k
        break;
762
3.52k
      case OP_NULL_CHECK_END_MEMST:
763
3.52k
        p += SIZE_MEMNUM;
764
3.52k
        break;
765
766
17.5k
      case OP_PUSH_POS:
767
32.5k
        lookaround:
768
32.5k
        {
769
32.5k
          OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes;
770
32.5k
          result = init_cache_opcodes_inner(reg, current_repeat_mem, lookaround_nesting + 1, &cache_opcodes, &p, &cache_point);
771
32.5k
          if (result != 0) {
772
0
            goto fail;
773
0
          }
774
32.5k
          UChar* match_addr = p - 1;
775
166k
          for (; cache_opcodes_in_lookaround < cache_opcodes; cache_opcodes_in_lookaround++) {
776
133k
            if (cache_opcodes_in_lookaround->match_addr == NULL) {
777
133k
              cache_opcodes_in_lookaround->match_addr = match_addr;
778
133k
            }
779
133k
          }
780
32.5k
        }
781
0
        break;
782
14.5k
      case OP_PUSH_POS_NOT:
783
14.5k
        p += SIZE_RELADDR;
784
14.5k
        goto lookaround;
785
497
      case OP_PUSH_LOOK_BEHIND_NOT:
786
497
        p += SIZE_RELADDR;
787
497
        p += SIZE_LENGTH;
788
497
        goto lookaround;
789
43.1k
      case OP_PUSH_STOP_BT:
790
43.1k
        {
791
43.1k
          OnigCacheOpcode* cache_opcodes_in_atomic = cache_opcodes;
792
43.1k
          result = init_cache_opcodes_inner(reg, current_repeat_mem, -1, &cache_opcodes, &p, &cache_point);
793
43.1k
          if (result != 0) {
794
0
            goto fail;
795
0
          }
796
43.1k
          UChar* match_addr = p - 1;
797
105k
          for (; cache_opcodes_in_atomic < cache_opcodes; cache_opcodes_in_atomic++) {
798
62.4k
            if (cache_opcodes_in_atomic->match_addr == NULL) {
799
62.4k
              cache_opcodes_in_atomic->match_addr = match_addr;
800
62.4k
            }
801
62.4k
          }
802
43.1k
        }
803
0
        break;
804
17.5k
      case OP_POP_POS:
805
32.0k
      case OP_FAIL_POS:
806
32.5k
      case OP_FAIL_LOOK_BEHIND_NOT:
807
75.6k
      case OP_POP_STOP_BT:
808
75.6k
        goto exit;
809
4.32k
      case OP_LOOK_BEHIND:
810
4.32k
        p += SIZE_LENGTH;
811
4.32k
        break;
812
813
0
      case OP_ABSENT_END:
814
0
      case OP_ABSENT:
815
0
        goto unexpected_bytecode_error;
816
817
0
      case OP_CALL:
818
0
      case OP_RETURN:
819
0
        goto unexpected_bytecode_error;
820
821
0
      case OP_CONDITION:
822
0
        goto unexpected_bytecode_error;
823
824
0
      case OP_STATE_CHECK_PUSH:
825
0
      case OP_STATE_CHECK_PUSH_OR_JUMP:
826
0
      case OP_STATE_CHECK:
827
0
      case OP_STATE_CHECK_ANYCHAR_STAR:
828
0
      case OP_STATE_CHECK_ANYCHAR_ML_STAR:
829
0
        goto unexpected_bytecode_error;
830
831
0
      case OP_SET_OPTION_PUSH:
832
0
      case OP_SET_OPTION:
833
0
        p += SIZE_OPTION;
834
0
        break;
835
836
0
      default:
837
0
        goto bytecode_error;
838
1.72M
    }
839
1.72M
  }
840
841
129k
exit:
842
129k
  *cache_opcodes_ptr = cache_opcodes;
843
129k
  *pp = p;
844
129k
  *num_cache_points_ptr = cache_point;
845
129k
  return 0;
846
847
0
fail:
848
0
  return result;
849
850
0
unexpected_bytecode_error:
851
0
  return ONIGERR_UNEXPECTED_BYTECODE;
852
853
0
bytecode_error:
854
0
  return ONIGERR_UNDEFINED_BYTECODE;
855
129k
}
856
857
/* collect cache opcodes from the given regex program, and compute the total number of cache points. */
858
static OnigPosition
859
init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes_ptr, long* num_cache_points_ptr)
860
42.4k
{
861
42.4k
  UChar* p = reg->p;
862
42.4k
  *num_cache_points_ptr = 0;
863
42.4k
  OnigPosition result = init_cache_opcodes_inner(reg, -1, 0, &cache_opcodes_ptr, &p, num_cache_points_ptr);
864
42.4k
  if (result == 0 && p != reg->p + reg->used) {
865
0
    return ONIGERR_UNDEFINED_BYTECODE;
866
0
  }
867
868
42.4k
  return result;
869
42.4k
}
870
#else
871
static OnigPosition
872
count_num_cache_opcodes(regex_t* reg, long* num_cache_opcodes)
873
{
874
  *num_cache_opcodes = NUM_CACHE_OPCODES_IMPOSSIBLE;
875
  return 0;
876
}
877
#endif /* USE_MATCH_CACHE */
878
879
extern int
880
onig_check_linear_time(OnigRegexType* reg)
881
0
{
882
0
  long num_cache_opcodes = 0;
883
0
  count_num_cache_opcodes(reg, &num_cache_opcodes);
884
0
  return num_cache_opcodes != NUM_CACHE_OPCODES_IMPOSSIBLE;
885
0
}
886
887
extern void
888
onig_region_clear(OnigRegion* region)
889
1.17M
{
890
1.17M
  int i;
891
892
13.0M
  for (i = 0; i < region->num_regs; i++) {
893
11.8M
    region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
894
11.8M
  }
895
#ifdef USE_CAPTURE_HISTORY
896
  history_root_free(region);
897
#endif
898
1.17M
}
899
900
extern int
901
onig_region_resize(OnigRegion* region, int n)
902
1.17M
{
903
1.17M
  region->num_regs = n;
904
905
1.17M
  if (n < ONIG_NREGION)
906
838k
    n = ONIG_NREGION;
907
908
1.17M
  if (region->allocated == 0) {
909
1.17M
    region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
910
1.17M
    if (region->beg == 0)
911
0
      return ONIGERR_MEMORY;
912
913
1.17M
    region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
914
1.17M
    if (region->end == 0) {
915
0
      xfree(region->beg);
916
0
      return ONIGERR_MEMORY;
917
0
    }
918
919
1.17M
    region->allocated = n;
920
1.17M
  }
921
0
  else if (region->allocated < n) {
922
0
    OnigPosition *tmp;
923
924
0
    region->allocated = 0;
925
0
    tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
926
0
    if (tmp == 0) {
927
0
      xfree(region->beg);
928
0
      xfree(region->end);
929
0
      return ONIGERR_MEMORY;
930
0
    }
931
0
    region->beg = tmp;
932
0
    tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
933
0
    if (tmp == 0) {
934
0
      xfree(region->beg);
935
0
      xfree(region->end);
936
0
      return ONIGERR_MEMORY;
937
0
    }
938
0
    region->end = tmp;
939
940
0
    region->allocated = n;
941
0
  }
942
943
1.17M
  return 0;
944
1.17M
}
945
946
static int
947
onig_region_resize_clear(OnigRegion* region, int n)
948
1.17M
{
949
1.17M
  int r;
950
951
1.17M
  r = onig_region_resize(region, n);
952
1.17M
  if (r != 0) return r;
953
1.17M
  onig_region_clear(region);
954
1.17M
  return 0;
955
1.17M
}
956
957
extern int
958
onig_region_set(OnigRegion* region, int at, int beg, int end)
959
0
{
960
0
  if (at < 0) return ONIGERR_INVALID_ARGUMENT;
961
962
0
  if (at >= region->allocated) {
963
0
    int r = onig_region_resize(region, at + 1);
964
0
    if (r < 0) return r;
965
0
  }
966
967
0
  region->beg[at] = beg;
968
0
  region->end[at] = end;
969
0
  return 0;
970
0
}
971
972
extern void
973
onig_region_init(OnigRegion* region)
974
0
{
975
0
  region->num_regs     = 0;
976
0
  region->allocated    = 0;
977
0
  region->beg          = (OnigPosition* )0;
978
0
  region->end          = (OnigPosition* )0;
979
#ifdef USE_CAPTURE_HISTORY
980
  region->history_root = (OnigCaptureTreeNode* )0;
981
#endif
982
0
}
983
984
extern OnigRegion*
985
onig_region_new(void)
986
0
{
987
0
  OnigRegion* r;
988
989
0
  r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
990
0
  if (r)
991
0
    onig_region_init(r);
992
0
  return r;
993
0
}
994
995
extern void
996
onig_region_free(OnigRegion* r, int free_self)
997
1.17M
{
998
1.17M
  if (r) {
999
1.17M
    if (r->allocated > 0) {
1000
1.17M
      xfree(r->beg);
1001
1.17M
      xfree(r->end);
1002
1.17M
    }
1003
#ifdef USE_CAPTURE_HISTORY
1004
    history_root_free(r);
1005
#endif
1006
1.17M
    if (free_self) {
1007
0
      xfree(r);
1008
0
    }
1009
1.17M
    else {
1010
1.17M
      memset(r, 0, sizeof(OnigRegion));
1011
1.17M
    }
1012
1.17M
  }
1013
1.17M
}
1014
1015
extern void
1016
onig_region_copy(OnigRegion* to, const OnigRegion* from)
1017
0
{
1018
0
#define RREGC_SIZE   (sizeof(int) * from->num_regs)
1019
0
  int i, r;
1020
1021
0
  if (to == from) return;
1022
1023
0
  r = onig_region_resize(to, from->num_regs);
1024
0
  if (r) return;
1025
1026
0
  for (i = 0; i < from->num_regs; i++) {
1027
0
    to->beg[i] = from->beg[i];
1028
0
    to->end[i] = from->end[i];
1029
0
  }
1030
0
  to->num_regs = from->num_regs;
1031
1032
#ifdef USE_CAPTURE_HISTORY
1033
  history_root_free(to);
1034
1035
  if (IS_NOT_NULL(from->history_root)) {
1036
    to->history_root = history_tree_clone(from->history_root);
1037
  }
1038
#endif
1039
0
}
1040
1041
1042
/** stack **/
1043
492M
/* Sentinel stack index meaning "no entry"; parenthesised so the negative
   literal cannot combine with an operator next to the macro use. */
#define INVALID_STACK_INDEX   (-1)
1044
1045
/*
 * Stack entry type codes.
 *
 * Types with a non-zero low byte are the ones STACK_POP stops at
 * (see STK_MASK_POP_USED).
 */
#define STK_ALT                       0x0001
#define STK_LOOK_BEHIND_NOT           0x0002
#define STK_POS_NOT                   0x0003

/* Types that the normal STACK_POP inspects while unwinding. */
#define STK_MEM_START                 0x0100
#define STK_MEM_END                   0x8200
#define STK_REPEAT_INC                0x0300
#define STK_STATE_CHECK_MARK          0x1000

/* Types that the normal STACK_POP steps over. */
#define STK_NULL_CHECK_START          0x3000
#define STK_NULL_CHECK_END            0x5000  /* used by the recursive-call null check */
#define STK_MEM_END_MARK              0x8400
#define STK_POS                       0x0500  /* consumed by STACK_POS_END */
#define STK_STOP_BT                   0x0600  /* marker for "(?>...)" */
#define STK_REPEAT                    0x0700
#define STK_CALL_FRAME                0x0800
#define STK_RETURN                    0x0900
#define STK_VOID                      0x0a00  /* placeholder for a neutralised entry */
#define STK_ABSENT_POS                0x0b00  /* for absent */
#define STK_ABSENT                    0x0c00  /* absent inner loop marker */
#define STK_MATCH_CACHE_POINT         0x0d00  /* for the match cache optimization */
#define STK_ATOMIC_MATCH_CACHE_POINT  0x0e00

/* Masks applied to the type code. */
#define STK_MASK_POP_USED             0x00ff
#define STK_MASK_TO_VOID_TARGET       0x10ff
#define STK_MASK_MEM_END_OR_MARK      0x8000  /* set in STK_MEM_END and STK_MEM_END_MARK */
1074
1075
#ifdef USE_MATCH_CACHE
/* Reset every match-cache field of `msa`: status and opcode count go to
   their UNINIT values, counters to 0 and both buffers to NULL. */
# define MATCH_ARG_INIT_MATCH_CACHE(msa) do {                   \
    (msa).match_cache_status = MATCH_CACHE_STATUS_UNINIT;      \
    (msa).num_fails          = 0;                              \
    (msa).num_cache_opcodes  = NUM_CACHE_OPCODES_UNINIT;       \
    (msa).cache_opcodes      = (OnigCacheOpcode*)NULL;         \
    (msa).num_cache_points   = 0;                              \
    (msa).match_cache_buf    = (uint8_t*)NULL;                 \
  } while (0)

/* Free both match-cache buffers of `msa` and null the pointers. */
# define MATCH_ARG_FREE_MATCH_CACHE(msa) do {                   \
    xfree((msa).cache_opcodes);                                \
    xfree((msa).match_cache_buf);                              \
    (msa).cache_opcodes   = (OnigCacheOpcode*)NULL;            \
    (msa).match_cache_buf = (uint8_t*)NULL;                    \
  } while (0)
#else
/* Without USE_MATCH_CACHE both macros expand to nothing. */
# define MATCH_ARG_INIT_MATCH_CACHE(msa)
# define MATCH_ARG_FREE_MATCH_CACHE(msa)
#endif
1094
1095
/* best_len only takes part in the initialisation when
   USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined. */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT_BEST_LEN(msa)  ((msa).best_len = ONIG_MISMATCH)
#else
# define MATCH_ARG_INIT_BEST_LEN(msa)  ((void )0)
#endif

/*
 * Initialise the match argument `msa`: no heap stack yet (stack_p == 0),
 * the given options/region/start/gpos, zeroed counter and end_time, and the
 * match-cache fields reset through MATCH_ARG_INIT_MATCH_CACHE.
 */
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  MATCH_ARG_INIT_BEST_LEN(msa);\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_MATCH_CACHE(msa);\
} while (0)
1119
1120
#ifdef USE_COMBINATION_EXPLOSION_CHECK
1121
1122
/* Buffers of at least this many bytes are taken from the heap (xmalloc);
   smaller ones come from xalloca().  MATCH_ARG_FREE relies on the same
   threshold to decide whether the buffer must be freed. */
# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * Set up (msa).state_check_buff / (msa).state_check_buff_size.
 *
 * A bitmap of ((str_len + 1) * state_num) bits, rounded up to bytes, is
 * allocated when state_num > 0, str_len reaches
 * STATE_CHECK_STRING_THRESHOLD_LEN and the byte size is below
 * STATE_CHECK_BUFF_MAX_SIZE; only the part from `offset` (converted here
 * from a position to a byte offset, so `offset` must be a modifiable lvalue)
 * is zeroed.  In every other case the buffer is null and the size 0.
 * CHECK_NULL_RETURN_MEMERR makes the enclosing function return when the
 * heap allocation fails.
 *
 * Every use of a macro parameter is parenthesised so expression arguments
 * keep their meaning.
 */
# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {  \
  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    (offset) = ((offset) * (state_num)) >> 3;\
    if (size > 0 && (offset) < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
  } while(0)
1149
1150
/* Release what `msa` owns: the heap stack, the state-check buffer (only when
   its size says it was heap-allocated) and the match-cache buffers. */
# define MATCH_ARG_FREE(msa) do {                                              \
    xfree((msa).stack_p);                                                     \
    if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
      xfree((msa).state_check_buff);                                          \
    MATCH_ARG_FREE_MATCH_CACHE(msa);                                          \
  } while (0)
1157
#else /* USE_COMBINATION_EXPLOSION_CHECK */
1158
1.17M
/* Release what `msa` owns: the heap stack and the match-cache buffers. */
# define MATCH_ARG_FREE(msa) do {       \
    xfree((msa).stack_p);              \
    MATCH_ARG_FREE_MATCH_CACHE(msa);   \
  } while (0)
1162
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1163
1164
1165
1166
15.3M
/* Largest number of OnigStackIndex slots that STACK_INIT still takes from
   xalloca(); above it the slots come from xmalloc(). */
#define MAX_PTR_NUM 100

/*
 * Set up the index area and the backtrack stack of the enclosing function.
 *
 *  - alloc_addr receives room for `ptr_num` OnigStackIndex slots, from
 *    xmalloc() when ptr_num > MAX_PTR_NUM and from xalloca() otherwise;
 *    heap_addr receives that heap pointer, or NULL in the xalloca() cases.
 *  - The stack reuses msa->stack_p (msa->stack_n entries) when it is set;
 *    otherwise `stack_num` entries are taken from xalloca(), sharing one
 *    xalloca() block with the index area when both are xalloca()'d.
 *
 * Writes the call-site variables stk_alloc, stk_base, stk and stk_end and
 * reads `msa`.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)  do {\
  if ((ptr_num) > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_end   = stk_alloc + msa->stack_n;\
    }\
    else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_end   = stk_alloc + (stack_num);\
    }\
  }\
  else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_end    = stk_alloc + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                       + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_end    = stk_alloc + (stack_num);\
  }\
  stk_base = stk_alloc;\
  stk      = stk_base;\
} while(0)
1203
1204
7.67M
/* When the stack no longer lives in its initial storage (stk_base differs
   from stk_alloc), remember it and its capacity in msa for later reuse. */
#define STACK_SAVE do {\
  if (stk_alloc != stk_base) {\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
    msa->stack_p = stk_base;\
  }\
} while (0)
1210
1211
/* Upper bound, in stack entries, used by stack_double() when growing a
   heap-allocated match stack; stack_double() treats 0 as "no limit". */
static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;

/* Returns the current match stack limit. */
extern unsigned int
onig_get_match_stack_limit_size(void)
{
  unsigned int current_limit = MatchStackLimitSize;

  return current_limit;
}
1218
1219
extern int
1220
onig_set_match_stack_limit_size(unsigned int size)
1221
0
{
1222
0
  MatchStackLimitSize = size;
1223
0
  return 0;
1224
0
}
1225
1226
static int
1227
stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1228
             OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1229
465k
{
1230
465k
  size_t n;
1231
465k
  OnigStackType *x, *stk_base, *stk_end, *stk;
1232
1233
465k
  stk_base = *arg_stk_base;
1234
465k
  stk_end  = *arg_stk_end;
1235
465k
  stk      = *arg_stk;
1236
1237
465k
  n = stk_end - stk_base;
1238
465k
  if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1239
170k
    x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1240
170k
    if (IS_NULL(x)) {
1241
0
      STACK_SAVE;
1242
0
      return ONIGERR_MEMORY;
1243
0
    }
1244
170k
    xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1245
170k
    n *= 2;
1246
170k
  }
1247
294k
  else {
1248
294k
    unsigned int limit_size = MatchStackLimitSize;
1249
294k
    n *= 2;
1250
294k
    if (limit_size != 0 && n > limit_size) {
1251
0
      if ((unsigned int )(stk_end - stk_base) == limit_size)
1252
0
        return ONIGERR_MATCH_STACK_LIMIT_OVER;
1253
0
      else
1254
0
        n = limit_size;
1255
0
    }
1256
294k
    x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1257
294k
    if (IS_NULL(x)) {
1258
0
      STACK_SAVE;
1259
0
      return ONIGERR_MEMORY;
1260
0
    }
1261
294k
  }
1262
465k
  *arg_stk      = x + (stk - stk_base);
1263
465k
  *arg_stk_base = x;
1264
465k
  *arg_stk_end  = x + n;
1265
465k
  return 0;
1266
465k
}
1267
1268
3.15G
/*
 * Make sure at least `n` free entries remain between stk and stk_end,
 * growing the stack through stack_double() when they do not.  If growing
 * fails the stack is saved into msa, xmalloc_base is freed and the
 * enclosing function returns the error code.
 */
#define STACK_ENSURE(n) do {\
  if ((n) > stk_end - stk) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while (0)
1278
1279
774M
/* Conversions between a stack entry pointer and its index from stk_base. */
#define STACK_AT(index)        (&stk_base[(index)])
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)
1281
1282
369M
/* Push an entry that carries only a type.  null_check is inherited from the
   entry below, or 0 for the very first entry. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type       = (stack_type);\
  STACK_INC;\
} while (0)
1288
1289
873M
/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (0 != ((stk)->type & STK_MASK_TO_VOID_TARGET))
1290
1291
#ifdef USE_COMBINATION_EXPLOSION_CHECK
1292
/* Bit position for string position `s` and state number `snum`
   (state numbers start at 1); reads `str` and `num_comb_exp_check`
   from the call site. */
# define STATE_CHECK_POS(s,snum) \
  ((((s) - str) * num_comb_exp_check) + ((snum) - 1))

/* Load into `v` the bit recorded for state `snum` at the call site's current
   position `s`, or 0 when there is no state-check buffer. */
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff == NULL) {\
    (v) = 0;\
  }\
  else {\
    ptrdiff_t x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
} while (0)
1301
1302
1303
/* Extra "else if" arm for the pop loops: a STK_STATE_CHECK_MARK entry sets
   the bit of its (position, state) pair in state_check_buff. */
# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    ptrdiff_t x = STATE_CHECK_POS((stk)->u.state.pstr, (stk)->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8));        \
  }
1308
1309
/*
 * Push macros used when USE_COMBINATION_EXPLOSION_CHECK is defined.
 *
 * Each of them also propagates null_check from the entry below (0 for the
 * first entry), like every other push macro in this file does:
 * STACK_NULL_CHECK* start their scan at (stk-1)->null_check, so an entry
 * pushed without it would hand them an indeterminate index.
 */

/* Push a state entry (code pointer, string position, previous position,
   keep position) with no state-check number. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Same as STACK_PUSH for a slot whose space is already guaranteed; only the
   type and the code pointer are stored. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Push a STK_ALT entry that remembers state number `snum` (0 when there is
   no state-check buffer). */
# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Push a STK_STATE_CHECK_MARK entry for (s, snum); does nothing when there
   is no state-check buffer. */
# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)
1347
1348
#else /* USE_COMBINATION_EXPLOSION_CHECK */
1349
1350
# define ELSE_IF_STATE_CHECK_MARK(stk)
1351
1352
1.80G
/* Push a state entry: code pointer, string position, previous position and
   keep position.  null_check is inherited from the entry below, or 0 for
   the first entry. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->null_check        = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type              = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while (0)
1362
1363
7.67M
/* Push a state entry into a slot whose space is already guaranteed (no
   STACK_ENSURE); only the type, null_check and code pointer are stored. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->null_check    = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type          = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while (0)
1369
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1370
1371
1.78G
/* Convenience wrappers that fix the entry type of STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev,keep) \
        STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep) \
        STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) \
        STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_ABSENT \
        STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT \
        STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
        STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1378
1379
102M
/* Push a STK_REPEAT entry for repeat number `id` with code pointer `pat`;
   its counter starts at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->null_check      = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type            = STK_REPEAT;\
  stk->u.repeat.count  = 0;\
  stk->u.repeat.num    = (id);\
  stk->u.repeat.pcode  = (pat);\
  STACK_INC;\
} while (0)

/* Push a STK_REPEAT_INC entry that refers to the STK_REPEAT entry at stack
   index `sindex`; the pop loops decrement that entry's counter when they
   unwind over this one. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->null_check       = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type             = STK_REPEAT_INC;\
  stk->u.repeat_inc.si  = (sindex);\
  STACK_INC;\
} while (0)
1396
1397
151M
/*
 * Push a STK_MEM_START entry for group `mnum` at position `s`.  The entry
 * keeps the previous mem_start_stk/mem_end_stk values of the group so they
 * can be restored on backtrack; afterwards the group's start index points at
 * this entry and its end index is INVALID_STACK_INDEX.
 */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check     = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type           = STK_MEM_START;\
  stk->u.mem.num      = (mnum);\
  stk->u.mem.pstr     = (s);\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while (0)

/*
 * Push a STK_MEM_END entry for group `mnum` at position `s`.  The previous
 * start/end indices of the group are kept in the entry; afterwards the
 * group's end index points at this entry.
 */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check   = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type         = STK_MEM_END;\
  stk->u.mem.num    = (mnum);\
  stk->u.mem.pstr   = (s);\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while (0)

/* Push a STK_MEM_END_MARK entry that records only the group number. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type       = STK_MEM_END_MARK;\
  stk->u.mem.num  = (mnum);\
  STACK_INC;\
} while (0)
1429
1430
33.8M
/*
 * Scan downwards from the stack top and leave `k` on the STK_MEM_START entry
 * of group `mnum` that is not paired with a later MEM_END / MEM_END_MARK
 * entry of the same group (each such end entry met on the way raises
 * `level`, each start entry met at level > 0 lowers it).  When no such
 * entry exists the scan ends with k == stk_base.
 */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while (0)
1445
1446
/*
 * Scan upwards from `k` towards the stack top for group `mnum`: `start`
 * receives the position of the first STK_MEM_START met at nesting level 0
 * and `end` the position of the STK_MEM_END that brings the level back to 0,
 * where the scan stops with `k` on that entry.  If the top is reached first,
 * `end` is left unmodified.
 */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  for (; k < stk; k++) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      if (--level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
  }\
} while (0)
1463
1464
158M
/* Push a STK_NULL_CHECK_START entry for check number `cnum` at position `s`.
   Its null_check is its own stack index, so later entries inherit the index
   of the most recent null-check entry. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->null_check        = (OnigStackIndex)(stk - stk_base);\
  stk->type              = STK_NULL_CHECK_START;\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while (0)

/* Push a STK_NULL_CHECK_END entry for check number `cnum`; its null_check
   is its own stack index as well. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->null_check        = (OnigStackIndex)(stk - stk_base);\
  stk->type              = STK_NULL_CHECK_END;\
  stk->u.null_check.num  = (cnum);\
  STACK_INC;\
} while (0)
1480
1481
32.2M
/* Push a STK_CALL_FRAME entry holding the return address `pat`. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->null_check            = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type                  = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while (0)

/* Push a STK_RETURN entry (type only). */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->null_check = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type       = STK_RETURN;\
  STACK_INC;\
} while (0)
1495
1496
80.8M
/* Push a STK_ABSENT_POS entry storing the pair (start, end); the pair is
   read back by STACK_POP_ABSENT_POS. */
#define STACK_PUSH_ABSENT_POS(start, end) do {\
  STACK_ENSURE(1);\
  stk->null_check            = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type                  = STK_ABSENT_POS;\
  stk->u.absent_pos.abs_pstr = (start);\
  stk->u.absent_pos.end_pstr = (end);\
  STACK_INC;\
} while (0)
1504
1505
85.0M
/* Push a STK_MATCH_CACHE_POINT entry holding the byte index and bit mask
   that the MEMOIZE_* macros later apply to msa->match_cache_buf. */
#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\
  STACK_ENSURE(1);\
  stk->null_check                = (stk == stk_base) ? 0 : stk[-1].null_check;\
  stk->type                      = STK_MATCH_CACHE_POINT;\
  stk->u.match_cache_point.index = (match_cache_point_index);\
  stk->u.match_cache_point.mask  = (match_cache_point_mask);\
  STACK_INC;\
} while (0)
1513
1514
1515
#ifdef ONIG_DEBUG
/* Debug builds: when `p` has dropped below stk_base, print the name of the
   calling macro to stderr and jump to the stack_error label. */
# define STACK_BASE_CHECK(p, at)        \
  if (stk_base > (p)) {                 \
    fprintf(stderr, "at %s\n", at);     \
    goto stack_error;                   \
  }
#else
/* Non-debug builds: expands to nothing. */
# define STACK_BASE_CHECK(p, at)
#endif
1524
1525
/*
 * Trace hook run whenever a match cache point is memoized.
 *
 * With ONIG_DEBUG_MATCH_CACHE it prints the index and mask of the given
 * stack entry to stderr; otherwise it is a no-op expression.  Both variants
 * are plain expressions (no trailing ';' in the definition, argument
 * parenthesised), so a use behaves the same in either build.
 */
#ifdef ONIG_DEBUG_MATCH_CACHE
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) \
  fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", \
          (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask)
#else
# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0)
#endif
1530
1531
/*
 * Match-cache bookkeeping used by the stack pop macros.
 *
 * The macros without a parameter operate on the call site's current entry
 * `stk`, so their debug hook is given `stk` as well.  Every macro has a
 * no-op counterpart in the #else branch, including
 * MEMOIZE_ATOMIC_MATCH_CACHE_POINT, which STACK_STOP_BT_FAIL uses in both
 * configurations.
 */
#ifdef USE_MATCH_CACHE
/* Count one more failed alternative in msa. */
# define INC_NUM_FAILS msa->num_fails++

/* Record the cache point held by `stk`: a plain point sets its mask bit
   directly, an atomic point goes through
   memoize_extended_match_cache_point(). */
# define MEMOIZE_MATCH_CACHE_POINT do {\
    if (stk->type == STK_MATCH_CACHE_POINT) {\
      msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\
      MATCH_CACHE_DEBUG_MEMOIZE(stk);\
    }\
    else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\
      memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
      MATCH_CACHE_DEBUG_MEMOIZE(stk);\
    }\
  } while(0)

/* Turn the cache point held by `stkp` into STK_VOID and record it through
   memoize_extended_match_cache_point(). */
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\
    if ((stkp)->type == STK_MATCH_CACHE_POINT) {\
      (stkp)->type = STK_VOID;\
      memoize_extended_match_cache_point(msa->match_cache_buf, (stkp)->u.match_cache_point.index, (stkp)->u.match_cache_point.mask);\
      MATCH_CACHE_DEBUG_MEMOIZE(stkp);\
    }\
  } while(0)

/* Record the plain cache point held by `stk` through
   memoize_extended_match_cache_point(). */
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\
    if (stk->type == STK_MATCH_CACHE_POINT) {\
      memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\
      MATCH_CACHE_DEBUG_MEMOIZE(stk);\
    }\
  } while(0)
#else
# define INC_NUM_FAILS ((void) 0)
# define MEMOIZE_MATCH_CACHE_POINT ((void) 0)
# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) ((void) 0)
# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT ((void) 0)
#endif
1561
1562
0
/* Drop exactly one entry from the stack top. */
#define STACK_POP_ONE do {                  \
  --stk;                                    \
  STACK_BASE_CHECK(stk, "STACK_POP_ONE");   \
} while (0)
1566
1567
1.51G
/*
 * Unwind the stack until an entry whose type has a bit of STK_MASK_POP_USED
 * set is on top (`stk` is left pointing at it).  What is undone for the
 * entries passed on the way depends on `pop_level`:
 *
 *  - STACK_POP_LEVEL_FREE:      nothing is restored;
 *  - STACK_POP_LEVEL_MEM_START: STK_MEM_START entries restore
 *                               mem_start_stk / mem_end_stk;
 *  - any other level:           additionally STK_REPEAT_INC entries
 *                               decrement the counter of their STK_REPEAT
 *                               entry and STK_MEM_END entries restore
 *                               mem_start_stk / mem_end_stk.
 *
 * At every level ELSE_IF_STATE_CHECK_MARK and MEMOIZE_MATCH_CACHE_POINT are
 * applied to each passed entry.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      else if (stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
      MEMOIZE_MATCH_CACHE_POINT;\
    }\
    break;\
  }\
} while (0)
1613
1614
700k
/*
 * Unwind until a STK_POS_NOT entry is on top.  Passed STK_MEM_START /
 * STK_MEM_END entries restore mem_start_stk / mem_end_stk, STK_REPEAT_INC
 * entries decrement their repeat counter and "to-void target" entries
 * increment the failure counter.  Every passed entry is also handed to
 * MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT.
 */
#define STACK_POP_TIL_POS_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (IS_TO_VOID_TARGET(stk)) {\
      INC_NUM_FAILS;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
    MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\
  }\
} while (0)
1637
1638
4.65M
/*
 * Unwind until a STK_LOOK_BEHIND_NOT entry is on top.  Passed STK_MEM_START /
 * STK_MEM_END entries restore mem_start_stk / mem_end_stk and STK_REPEAT_INC
 * entries decrement their repeat counter.
 */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while (0)
1657
1658
7.95M
/*
 * Unwind until a STK_ABSENT entry is on top.  Passed STK_MEM_START /
 * STK_MEM_END entries restore mem_start_stk / mem_end_stk and STK_REPEAT_INC
 * entries decrement their repeat counter.
 */
#define STACK_POP_TIL_ABSENT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    else if (stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while (0)
1677
1678
80.8M
/* Pop the top entry and read back the (start, end) pair stored in its
   absent_pos fields.  The entry's type is not checked. */
#define STACK_POP_ABSENT_POS(start, end) do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (end)   = stk->u.absent_pos.end_pstr;\
  (start) = stk->u.absent_pos.abs_pstr;\
} while (0)
1684
1685
6.71M
/*
 * Walk `k` down from the stack top to the nearest STK_POS entry without
 * popping anything.  "To-void target" entries on the way are counted as
 * failures and turned into STK_VOID, all other passed entries are handed to
 * MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT, and the STK_POS entry itself is
 * turned into STK_VOID; `k` is left on it.
 */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\
  }\
} while (0)
1701
1702
210M
/*
 * Walk down from the stack top to the nearest STK_STOP_BT entry without
 * popping anything.  "To-void target" entries on the way are counted as
 * failures and turned into STK_VOID, STK_MATCH_CACHE_POINT entries become
 * STK_ATOMIC_MATCH_CACHE_POINT, and the STK_STOP_BT entry itself is turned
 * into STK_VOID.
 */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      INC_NUM_FAILS;\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
    else if (k->type == STK_MATCH_CACHE_POINT) {\
      k->type = STK_ATOMIC_MATCH_CACHE_POINT;\
    }\
  }\
} while (0)
1720
1721
1.73M
/*
 * Unwind until a STK_STOP_BT entry is on top and turn that entry into
 * STK_VOID.  Every entry passed on the way is handed to
 * MEMOIZE_ATOMIC_MATCH_CACHE_POINT.
 *
 * The debug label names this macro (it used to repeat the label of
 * STACK_STOP_BT_END, which made ONIG_DEBUG output ambiguous).
 */
#define STACK_STOP_BT_FAIL do {\
  while (1) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_STOP_BT_FAIL"); \
    if (stk->type == STK_STOP_BT) {\
      stk->type = STK_VOID;\
      break;\
    }\
    MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\
  }\
} while(0)
1732
1733
89.5M
/*
 * Find the STK_NULL_CHECK_START entry with number `id`, scanning downwards
 * from the entry indexed by the top entry's null_check, and set `isnull` to
 * whether the position stored there equals `s`.
 */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while (0)
1746
1747
/*
 * Nesting-aware variant of STACK_NULL_CHECK.
 *
 * Searches downwards from the stack index recorded in the top entry's
 * null_check field for the STK_NULL_CHECK_START entry numbered `id` that
 * belongs to the current nesting level.  `level` counts STK_NULL_CHECK_END
 * entries for the same `id` that have been passed and not yet paired with
 * their start entry.
 *
 * Only end entries carrying the same number as `id` take part in the
 * balance, matching what STACK_NULL_CHECK_MEMST_REC does.  Counting end
 * entries of other null-check ids would raise `level` without any start
 * entry ever lowering it again.
 *
 * isnull: lvalue; set to 1 if the recorded position equals `s`, else 0.
 * id:     null-check number to look for.
 * s:      current subject position.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
1767
1768
130M
/*
 * Null-loop check that also looks at capture state.
 *
 * Locates the STK_NULL_CHECK_START entry numbered `id` (searching downwards
 * from the index stored in the top entry's null_check field).
 *   - If its recorded position differs from `s`, isnull = 0.
 *   - Otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that entry and the stack top is examined:
 *       * end index is INVALID_STACK_INDEX          -> isnull = 0, stop;
 *       * recorded start position != end position   -> isnull = 0, stop;
 *       * start == end but end != s                 -> isnull = -1
 *         (empty, but position changed), keep scanning.
 *     The end position is read through the stack when the group's bit is set
 *     in reg->bt_mem_end, and taken directly from u.mem.end otherwise.
 *
 * isnull: lvalue receiving 1, 0 or -1 as described above.
 */
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check);\
  for (;; k--) {\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type != STK_NULL_CHECK_START) continue;\
    if (k->u.null_check.num != (id)) continue;\
    if (k->u.null_check.pstr != (s)) {\
      (isnull) = 0;\
      break;\
    }\
    (isnull) = 1;\
    for (; k < stk; k++) {\
      UChar* endp;\
      if (k->type != STK_MEM_START) continue;\
      if (k->u.mem.end == INVALID_STACK_INDEX) {\
        (isnull) = 0; break;\
      }\
      endp = (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
               ? STACK_AT(k->u.mem.end)->u.mem.pstr\
               : (UChar* )k->u.mem.end;\
      if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
        (isnull) = 0; break;\
      }\
      if (endp != s) {\
        (isnull) = -1; /* empty, but position changed */ \
      }\
    }\
    break;\
  }\
} while(0)
1806
1807
9.67M
/*
 * Nesting-aware variant of STACK_NULL_CHECK_MEMST.
 *
 * While searching downwards, each STK_NULL_CHECK_END entry numbered `id`
 * raises `level` and each STK_NULL_CHECK_START entry numbered `id` seen with
 * level != 0 lowers it again; the start entry seen at level 0 is the one that
 * is evaluated.
 *   - If its recorded position differs from `s`, isnull = 0.
 *   - Otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that entry and the stack top is examined:
 *       * end index is INVALID_STACK_INDEX          -> isnull = 0, stop;
 *       * recorded start position != end position   -> isnull = 0, stop;
 *       * start == end but end != s                 -> isnull = -1
 *         (empty, but position changed), keep scanning.
 *
 * isnull: lvalue receiving 1, 0 or -1 as described above.
 */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check);\
  for (;; k--) {\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
      continue;\
    }\
    if (k->type != STK_NULL_CHECK_START) continue;\
    if (k->u.null_check.num != (id)) continue;\
    if (level != 0) {\
      level--;\
      continue;\
    }\
    if (k->u.null_check.pstr != (s)) {\
      (isnull) = 0;\
      break;\
    }\
    (isnull) = 1;\
    for (; k < stk; k++) {\
      UChar* endp;\
      if (k->type != STK_MEM_START) continue;\
      if (k->u.mem.end == INVALID_STACK_INDEX) {\
        (isnull) = 0; break;\
      }\
      endp = (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
               ? STACK_AT(k->u.mem.end)->u.mem.pstr\
               : (UChar* )k->u.mem.end;\
      if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
        (isnull) = 0; break;\
      }\
      if (endp != s) {\
        (isnull) = -1; /* empty, but position changed */ \
      }\
    }\
    break;\
  }\
} while(0)
1854
1855
62.0M
/*
 * Locate the STK_REPEAT entry numbered `id` that belongs to the current call
 * level, searching downwards from the stack top.
 *
 * `level` is lowered by each STK_CALL_FRAME and raised by each STK_RETURN
 * passed on the way; a STK_REPEAT entry is only accepted while level == 0.
 *
 * id: repeat number to look for.
 * k:  lvalue of type OnigStackType*; on exit it points at the entry found.
 */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) break;\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN) level++;\
  }\
} while(0)
1872
1873
12.0M
/*
 * Fetch the return address of the innermost pending call.
 *
 * Searches downwards from the stack top.  Each STK_RETURN entry raises
 * `level`; each STK_CALL_FRAME entry seen with level != 0 lowers it.  The
 * first STK_CALL_FRAME entry seen at level 0 supplies the result.
 *
 * addr: lvalue receiving u.call_frame.ret_addr of that entry.
 */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k;\
  for (k = stk - 1; ; k--) {\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level != 0) {\
        level--;\
        continue;\
      }\
      (addr) = k->u.call_frame.ret_addr;\
      break;\
    }\
  }\
} while(0)
1890
1891
1892
30.4M
/*
 * Compare `len` elements at s1 and s2; jump to the enclosing `fail` label on
 * the first mismatch.
 *
 * All three arguments are modified: s1 and s2 are advanced past every element
 * compared (including a mismatching one) and len is counted down.
 */
#define STRING_CMP(s1,s2,len) do {\
  for (;;) {\
    if (len-- <= 0) break;\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
1897
1898
4.19M
/*
 * Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the enclosing scope.  Jumps to the enclosing `fail` label when
 * string_cmp_ic() reports a mismatch (returns 0).
 */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (!string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end)) \
    goto fail; \
} while(0)
1902
1903
/*
 * Case-insensitive comparison of two byte ranges, one character at a time.
 *
 * Each iteration case-folds the next character of both inputs with
 * ONIGENC_MBC_CASE_FOLD and compares the folded byte sequences.  The loop
 * runs until s1 has consumed `mblen` bytes.
 *
 * enc:            encoding used for folding.
 * case_fold_flag: folding options forwarded to the encoding.
 * s1:             first string.
 * ps2:            in/out pointer to the second string; advanced past the
 *                 compared text only when the comparison succeeds.
 * mblen:          number of bytes of s1 to compare.
 * text_end:       end limit handed to the folding routine for both inputs.
 *
 * Returns 1 when every folded character matches, 0 otherwise.
 */
static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
{
  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
  UChar *s2 = *ps2;
  UChar *const stop1 = s1 + mblen;

  while (s1 < stop1) {
    int i;
    int n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, folded1);
    int n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, folded2);

    if (n1 != n2) return 0;
    for (i = 0; i < n1; i++) {
      if (folded1[i] != folded2[i]) return 0;
    }
  }

  *ps2 = s2;
  return 1;
}
1929
1930
1.55M
/*
 * Compare `len` elements at s1 and s2 and store the outcome in is_fail
 * (1 on the first mismatch, 0 if everything matched) instead of jumping.
 *
 * s1, s2 and len are modified: the pointers are advanced past every element
 * compared (including a mismatching one) and len is counted down.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  for (;;) {\
    if (len-- <= 0) break;\
    if (*s1++ != *s2++) {\
      is_fail = 1;\
      break;\
    }\
  }\
} while(0)
1938
1939
4.18M
/*
 * Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the enclosing scope.  Stores 1 in is_fail when string_cmp_ic()
 * reports a mismatch (returns 0) and 0 otherwise.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = \
    (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
      ? 1 : 0; \
} while(0)
1945
1946
1947
/*
 * Position predicates and bounds checks.  They all read the variables
 * `str`, `end`, `s` (and `right_range` in the range-restricted build) from
 * the scope in which they are expanded.
 */
/* True when the subject string has zero length. */
#define IS_EMPTY_STR           (str == end)
1948
43.4M
/* True when position s is the start of the subject string. */
#define ON_STR_BEGIN(s)        ((s) == str)
1949
25.5M
/* True when position s is the end of the subject string. */
#define ON_STR_END(s)          ((s) == end)
1950
/*
 * DATA_ENSURE_CHECK1        at least one more byte is available at s.
 * DATA_ENSURE_CHECK(n)      at least n more bytes are available at s.
 * DATA_ENSURE(n)            bare `if` that jumps to `fail` when fewer than n
 *                           bytes are available.
 * DATA_ENSURE_CONTINUE(n)   bare `if` that executes `continue` in that case.
 * ABSENT_END_POS            the limit used by the checks above.
 *
 * The two statement macros are bare `if`s, so an `else` written directly
 * after them would attach to the macro's `if`.
 */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1951
# define DATA_ENSURE_CHECK1    (s < right_range)
1952
# define DATA_ENSURE_CHECK(n)  (s + (n) <= right_range)
1953
# define DATA_ENSURE(n)        if (s + (n) > right_range) goto fail
1954
# define DATA_ENSURE_CONTINUE(n) if (s + (n) > right_range) continue
1955
# define ABSENT_END_POS        right_range
1956
#else
1957
978M
# define DATA_ENSURE_CHECK1    (s < end)
1958
0
# define DATA_ENSURE_CHECK(n)  (s + (n) <= end)
1959
2.77G
# define DATA_ENSURE(n)        if (s + (n) > end) goto fail
1960
5.74M
# define DATA_ENSURE_CONTINUE(n) if (s + (n) > end) continue
1961
171M
# define ABSENT_END_POS        end
1962
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1963
1964
int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
1965
1966
static inline int
1967
enclen_approx(OnigEncoding enc, const OnigUChar* p, const OnigUChar* e)
1968
1.16G
{
1969
1.16G
    if (enc->max_enc_len == enc->min_enc_len) {
1970
1.14G
        return (p < e ? enc->min_enc_len : 0);
1971
1.14G
    }
1972
23.6M
    else {
1973
23.6M
        return onigenc_mbclen_approximate(p, e, enc);
1974
23.6M
    }
1975
1.16G
}
1976
1977
1978
#ifdef USE_CAPTURE_HISTORY
/*
 * Build the capture-history subtree below `node` from the match stack.
 *
 * Scans stack entries from *kp up to (not including) stk_top:
 *   - A STK_MEM_START entry for a group that is flagged in
 *     reg->capture_history (and whose number does not exceed
 *     ONIG_MAX_CAPTURE_HISTORY_GROUP) creates a child node, which is attached
 *     to `node` and then filled in by a recursive call starting just after
 *     that entry.
 *   - A STK_MEM_END entry whose group equals node->group closes `node`:
 *     node->end is recorded, *kp is set to that entry and 0 is returned.
 *
 * Positions are stored as offsets from `str`.
 *
 * Returns 0 when `node` was closed, 1 when the scan reached stk_top (root
 * node ending), or the non-zero result of a failed helper/recursive call.
 */
static int
make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
                          OnigStackType* stk_top, UChar* str, regex_t* reg)
{
  OnigStackType* k;

  for (k = *kp; k < stk_top; k++) {
    int group, status;
    OnigCaptureTreeNode* child;

    if (k->type == STK_MEM_END) {
      if (k->u.mem.num == node->group) {
        node->end = k->u.mem.pstr - str;
        *kp = k;
        return 0;
      }
      continue;
    }
    if (k->type != STK_MEM_START) continue;

    group = k->u.mem.num;
    if (group > ONIG_MAX_CAPTURE_HISTORY_GROUP) continue;
    if (BIT_STATUS_AT(reg->capture_history, group) == 0) continue;

    child = history_node_new();
    CHECK_NULL_RETURN_MEMERR(child);
    child->group = group;
    child->beg = k->u.mem.pstr - str;

    status = history_tree_add_child(node, child);
    if (status != 0) {
      history_tree_free(child);
      return status;
    }

    *kp = (k + 1);
    status = make_capture_history_tree(child, kp, stk_top, str, reg);
    if (status != 0) return status;

    /* resume after the entry that closed the child */
    k = *kp;
    child->end = k->u.mem.pstr - str;
  }

  return 1; /* 1: root node ending. */
}
#endif /* USE_CAPTURE_HISTORY */
2022
2023
#ifdef USE_BACKREF_WITH_LEVEL
2024
static int
2025
mem_is_in_memp(int mem, int num, UChar* memp)
2026
40.3M
{
2027
40.3M
  int i;
2028
40.3M
  MemNumType m;
2029
2030
40.4M
  for (i = 0; i < num; i++) {
2031
40.3M
    GET_MEMNUM_INC(m, memp);
2032
40.3M
    if (mem == (int )m) return 1;
2033
40.3M
  }
2034
66.7k
  return 0;
2035
40.3M
}
2036
2037
/*
 * Match a back-reference against a capture recorded at call-nesting level
 * `nest`.
 *
 * The stack is scanned from just below `top` down to `stk_base`.  `level` is
 * lowered by each STK_CALL_FRAME and raised by each STK_RETURN; capture
 * entries are only considered while level == nest.  For the groups listed in
 * memp (mem_num entries) a STK_MEM_END entry records the capture end, and
 * the next STK_MEM_START entry found after an end has been recorded supplies
 * the capture start.  The captured text is then compared with the subject at
 * *s, case-insensitively when ignore_case is non-zero.
 *
 * s:    in/out subject position; advanced past the matched text on success.
 * send: end of the subject text.
 *
 * Returns 1 on a successful comparison (with *s advanced); 0 if the capture
 * is longer than the remaining subject, if the comparison fails, or if no
 * suitable capture is found on the stack.
 */
static int backref_match_at_nested_level(regex_t* reg,
         OnigStackType* top, OnigStackType* stk_base,
         int ignore_case, int case_fold_flag,
         int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
  UChar *pend = NULL_UCHARP;
  int level = 0;
  OnigStackType* k;

  for (k = top - 1; k >= stk_base; k--) {
    UChar *pstart, *cur;

    if (k->type == STK_CALL_FRAME) {
      level--;
      continue;
    }
    if (k->type == STK_RETURN) {
      level++;
      continue;
    }
    if (level != nest) continue;

    if (k->type == STK_MEM_END) {
      if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
        pend = k->u.mem.pstr;
      }
      continue;
    }
    if (k->type != STK_MEM_START) continue;
    if (!mem_is_in_memp(k->u.mem.num, mem_num, memp)) continue;

    pstart = k->u.mem.pstr;
    if (pend == NULL_UCHARP) continue;

    if (pend - pstart > send - *s) return 0; /* or goto next_mem; */

    cur = *s;
    if (ignore_case != 0) {
      if (string_cmp_ic(reg->enc, case_fold_flag,
                        pstart, &cur, pend - pstart, send) == 0)
        return 0; /* or goto next_mem; */
    }
    else {
      UChar *p;
      for (p = pstart; p < pend; p++, cur++) {
        if (*p != *cur) return 0; /* or goto next_mem; */
      }
    }

    *s = cur;
    return 1;
  }

  return 0;
}
2092
#endif /* USE_BACKREF_WITH_LEVEL */
2093
2094
2095
#ifdef ONIG_DEBUG_STATISTICS
2096
2097
# ifdef _WIN32
2098
#  include <windows.h>
2099
static LARGE_INTEGER ts, te, freq;
2100
#  define GETTIME(t)    QueryPerformanceCounter(&(t))
2101
#  define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
2102
                            * 1000000 / freq.QuadPart)
2103
# else /* _WIN32 */
2104
2105
#  define USE_TIMEOFDAY
2106
2107
#  ifdef USE_TIMEOFDAY
2108
#   ifdef HAVE_SYS_TIME_H
2109
#    include <sys/time.h>
2110
#   endif
2111
#   ifdef HAVE_UNISTD_H
2112
#    include <unistd.h>
2113
#   endif
2114
static struct timeval ts, te;
2115
#   define GETTIME(t)      gettimeofday(&(t), (struct timezone* )0)
2116
#   define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
2117
                            (((te).tv_sec - (ts).tv_sec)*1000000))
2118
#  else /* USE_TIMEOFDAY */
2119
#   ifdef HAVE_SYS_TIMES_H
2120
#    include <sys/times.h>
2121
#   endif
2122
static struct tms ts, te;
2123
#   define GETTIME(t)       times(&(t))
2124
#   define TIMEDIFF(te,ts)  ((te).tms_utime - (ts).tms_utime)
2125
#  endif /* USE_TIMEOFDAY */
2126
2127
# endif /* _WIN32 */
2128
2129
static int OpCounter[256];
2130
static int OpPrevCounter[256];
2131
static unsigned long OpTime[256];
2132
static int OpCurr = OP_FINISH;
2133
static int OpPrevTarget = OP_FAIL;
2134
static int MaxStackDepth = 0;
2135
2136
/*
 * Statistics hook executed when an opcode starts.
 *
 * If the opcode equals OpPrevTarget, the counter of the opcode that ran just
 * before it (OpCurr) is bumped in OpPrevCounter.  Then OpCurr and the
 * per-opcode counter are updated and the start time is taken into `ts`.
 *
 * The argument is evaluated exactly once, so an expression with side effects
 * or low-precedence operators is safe to pass.
 */
# define MOP_IN(opcode) do {\
  const int mop_in_opcode_ = (opcode);\
  if (mop_in_opcode_ == OpPrevTarget) OpPrevCounter[OpCurr]++;\
  OpCurr = mop_in_opcode_;\
  OpCounter[mop_in_opcode_]++;\
  GETTIME(ts);\
} while(0)

/*
 * Statistics hook executed when an opcode ends: takes the end time into `te`
 * and adds the elapsed time since MOP_IN to OpTime[OpCurr].
 */
# define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2147
2148
extern void
2149
onig_statistics_init(void)
2150
{
2151
  int i;
2152
  for (i = 0; i < 256; i++) {
2153
    OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2154
  }
2155
  MaxStackDepth = 0;
2156
# ifdef _WIN32
2157
  QueryPerformanceFrequency(&freq);
2158
# endif
2159
}
2160
2161
extern void
2162
onig_print_statistics(FILE* f)
2163
{
2164
  int i;
2165
  fprintf(f, "   count      prev        time\n");
2166
  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2167
    fprintf(f, "%8d: %8d: %10lu: %s\n",
2168
            OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2169
  }
2170
  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2171
}
2172
2173
/*
 * Statistics build of STACK_INC: advance the stack top by one entry and keep
 * MaxStackDepth at the largest distance seen between stk and stk_base.
 */
# define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) \
    MaxStackDepth = stk - stk_base;\
} while(0)
2178
2179
#else /* ONIG_DEBUG_STATISTICS */
2180
3.05G
# define STACK_INC     stk++
2181
2182
# define MOP_IN(opcode)
2183
# define MOP_OUT
2184
#endif /* ONIG_DEBUG_STATISTICS */
2185
2186
2187
#ifdef ONIG_DEBUG_MATCH
/*
 * Map a stack entry type to the fixed-width (6 character) label used in the
 * match trace.  Types without a label yield six blanks.
 *
 * STK_ATOMIC_MATCH_CACHE_POINT is labelled too: STACK_STOP_BT_END rewrites
 * STK_MATCH_CACHE_POINT entries to that type, so without a label they would
 * show up as blanks in the trace.
 */
static const char *
stack_type_str(int stack_type)
{
  switch (stack_type) {
    case STK_ALT:                      return "Alt   ";
    case STK_LOOK_BEHIND_NOT:          return "LBNot ";
    case STK_POS_NOT:                  return "PosNot";
    case STK_MEM_START:                return "MemS  ";
    case STK_MEM_END:                  return "MemE  ";
    case STK_REPEAT_INC:               return "RepInc";
    case STK_STATE_CHECK_MARK:         return "StChMk";
    case STK_NULL_CHECK_START:         return "NulChS";
    case STK_NULL_CHECK_END:           return "NulChE";
    case STK_MEM_END_MARK:             return "MemEMk";
    case STK_POS:                      return "Pos   ";
    case STK_STOP_BT:                  return "StopBt";
    case STK_REPEAT:                   return "Rep   ";
    case STK_CALL_FRAME:               return "Call  ";
    case STK_RETURN:                   return "Ret   ";
    case STK_VOID:                     return "Void  ";
    case STK_ABSENT_POS:               return "AbsPos";
    case STK_ABSENT:                   return "Absent";
    case STK_MATCH_CACHE_POINT:        return "MCache";
    case STK_ATOMIC_MATCH_CACHE_POINT: return "AMCach";
    default:                           return "      ";
  }
}
#endif
2215
#ifdef USE_MATCH_CACHE
2216
2217
static long
2218
bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcodes, const UChar* p)
2219
249M
{
2220
249M
  long l = 0, r = num_cache_opcodes - 1, m = 0;
2221
2222
942M
  while (l <= r) {
2223
799M
    m = (l + r) / 2;
2224
799M
    if (cache_opcodes[m].addr == p) break;
2225
693M
    if (cache_opcodes[m].addr < p) l = m + 1;
2226
363M
    else r = m - 1;
2227
693M
  }
2228
249M
  return m;
2229
249M
}
2230
2231
static long
2232
find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr)
2233
249M
{
2234
249M
  long m;
2235
249M
  const OnigCacheOpcode* cache_opcode;
2236
249M
  const OnigRepeatRange* range;
2237
249M
  const OnigStackType *stkp;
2238
249M
  int count = 0;
2239
249M
  int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
2240
249M
  long cache_point;
2241
249M
  long num_cache_points_at_outer_repeat;
2242
249M
  long num_cache_points_in_outer_repeat;
2243
2244
249M
  m = bsearch_cache_opcodes(cache_opcodes, num_cache_opcodes, p);
2245
2246
249M
  if (!(0 <= m && m < num_cache_opcodes && cache_opcodes[m].addr == p)) {
2247
143M
    return -1;
2248
143M
  }
2249
2250
106M
  cache_opcode = &cache_opcodes[m];
2251
106M
  *cache_opcode_ptr = &cache_opcodes[m];
2252
106M
  cache_point = cache_opcode->cache_point;
2253
106M
  if (cache_opcode->outer_repeat_mem == -1) {
2254
97.2M
    return cache_point;
2255
97.2M
  }
2256
2257
8.87M
  num_cache_points_at_outer_repeat = cache_opcode->num_cache_points_at_outer_repeat;
2258
8.87M
  num_cache_points_in_outer_repeat = cache_opcode->num_cache_points_in_outer_repeat;
2259
2260
8.87M
  range = &reg->repeat_range[cache_opcode->outer_repeat_mem];
2261
2262
8.87M
  stkp = &stk[repeat_stk[cache_opcode->outer_repeat_mem]];
2263
8.87M
  count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
2264
2265
8.87M
  if (count < range->lower) {
2266
2.99M
    return num_cache_points_at_outer_repeat +
2267
2.99M
      num_cache_points_in_outer_repeat * count +
2268
2.99M
      cache_point;
2269
2.99M
  }
2270
2271
5.88M
  if (range->upper == 0x7fffffff) {
2272
1.20M
    return num_cache_points_at_outer_repeat +
2273
1.20M
      num_cache_points_in_outer_repeat * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) +
2274
1.20M
      cache_point;
2275
1.20M
  }
2276
2277
4.68M
  return num_cache_points_at_outer_repeat +
2278
4.68M
    num_cache_points_in_outer_repeat * (range->lower - 1) +
2279
4.68M
    (num_cache_points_in_outer_repeat + 1) * (count - range->lower + 1) +
2280
4.68M
    cache_point;
2281
5.88M
}
2282
2283
/*
 * Test the "extended" bit that accompanies a match-cache point.
 *
 * The extended bit is the bit directly above the one selected by
 * match_cache_point_mask.  When the mask is the top bit of its byte (0x80)
 * the extended bit is bit 0 of the following byte.
 *
 * match_cache_buf:         cache bitmap; only read, hence const.
 * match_cache_point_index: byte index of the cache point.
 * match_cache_point_mask:  single-bit mask of the cache point in that byte.
 *
 * Returns 1 if the extended bit is set, 0 otherwise.
 */
static int
check_extended_match_cache_point(const uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
{
  if (match_cache_point_mask & 0x80) {
    /* the extended bit spills over into the next byte */
    return (match_cache_buf[match_cache_point_index + 1] & 0x01) != 0;
  }
  return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) != 0;
}
2293
2294
/*
 * Set a match-cache point together with its "extended" bit.
 *
 * The bit selected by match_cache_point_mask is set in the byte at
 * match_cache_point_index, and so is the bit directly above it.  When the
 * mask is the top bit of its byte (0x80) the extended bit is bit 0 of the
 * following byte.
 */
static void
memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask)
{
  uint8_t *slot = match_cache_buf + match_cache_point_index;

  *slot |= match_cache_point_mask;
  if ((match_cache_point_mask & 0x80) == 0) {
    *slot |= match_cache_point_mask << 1;
    return;
  }
  /* the extended bit spills over into the next byte */
  slot[1] |= 0x01;
}
2305
2306
#endif /* USE_MATCH_CACHE */
2307
2308
/* match data(str - end) from position (sstart). */
2309
/* if sstart == str then set sprev to NULL. */
2310
static OnigPosition
2311
match_at(regex_t* reg, const UChar* str, const UChar* end,
2312
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2313
         const UChar* right_range,
2314
#endif
2315
         const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2316
7.67M
{
2317
7.67M
  static const UChar FinishCode[] = { OP_FINISH };
2318
2319
7.67M
  int i, num_mem, pop_level;
2320
7.67M
  ptrdiff_t n, best_len;
2321
7.67M
  LengthType tlen, tlen2;
2322
7.67M
  MemNumType mem;
2323
7.67M
  RelAddrType addr;
2324
7.67M
  OnigOptionType option = reg->options;
2325
7.67M
  OnigEncoding encode = reg->enc;
2326
7.67M
  OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2327
7.67M
  UChar *s, *q, *sbegin;
2328
7.67M
  UChar *p = reg->p;
2329
7.67M
  UChar *pbegin = p;
2330
7.67M
  UChar *pkeep;
2331
7.67M
  char *alloca_base;
2332
7.67M
  char *xmalloc_base = NULL;
2333
7.67M
  OnigStackType *stk_alloc, *stk_base = NULL, *stk, *stk_end;
2334
7.67M
  OnigStackType *stkp; /* used as any purpose. */
2335
7.67M
  OnigStackIndex si;
2336
7.67M
  OnigStackIndex *repeat_stk;
2337
7.67M
  OnigStackIndex *mem_start_stk, *mem_end_stk;
2338
#ifdef USE_COMBINATION_EXPLOSION_CHECK
2339
  int scv;
2340
  unsigned char* state_check_buff = msa->state_check_buff;
2341
  int num_comb_exp_check = reg->num_comb_exp_check;
2342
#endif
2343
2344
7.67M
#if USE_TOKEN_THREADED_VM
2345
7.67M
# define OP_OFFSET  1
2346
7.67M
# define VM_LOOP JUMP;
2347
7.67M
# define VM_LOOP_END
2348
5.92G
# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2349
7.67M
# define DEFAULT L_DEFAULT:
2350
545M
# define NEXT sprev = sbegin; JUMP
2351
5.95G
# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2352
2353
7.67M
  RB_GNUC_EXTENSION static const void *oplabels[] = {
2354
7.67M
    &&L_OP_FINISH,               /* matching process terminator (no more alternative) */
2355
7.67M
    &&L_OP_END,                  /* pattern code terminator (success end) */
2356
2357
7.67M
    &&L_OP_EXACT1,               /* single byte, N = 1 */
2358
7.67M
    &&L_OP_EXACT2,               /* single byte, N = 2 */
2359
7.67M
    &&L_OP_EXACT3,               /* single byte, N = 3 */
2360
7.67M
    &&L_OP_EXACT4,               /* single byte, N = 4 */
2361
7.67M
    &&L_OP_EXACT5,               /* single byte, N = 5 */
2362
7.67M
    &&L_OP_EXACTN,               /* single byte */
2363
7.67M
    &&L_OP_EXACTMB2N1,           /* mb-length = 2 N = 1 */
2364
7.67M
    &&L_OP_EXACTMB2N2,           /* mb-length = 2 N = 2 */
2365
7.67M
    &&L_OP_EXACTMB2N3,           /* mb-length = 2 N = 3 */
2366
7.67M
    &&L_OP_EXACTMB2N,            /* mb-length = 2 */
2367
7.67M
    &&L_OP_EXACTMB3N,            /* mb-length = 3 */
2368
7.67M
    &&L_OP_EXACTMBN,             /* other length */
2369
2370
7.67M
    &&L_OP_EXACT1_IC,            /* single byte, N = 1, ignore case */
2371
7.67M
    &&L_OP_EXACTN_IC,            /* single byte,        ignore case */
2372
2373
7.67M
    &&L_OP_CCLASS,
2374
7.67M
    &&L_OP_CCLASS_MB,
2375
7.67M
    &&L_OP_CCLASS_MIX,
2376
7.67M
    &&L_OP_CCLASS_NOT,
2377
7.67M
    &&L_OP_CCLASS_MB_NOT,
2378
7.67M
    &&L_OP_CCLASS_MIX_NOT,
2379
2380
7.67M
    &&L_OP_ANYCHAR,                 /* "."  */
2381
7.67M
    &&L_OP_ANYCHAR_ML,              /* "."  multi-line */
2382
7.67M
    &&L_OP_ANYCHAR_STAR,            /* ".*" */
2383
7.67M
    &&L_OP_ANYCHAR_ML_STAR,         /* ".*" multi-line */
2384
7.67M
    &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2385
7.67M
    &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2386
2387
7.67M
    &&L_OP_WORD,
2388
7.67M
    &&L_OP_NOT_WORD,
2389
7.67M
    &&L_OP_WORD_BOUND,
2390
7.67M
    &&L_OP_NOT_WORD_BOUND,
2391
7.67M
# ifdef USE_WORD_BEGIN_END
2392
7.67M
    &&L_OP_WORD_BEGIN,
2393
7.67M
    &&L_OP_WORD_END,
2394
# else
2395
    &&L_DEFAULT,
2396
    &&L_DEFAULT,
2397
# endif
2398
7.67M
    &&L_OP_ASCII_WORD,
2399
7.67M
    &&L_OP_NOT_ASCII_WORD,
2400
7.67M
    &&L_OP_ASCII_WORD_BOUND,
2401
7.67M
    &&L_OP_NOT_ASCII_WORD_BOUND,
2402
7.67M
# ifdef USE_WORD_BEGIN_END
2403
7.67M
    &&L_OP_ASCII_WORD_BEGIN,
2404
7.67M
    &&L_OP_ASCII_WORD_END,
2405
# else
2406
    &&L_DEFAULT,
2407
    &&L_DEFAULT,
2408
# endif
2409
2410
7.67M
    &&L_OP_BEGIN_BUF,
2411
7.67M
    &&L_OP_END_BUF,
2412
7.67M
    &&L_OP_BEGIN_LINE,
2413
7.67M
    &&L_OP_END_LINE,
2414
7.67M
    &&L_OP_SEMI_END_BUF,
2415
7.67M
    &&L_OP_BEGIN_POSITION,
2416
2417
7.67M
    &&L_OP_BACKREF1,
2418
7.67M
    &&L_OP_BACKREF2,
2419
7.67M
    &&L_OP_BACKREFN,
2420
7.67M
    &&L_OP_BACKREFN_IC,
2421
7.67M
    &&L_OP_BACKREF_MULTI,
2422
7.67M
    &&L_OP_BACKREF_MULTI_IC,
2423
7.67M
# ifdef USE_BACKREF_WITH_LEVEL
2424
7.67M
    &&L_OP_BACKREF_WITH_LEVEL,   /* \k<xxx+n>, \k<xxx-n> */
2425
# else
2426
    &&L_DEFAULT,
2427
# endif
2428
7.67M
    &&L_OP_MEMORY_START,
2429
7.67M
    &&L_OP_MEMORY_START_PUSH,    /* push back-tracker to stack */
2430
7.67M
    &&L_OP_MEMORY_END_PUSH,      /* push back-tracker to stack */
2431
7.67M
# ifdef USE_SUBEXP_CALL
2432
7.67M
    &&L_OP_MEMORY_END_PUSH_REC,  /* push back-tracker to stack */
2433
# else
2434
    &&L_DEFAULT,
2435
# endif
2436
7.67M
    &&L_OP_MEMORY_END,
2437
7.67M
# ifdef USE_SUBEXP_CALL
2438
7.67M
    &&L_OP_MEMORY_END_REC,       /* push marker to stack */
2439
# else
2440
    &&L_DEFAULT,
2441
# endif
2442
2443
7.67M
    &&L_OP_KEEP,
2444
2445
7.67M
    &&L_OP_FAIL,                 /* pop stack and move */
2446
7.67M
    &&L_OP_JUMP,
2447
7.67M
    &&L_OP_PUSH,
2448
7.67M
    &&L_OP_POP,
2449
# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2450
    &&L_OP_PUSH_OR_JUMP_EXACT1,  /* if match exact then push, else jump. */
2451
# else
2452
7.67M
    &&L_DEFAULT,
2453
7.67M
# endif
2454
7.67M
    &&L_OP_PUSH_IF_PEEK_NEXT,    /* if match exact then push, else none. */
2455
7.67M
    &&L_OP_REPEAT,               /* {n,m} */
2456
7.67M
    &&L_OP_REPEAT_NG,            /* {n,m}? (non greedy) */
2457
7.67M
    &&L_OP_REPEAT_INC,
2458
7.67M
    &&L_OP_REPEAT_INC_NG,        /* non greedy */
2459
7.67M
    &&L_OP_REPEAT_INC_SG,        /* search and get in stack */
2460
7.67M
    &&L_OP_REPEAT_INC_NG_SG,     /* search and get in stack (non greedy) */
2461
7.67M
    &&L_OP_NULL_CHECK_START,     /* null loop checker start */
2462
7.67M
    &&L_OP_NULL_CHECK_END,       /* null loop checker end   */
2463
7.67M
# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2464
7.67M
    &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2465
# else
2466
    &&L_DEFAULT,
2467
# endif
2468
7.67M
# ifdef USE_SUBEXP_CALL
2469
7.67M
    &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2470
# else
2471
    &&L_DEFAULT,
2472
# endif
2473
2474
7.67M
    &&L_OP_PUSH_POS,             /* (?=...)  start */
2475
7.67M
    &&L_OP_POP_POS,              /* (?=...)  end   */
2476
7.67M
    &&L_OP_PUSH_POS_NOT,         /* (?!...)  start */
2477
7.67M
    &&L_OP_FAIL_POS,             /* (?!...)  end   */
2478
7.67M
    &&L_OP_PUSH_STOP_BT,         /* (?>...)  start */
2479
7.67M
    &&L_OP_POP_STOP_BT,          /* (?>...)  end   */
2480
7.67M
    &&L_OP_LOOK_BEHIND,          /* (?<=...) start (no needs end opcode) */
2481
7.67M
    &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2482
7.67M
    &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end   */
2483
7.67M
    &&L_OP_PUSH_ABSENT_POS,      /* (?~...)  start */
2484
7.67M
    &&L_OP_ABSENT,               /* (?~...)  start of inner loop */
2485
7.67M
    &&L_OP_ABSENT_END,           /* (?~...)  end   */
2486
2487
7.67M
# ifdef USE_SUBEXP_CALL
2488
7.67M
    &&L_OP_CALL,                 /* \g<name> */
2489
7.67M
    &&L_OP_RETURN,
2490
# else
2491
    &&L_DEFAULT,
2492
    &&L_DEFAULT,
2493
# endif
2494
7.67M
    &&L_OP_CONDITION,
2495
2496
# ifdef USE_COMBINATION_EXPLOSION_CHECK
2497
    &&L_OP_STATE_CHECK_PUSH,         /* combination explosion check and push */
2498
    &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */
2499
    &&L_OP_STATE_CHECK,              /* check only */
2500
# else
2501
7.67M
    &&L_DEFAULT,
2502
7.67M
    &&L_DEFAULT,
2503
7.67M
    &&L_DEFAULT,
2504
7.67M
# endif
2505
# ifdef USE_COMBINATION_EXPLOSION_CHECK
2506
    &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2507
    &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2508
# else
2509
7.67M
    &&L_DEFAULT,
2510
7.67M
    &&L_DEFAULT,
2511
7.67M
# endif
2512
    /* no need: IS_DYNAMIC_OPTION() == 0 */
2513
# if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */
2514
    &&L_OP_SET_OPTION_PUSH,    /* set option and push recover option */
2515
    &&L_OP_SET_OPTION          /* set option */
2516
# else
2517
7.67M
    &&L_DEFAULT,
2518
7.67M
    &&L_DEFAULT
2519
7.67M
# endif
2520
7.67M
  };
2521
#else /* USE_TOKEN_THREADED_VM */
2522
2523
# define OP_OFFSET  0
2524
# define VM_LOOP                                \
2525
  while (1) {                                   \
2526
  OPCODE_EXEC_HOOK;                             \
2527
  pbegin = p;                                   \
2528
  sbegin = s;                                   \
2529
  switch (*p++) {
2530
# define VM_LOOP_END } sprev = sbegin; }
2531
# define CASE(x) case x:
2532
# define DEFAULT default:
2533
# define NEXT break
2534
# define JUMP continue; break
2535
#endif /* USE_TOKEN_THREADED_VM */
2536
2537
2538
7.67M
#ifdef USE_SUBEXP_CALL
2539
/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2540
   etc. Additional space is required. */
2541
15.3M
# define ADD_NUMMEM 1
2542
#else
2543
/* Stack #0 not is used. */
2544
# define ADD_NUMMEM 0
2545
#endif
2546
2547
7.67M
  n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2548
2549
7.67M
  STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2550
7.67M
  pop_level = reg->stack_pop_level;
2551
7.67M
  num_mem = reg->num_mem;
2552
7.67M
  repeat_stk = (OnigStackIndex* )alloca_base;
2553
2554
7.67M
  mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2555
7.67M
  mem_end_stk   = mem_start_stk + (num_mem + ADD_NUMMEM);
2556
7.67M
  {
2557
7.67M
    OnigStackIndex *pp = mem_start_stk;
2558
33.5M
    for (; pp < repeat_stk + n; pp += 2) {
2559
25.8M
      pp[0] = INVALID_STACK_INDEX;
2560
25.8M
      pp[1] = INVALID_STACK_INDEX;
2561
25.8M
    }
2562
7.67M
  }
2563
#ifndef USE_SUBEXP_CALL
2564
  mem_start_stk--; /* for index start from 1,
2565
                      mem_start_stk[1]..mem_start_stk[num_mem] */
2566
  mem_end_stk--;   /* for index start from 1,
2567
                      mem_end_stk[1]..mem_end_stk[num_mem] */
2568
#endif
2569
2570
#ifdef ONIG_DEBUG_MATCH
2571
  fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2572
          (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2573
  fprintf(stderr, "size: %d, start offset: %d\n",
2574
          (int )(end - str), (int )(sstart - str));
2575
  fprintf(stderr, "\n ofs> str                   stk:type   addr:opcode\n");
2576
#endif
2577
2578
7.67M
  STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode);  /* bottom stack */
2579
7.67M
  best_len = ONIG_MISMATCH;
2580
7.67M
  s = (UChar* )sstart;
2581
7.67M
  pkeep = (UChar* )sstart;
2582
2583
2584
#ifdef ONIG_DEBUG_MATCH
2585
# define OPCODE_EXEC_HOOK                                               \
2586
    if (s) {                                                            \
2587
      UChar *op, *q, *bp, buf[50];                                      \
2588
      int len;                                                          \
2589
      op = p - OP_OFFSET;                                               \
2590
      fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2591
      bp = buf;                                                         \
2592
      q = s;                                                            \
2593
      if (*op != OP_FINISH) {    /* s may not be a valid pointer if OP_FINISH. */ \
2594
        for (i = 0; i < 7 && q < end; i++) {                            \
2595
          len = enclen(encode, q, end);                                 \
2596
          while (len-- > 0) *bp++ = *q++;                               \
2597
        }                                                               \
2598
        if (q < end) { xmemcpy(bp, "...", 3); bp += 3; }                \
2599
      }                                                                 \
2600
      xmemcpy(bp, "\"", 1); bp += 1;                                    \
2601
      *bp = 0;                                                          \
2602
      fputs((char* )buf, stderr);                                       \
2603
      for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);         \
2604
      fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":",                    \
2605
          stk - stk_base - 1,                                           \
2606
          (stk > stk_base) ? stack_type_str(stk[-1].type) : "      ",   \
2607
          (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p);           \
2608
      onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2609
      fprintf(stderr, "\n");                                            \
2610
    }
2611
#else
2612
4.55G
# define OPCODE_EXEC_HOOK ((void) 0)
2613
7.67M
#endif
2614
2615
7.67M
#ifdef USE_MATCH_CACHE
2616
#ifdef ONIG_DEBUG_MATCH_CACHE
2617
#define MATCH_CACHE_DEBUG fprintf(stderr, "MATCH CACHE: cache %ld (p=%p index=%ld mask=%d)\n", match_cache_point, pbegin, match_cache_point_index, match_cache_point_mask)
2618
#define MATCH_CACHE_DEBUG_HIT fprintf(stderr, "MATCH CACHE: cache hit\n")
2619
#else
2620
106M
#define MATCH_CACHE_DEBUG ((void) 0)
2621
23.3M
#define MATCH_CACHE_DEBUG_HIT ((void) 0)
2622
7.67M
#endif
2623
2624
23.3M
#define MATCH_CACHE_HIT ((void) 0)
2625
2626
2.50G
#  define CHECK_MATCH_CACHE do {\
2627
2.50G
  if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\
2628
249M
    const OnigCacheOpcode *cache_opcode;\
2629
249M
    long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\
2630
249M
    if (cache_point >= 0) {\
2631
106M
      long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\
2632
106M
      long match_cache_point_index = match_cache_point >> 3;\
2633
106M
      uint8_t match_cache_point_mask = 1 << (match_cache_point & 7);\
2634
106M
      MATCH_CACHE_DEBUG;\
2635
106M
      if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\
2636
23.3M
        MATCH_CACHE_DEBUG_HIT; MATCH_CACHE_HIT;\
2637
23.3M
        if (cache_opcode->lookaround_nesting == 0) goto fail;\
2638
23.3M
        else if (cache_opcode->lookaround_nesting < 0) {\
2639
3.62M
          if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2640
1.73M
            STACK_STOP_BT_FAIL;\
2641
1.73M
            goto fail;\
2642
1.73M
          }\
2643
3.62M
          else goto fail;\
2644
3.62M
        }\
2645
12.4M
        else {\
2646
8.87M
          if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\
2647
2.34M
            p = cache_opcode->match_addr;\
2648
2.34M
            MOP_OUT;\
2649
2.34M
            JUMP;\
2650
2.34M
          }\
2651
8.87M
          else goto fail;\
2652
8.87M
        }\
2653
23.3M
      }\
2654
106M
      STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\
2655
85.0M
    }\
2656
249M
  }\
2657
2.50G
} while (0)
2658
#else
2659
#  define CHECK_MATCH_CACHE ((void) 0)
2660
#endif
2661
2662
7.67M
  VM_LOOP {
2663
7.67M
    CASE(OP_END)  MOP_IN(OP_END);
2664
1.16M
      n = s - sstart;
2665
1.16M
      if (n > best_len) {
2666
1.16M
        OnigRegion* region;
2667
1.16M
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2668
1.16M
        if (IS_FIND_LONGEST(option)) {
2669
0
          if (n > msa->best_len) {
2670
0
            msa->best_len = n;
2671
0
            msa->best_s   = (UChar* )sstart;
2672
0
          }
2673
0
          else
2674
0
            goto end_best_len;
2675
0
        }
2676
1.16M
#endif
2677
1.16M
        best_len = n;
2678
1.16M
        region = msa->region;
2679
1.16M
        if (region) {
2680
1.16M
          region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2681
1.16M
          region->end[0] = s - str;
2682
11.8M
          for (i = 1; i <= num_mem; i++) {
2683
10.6M
            if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2684
1.02M
              if (BIT_STATUS_AT(reg->bt_mem_start, i))
2685
423k
                region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2686
597k
              else
2687
597k
                region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2688
2689
1.02M
              region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2690
1.02M
                                ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2691
1.02M
                                : (UChar* )((void* )mem_end_stk[i])) - str;
2692
1.02M
            }
2693
9.65M
            else {
2694
9.65M
              region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2695
9.65M
            }
2696
10.6M
          }
2697
2698
#ifdef USE_CAPTURE_HISTORY
2699
          if (reg->capture_history != 0) {
2700
            int r;
2701
            OnigCaptureTreeNode* node;
2702
2703
            if (IS_NULL(region->history_root)) {
2704
              region->history_root = node = history_node_new();
2705
              CHECK_NULL_RETURN_MEMERR(node);
2706
            }
2707
            else {
2708
              node = region->history_root;
2709
              history_tree_clear(node);
2710
            }
2711
2712
            node->group = 0;
2713
            node->beg   = ((pkeep > s) ? s : pkeep) - str;
2714
            node->end   = s - str;
2715
2716
            stkp = stk_base;
2717
            r = make_capture_history_tree(region->history_root, &stkp,
2718
                stk, (UChar* )str, reg);
2719
            if (r < 0) {
2720
              best_len = r; /* error code */
2721
              goto finish;
2722
            }
2723
          }
2724
#endif /* USE_CAPTURE_HISTORY */
2725
1.16M
        } /* if (region) */
2726
1.16M
      } /* n > best_len */
2727
2728
1.16M
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2729
1.16M
    end_best_len:
2730
1.16M
#endif
2731
1.16M
      MOP_OUT;
2732
2733
1.16M
      if (IS_FIND_CONDITION(option)) {
2734
0
        if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2735
0
          best_len = ONIG_MISMATCH;
2736
0
          goto fail; /* for retry */
2737
0
        }
2738
0
        if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2739
0
          goto fail; /* for retry */
2740
0
        }
2741
0
      }
2742
2743
      /* default behavior: return first-matching result. */
2744
1.16M
      goto finish;
2745
2746
412M
    CASE(OP_EXACT1)  MOP_IN(OP_EXACT1);
2747
412M
      DATA_ENSURE(1);
2748
408M
      if (*p != *s) goto fail;
2749
29.4M
      p++; s++;
2750
29.4M
      MOP_OUT;
2751
29.4M
      NEXT;
2752
2753
36.5M
    CASE(OP_EXACT1_IC)  MOP_IN(OP_EXACT1_IC);
2754
36.5M
      {
2755
36.5M
        int len;
2756
36.5M
        UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2757
2758
36.5M
        DATA_ENSURE(1);
2759
34.4M
        len = ONIGENC_MBC_CASE_FOLD(encode,
2760
                    /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2761
34.4M
                    case_fold_flag,
2762
34.4M
                    &s, end, lowbuf);
2763
34.4M
        DATA_ENSURE(0);
2764
34.4M
        q = lowbuf;
2765
35.5M
        while (len-- > 0) {
2766
34.4M
          if (*p != *q) {
2767
33.4M
            goto fail;
2768
33.4M
          }
2769
1.05M
          p++; q++;
2770
1.05M
        }
2771
34.4M
      }
2772
1.05M
      MOP_OUT;
2773
1.05M
      NEXT;
2774
2775
210M
    CASE(OP_EXACT2)  MOP_IN(OP_EXACT2);
2776
210M
      DATA_ENSURE(2);
2777
201M
      if (*p != *s) goto fail;
2778
18.5M
      p++; s++;
2779
18.5M
      if (*p != *s) goto fail;
2780
224k
      sprev = s;
2781
224k
      p++; s++;
2782
224k
      MOP_OUT;
2783
224k
      JUMP;
2784
2785
45.4M
    CASE(OP_EXACT3)  MOP_IN(OP_EXACT3);
2786
45.4M
      DATA_ENSURE(3);
2787
44.6M
      if (*p != *s) goto fail;
2788
852k
      p++; s++;
2789
852k
      if (*p != *s) goto fail;
2790
590k
      p++; s++;
2791
590k
      if (*p != *s) goto fail;
2792
361k
      sprev = s;
2793
361k
      p++; s++;
2794
361k
      MOP_OUT;
2795
361k
      JUMP;
2796
2797
29.4M
    CASE(OP_EXACT4)  MOP_IN(OP_EXACT4);
2798
29.4M
      DATA_ENSURE(4);
2799
28.3M
      if (*p != *s) goto fail;
2800
1.69M
      p++; s++;
2801
1.69M
      if (*p != *s) goto fail;
2802
300k
      p++; s++;
2803
300k
      if (*p != *s) goto fail;
2804
198k
      p++; s++;
2805
198k
      if (*p != *s) goto fail;
2806
143k
      sprev = s;
2807
143k
      p++; s++;
2808
143k
      MOP_OUT;
2809
143k
      JUMP;
2810
2811
44.5M
    CASE(OP_EXACT5)  MOP_IN(OP_EXACT5);
2812
44.5M
      DATA_ENSURE(5);
2813
44.0M
      if (*p != *s) goto fail;
2814
835k
      p++; s++;
2815
835k
      if (*p != *s) goto fail;
2816
120k
      p++; s++;
2817
120k
      if (*p != *s) goto fail;
2818
79.4k
      p++; s++;
2819
79.4k
      if (*p != *s) goto fail;
2820
47.8k
      p++; s++;
2821
47.8k
      if (*p != *s) goto fail;
2822
45.0k
      sprev = s;
2823
45.0k
      p++; s++;
2824
45.0k
      MOP_OUT;
2825
45.0k
      JUMP;
2826
2827
238M
    CASE(OP_EXACTN)  MOP_IN(OP_EXACTN);
2828
238M
      GET_LENGTH_INC(tlen, p);
2829
238M
      DATA_ENSURE(tlen);
2830
268M
      while (tlen-- > 0) {
2831
268M
        if (*p++ != *s++) goto fail;
2832
268M
      }
2833
260k
      sprev = s - 1;
2834
260k
      MOP_OUT;
2835
260k
      JUMP;
2836
2837
72.3M
    CASE(OP_EXACTN_IC)  MOP_IN(OP_EXACTN_IC);
2838
72.3M
      {
2839
72.3M
        int len;
2840
72.3M
        UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2841
2842
72.3M
        GET_LENGTH_INC(tlen, p);
2843
72.3M
        endp = p + tlen;
2844
2845
76.3M
        while (p < endp) {
2846
76.1M
          sprev = s;
2847
76.1M
          DATA_ENSURE(1);
2848
74.0M
          len = ONIGENC_MBC_CASE_FOLD(encode,
2849
                      /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2850
74.0M
                      case_fold_flag,
2851
74.0M
                      &s, end, lowbuf);
2852
74.0M
          DATA_ENSURE(0);
2853
74.0M
          q = lowbuf;
2854
78.0M
          while (len-- > 0) {
2855
74.0M
            if (*p != *q) goto fail;
2856
4.00M
            p++; q++;
2857
4.00M
          }
2858
74.0M
        }
2859
72.3M
      }
2860
2861
112k
      MOP_OUT;
2862
112k
      JUMP;
2863
2864
112k
    CASE(OP_EXACTMB2N1)  MOP_IN(OP_EXACTMB2N1);
2865
106k
      DATA_ENSURE(2);
2866
106k
      if (*p != *s) goto fail;
2867
0
      p++; s++;
2868
0
      if (*p != *s) goto fail;
2869
0
      p++; s++;
2870
0
      MOP_OUT;
2871
0
      NEXT;
2872
2873
13.9k
    CASE(OP_EXACTMB2N2)  MOP_IN(OP_EXACTMB2N2);
2874
13.9k
      DATA_ENSURE(4);
2875
13.6k
      if (*p != *s) goto fail;
2876
0
      p++; s++;
2877
0
      if (*p != *s) goto fail;
2878
0
      p++; s++;
2879
0
      sprev = s;
2880
0
      if (*p != *s) goto fail;
2881
0
      p++; s++;
2882
0
      if (*p != *s) goto fail;
2883
0
      p++; s++;
2884
0
      MOP_OUT;
2885
0
      JUMP;
2886
2887
105k
    CASE(OP_EXACTMB2N3)  MOP_IN(OP_EXACTMB2N3);
2888
105k
      DATA_ENSURE(6);
2889
105k
      if (*p != *s) goto fail;
2890
0
      p++; s++;
2891
0
      if (*p != *s) goto fail;
2892
0
      p++; s++;
2893
0
      if (*p != *s) goto fail;
2894
0
      p++; s++;
2895
0
      if (*p != *s) goto fail;
2896
0
      p++; s++;
2897
0
      sprev = s;
2898
0
      if (*p != *s) goto fail;
2899
0
      p++; s++;
2900
0
      if (*p != *s) goto fail;
2901
0
      p++; s++;
2902
0
      MOP_OUT;
2903
0
      JUMP;
2904
2905
16.9k
    CASE(OP_EXACTMB2N)  MOP_IN(OP_EXACTMB2N);
2906
16.9k
      GET_LENGTH_INC(tlen, p);
2907
16.9k
      DATA_ENSURE(tlen * 2);
2908
16.5k
      while (tlen-- > 0) {
2909
16.5k
        if (*p != *s) goto fail;
2910
0
        p++; s++;
2911
0
        if (*p != *s) goto fail;
2912
0
        p++; s++;
2913
0
      }
2914
0
      sprev = s - 2;
2915
0
      MOP_OUT;
2916
0
      JUMP;
2917
2918
65.7k
    CASE(OP_EXACTMB3N)  MOP_IN(OP_EXACTMB3N);
2919
65.7k
      GET_LENGTH_INC(tlen, p);
2920
65.7k
      DATA_ENSURE(tlen * 3);
2921
65.1k
      while (tlen-- > 0) {
2922
65.1k
        if (*p != *s) goto fail;
2923
0
        p++; s++;
2924
0
        if (*p != *s) goto fail;
2925
0
        p++; s++;
2926
0
        if (*p != *s) goto fail;
2927
0
        p++; s++;
2928
0
      }
2929
0
      sprev = s - 3;
2930
0
      MOP_OUT;
2931
0
      JUMP;
2932
2933
32.9k
    CASE(OP_EXACTMBN)  MOP_IN(OP_EXACTMBN);
2934
32.9k
      GET_LENGTH_INC(tlen,  p);  /* mb-len */
2935
32.9k
      GET_LENGTH_INC(tlen2, p);  /* string len */
2936
32.9k
      tlen2 *= tlen;
2937
32.9k
      DATA_ENSURE(tlen2);
2938
32.8k
      while (tlen2-- > 0) {
2939
32.8k
        if (*p != *s) goto fail;
2940
0
        p++; s++;
2941
0
      }
2942
0
      sprev = s - tlen;
2943
0
      MOP_OUT;
2944
0
      JUMP;
2945
2946
10.0M
    CASE(OP_CCLASS)  MOP_IN(OP_CCLASS);
2947
10.0M
      DATA_ENSURE(1);
2948
9.65M
      if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2949
593k
      p += SIZE_BITSET;
2950
593k
      s += enclen(encode, s, end);   /* OP_CCLASS can match mb-code. \D, \S */
2951
593k
      MOP_OUT;
2952
593k
      NEXT;
2953
2954
2.14M
    CASE(OP_CCLASS_MB)  MOP_IN(OP_CCLASS_MB);
2955
2.14M
      if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2956
2957
126k
    cclass_mb:
2958
126k
      GET_LENGTH_INC(tlen, p);
2959
126k
      {
2960
126k
        OnigCodePoint code;
2961
126k
        UChar *ss;
2962
126k
        int mb_len;
2963
2964
126k
        DATA_ENSURE(1);
2965
0
        mb_len = enclen_approx(encode, s, end);
2966
0
        DATA_ENSURE(mb_len);
2967
0
        ss = s;
2968
0
        s += mb_len;
2969
0
        code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2970
2971
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2972
0
        if (! onig_is_in_code_range(p, code)) goto fail;
2973
#else
2974
        q = p;
2975
        ALIGNMENT_RIGHT(q);
2976
        if (! onig_is_in_code_range(q, code)) goto fail;
2977
#endif
2978
0
      }
2979
0
      p += tlen;
2980
0
      MOP_OUT;
2981
0
      NEXT;
2982
2983
431k
    CASE(OP_CCLASS_MIX)  MOP_IN(OP_CCLASS_MIX);
2984
431k
      DATA_ENSURE(1);
2985
406k
      if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2986
0
        p += SIZE_BITSET;
2987
0
        goto cclass_mb;
2988
0
      }
2989
406k
      else {
2990
406k
        if (BITSET_AT(((BitSetRef )p), *s) == 0)
2991
179k
          goto fail;
2992
2993
226k
        p += SIZE_BITSET;
2994
226k
        GET_LENGTH_INC(tlen, p);
2995
226k
        p += tlen;
2996
226k
        s++;
2997
226k
      }
2998
226k
      MOP_OUT;
2999
226k
      NEXT;
3000
3001
86.6M
    CASE(OP_CCLASS_NOT)  MOP_IN(OP_CCLASS_NOT);
3002
86.6M
      DATA_ENSURE(1);
3003
64.2M
      if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
3004
57.8M
      p += SIZE_BITSET;
3005
57.8M
      s += enclen(encode, s, end);
3006
57.8M
      MOP_OUT;
3007
57.8M
      NEXT;
3008
3009
57.8M
    CASE(OP_CCLASS_MB_NOT)  MOP_IN(OP_CCLASS_MB_NOT);
3010
1.34M
      DATA_ENSURE(1);
3011
1.33M
      if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3012
1.33M
        s++;
3013
1.33M
        GET_LENGTH_INC(tlen, p);
3014
1.33M
        p += tlen;
3015
1.33M
        goto cc_mb_not_success;
3016
1.33M
      }
3017
3018
0
    cclass_mb_not:
3019
0
      GET_LENGTH_INC(tlen, p);
3020
0
      {
3021
0
        OnigCodePoint code;
3022
0
        UChar *ss;
3023
0
        int mb_len = enclen(encode, s, end);
3024
3025
0
        if (! DATA_ENSURE_CHECK(mb_len)) {
3026
0
          DATA_ENSURE(1);
3027
0
          s = (UChar* )end;
3028
0
          p += tlen;
3029
0
          goto cc_mb_not_success;
3030
0
        }
3031
3032
0
        ss = s;
3033
0
        s += mb_len;
3034
0
        code = ONIGENC_MBC_TO_CODE(encode, ss, s);
3035
3036
0
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
3037
0
        if (onig_is_in_code_range(p, code)) goto fail;
3038
#else
3039
        q = p;
3040
        ALIGNMENT_RIGHT(q);
3041
        if (onig_is_in_code_range(q, code)) goto fail;
3042
#endif
3043
0
      }
3044
0
      p += tlen;
3045
3046
1.33M
    cc_mb_not_success:
3047
1.33M
      MOP_OUT;
3048
1.33M
      NEXT;
3049
3050
486M
    CASE(OP_CCLASS_MIX_NOT)  MOP_IN(OP_CCLASS_MIX_NOT);
3051
486M
      DATA_ENSURE(1);
3052
243M
      if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
3053
0
        p += SIZE_BITSET;
3054
0
        goto cclass_mb_not;
3055
0
      }
3056
243M
      else {
3057
243M
        if (BITSET_AT(((BitSetRef )p), *s) != 0)
3058
51.7k
          goto fail;
3059
3060
243M
        p += SIZE_BITSET;
3061
243M
        GET_LENGTH_INC(tlen, p);
3062
243M
        p += tlen;
3063
243M
        s++;
3064
243M
      }
3065
243M
      MOP_OUT;
3066
243M
      NEXT;
3067
3068
243M
    CASE(OP_ANYCHAR)  MOP_IN(OP_ANYCHAR);
3069
57.6M
      DATA_ENSURE(1);
3070
57.4M
      n = enclen_approx(encode, s, end);
3071
57.4M
      DATA_ENSURE(n);
3072
57.4M
      if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
3073
52.9M
      s += n;
3074
52.9M
      MOP_OUT;
3075
52.9M
      NEXT;
3076
3077
136M
    CASE(OP_ANYCHAR_ML)  MOP_IN(OP_ANYCHAR_ML);
3078
136M
      DATA_ENSURE(1);
3079
130M
      n = enclen_approx(encode, s, end);
3080
130M
      DATA_ENSURE(n);
3081
130M
      s += n;
3082
130M
      MOP_OUT;
3083
130M
      NEXT;
3084
3085
130M
    CASE(OP_ANYCHAR_STAR)  MOP_IN(OP_ANYCHAR_STAR);
3086
300M
      while (DATA_ENSURE_CHECK1) {
3087
300M
        CHECK_MATCH_CACHE;
3088
295M
        STACK_PUSH_ALT(p, s, sprev, pkeep);
3089
295M
        n = enclen_approx(encode, s, end);
3090
295M
        DATA_ENSURE(n);
3091
295M
        if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))  goto fail;
3092
290M
        sprev = s;
3093
290M
        s += n;
3094
290M
      }
3095
867k
      MOP_OUT;
3096
867k
      JUMP;
3097
3098
2.28M
    CASE(OP_ANYCHAR_ML_STAR)  MOP_IN(OP_ANYCHAR_ML_STAR);
3099
147M
      while (DATA_ENSURE_CHECK1) {
3100
146M
        CHECK_MATCH_CACHE;
3101
145M
        STACK_PUSH_ALT(p, s, sprev, pkeep);
3102
145M
        n = enclen_approx(encode, s, end);
3103
145M
        if (n > 1) {
3104
0
          DATA_ENSURE(n);
3105
0
          sprev = s;
3106
0
          s += n;
3107
0
        }
3108
145M
        else {
3109
145M
          sprev = s;
3110
145M
          s++;
3111
145M
        }
3112
145M
      }
3113
573k
      MOP_OUT;
3114
573k
      JUMP;
3115
3116
8.77M
    CASE(OP_ANYCHAR_STAR_PEEK_NEXT)  MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
3117
265M
      while (DATA_ENSURE_CHECK1) {
3118
261M
        CHECK_MATCH_CACHE;
3119
257M
        if (*p == *s) {
3120
8.90M
          STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3121
248M
        } else {
3122
248M
#ifdef USE_MATCH_CACHE
3123
          /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3124
          /* Actually, the match would fail if we simply used `OP_ANYCHAR_STAR` in this case. */
3125
248M
          msa->num_fails++;
3126
248M
#endif
3127
248M
        }
3128
257M
        n = enclen_approx(encode, s, end);
3129
257M
        DATA_ENSURE(n);
3130
257M
        if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))  goto fail;
3131
256M
        sprev = s;
3132
256M
        s += n;
3133
256M
      }
3134
4.41M
      p++;
3135
4.41M
      MOP_OUT;
3136
4.41M
      NEXT;
3137
3138
5.33M
    CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
3139
264M
      while (DATA_ENSURE_CHECK1) {
3140
262M
        CHECK_MATCH_CACHE;
3141
258M
        if (*p == *s) {
3142
2.60M
          STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
3143
256M
        } else {
3144
256M
#ifdef USE_MATCH_CACHE
3145
          /* We need to increment num_fails here, for invoking a cache optimization correctly. */
3146
          /* Actually, the match would fail if we simply used `OP_ANYCHAR_ML_STAR` in this case. */
3147
256M
          msa->num_fails++;
3148
256M
#endif
3149
256M
        }
3150
258M
        n = enclen_approx(encode, s, end);
3151
258M
        if (n > 1) {
3152
0
          DATA_ENSURE(n);
3153
0
          sprev = s;
3154
0
          s += n;
3155
0
        }
3156
258M
        else {
3157
258M
          sprev = s;
3158
258M
          s++;
3159
258M
        }
3160
258M
      }
3161
1.70M
      p++;
3162
1.70M
      MOP_OUT;
3163
1.70M
      NEXT;
3164
3165
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3166
    CASE(OP_STATE_CHECK_ANYCHAR_STAR)  MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
3167
      GET_STATE_CHECK_NUM_INC(mem, p);
3168
      while (DATA_ENSURE_CHECK1) {
3169
        STATE_CHECK_VAL(scv, mem);
3170
        if (scv) goto fail;
3171
3172
        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3173
        n = enclen_approx(encode, s, end);
3174
        DATA_ENSURE(n);
3175
        if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0))  goto fail;
3176
        sprev = s;
3177
        s += n;
3178
      }
3179
      MOP_OUT;
3180
      NEXT;
3181
3182
    CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
3183
      MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
3184
3185
      GET_STATE_CHECK_NUM_INC(mem, p);
3186
      while (DATA_ENSURE_CHECK1) {
3187
        STATE_CHECK_VAL(scv, mem);
3188
        if (scv) goto fail;
3189
3190
        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
3191
        n = enclen_approx(encode, s, end);
3192
        if (n > 1) {
3193
          DATA_ENSURE(n);
3194
          sprev = s;
3195
          s += n;
3196
        }
3197
        else {
3198
          sprev = s;
3199
          s++;
3200
        }
3201
      }
3202
      MOP_OUT;
3203
      NEXT;
3204
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3205
3206
4.10M
    CASE(OP_WORD)  MOP_IN(OP_WORD);
3207
4.10M
      DATA_ENSURE(1);
3208
4.10M
      if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3209
101k
        goto fail;
3210
3211
4.00M
      s += enclen(encode, s, end);
3212
4.00M
      MOP_OUT;
3213
4.00M
      NEXT;
3214
3215
13.1M
    CASE(OP_ASCII_WORD)  MOP_IN(OP_ASCII_WORD);
3216
13.1M
      DATA_ENSURE(1);
3217
13.1M
      if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3218
809k
        goto fail;
3219
3220
12.3M
      s += enclen(encode, s, end);
3221
12.3M
      MOP_OUT;
3222
12.3M
      NEXT;
3223
3224
12.3M
    CASE(OP_NOT_WORD)  MOP_IN(OP_NOT_WORD);
3225
39.7k
      DATA_ENSURE(1);
3226
39.6k
      if (ONIGENC_IS_MBC_WORD(encode, s, end))
3227
3.58k
        goto fail;
3228
3229
36.0k
      s += enclen(encode, s, end);
3230
36.0k
      MOP_OUT;
3231
36.0k
      NEXT;
3232
3233
2.94M
    CASE(OP_NOT_ASCII_WORD)  MOP_IN(OP_NOT_ASCII_WORD);
3234
2.94M
      DATA_ENSURE(1);
3235
2.93M
      if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3236
512k
        goto fail;
3237
3238
2.42M
      s += enclen(encode, s, end);
3239
2.42M
      MOP_OUT;
3240
2.42M
      NEXT;
3241
3242
3.73M
    CASE(OP_WORD_BOUND)  MOP_IN(OP_WORD_BOUND);
3243
3.73M
      if (ON_STR_BEGIN(s)) {
3244
4.82k
        DATA_ENSURE(1);
3245
2.78k
        if (! ONIGENC_IS_MBC_WORD(encode, s, end))
3246
1.45k
          goto fail;
3247
2.78k
      }
3248
3.72M
      else if (ON_STR_END(s)) {
3249
97.6k
        if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
3250
94.0k
          goto fail;
3251
97.6k
      }
3252
3.62M
      else {
3253
3.62M
        if (ONIGENC_IS_MBC_WORD(encode, s, end)
3254
3.62M
            == ONIGENC_IS_MBC_WORD(encode, sprev, end))
3255
1.21M
          goto fail;
3256
3.62M
      }
3257
2.42M
      MOP_OUT;
3258
2.42M
      JUMP;
3259
3260
8.96M
    CASE(OP_ASCII_WORD_BOUND)  MOP_IN(OP_ASCII_WORD_BOUND);
3261
8.96M
      if (ON_STR_BEGIN(s)) {
3262
4.88k
        DATA_ENSURE(1);
3263
3.84k
        if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3264
2.23k
          goto fail;
3265
3.84k
      }
3266
8.95M
      else if (ON_STR_END(s)) {
3267
3.17k
        if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3268
2.19k
          goto fail;
3269
3.17k
      }
3270
8.95M
      else {
3271
8.95M
        if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3272
8.95M
            == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3273
1.86M
          goto fail;
3274
8.95M
      }
3275
7.09M
      MOP_OUT;
3276
7.09M
      JUMP;
3277
3278
7.09M
    CASE(OP_NOT_WORD_BOUND)  MOP_IN(OP_NOT_WORD_BOUND);
3279
4.03M
      if (ON_STR_BEGIN(s)) {
3280
9.91k
        if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3281
1.00k
          goto fail;
3282
9.91k
      }
3283
4.02M
      else if (ON_STR_END(s)) {
3284
6.47k
        if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3285
4.30k
          goto fail;
3286
6.47k
      }
3287
4.02M
      else {
3288
4.02M
        if (ONIGENC_IS_MBC_WORD(encode, s, end)
3289
4.02M
            != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3290
693k
          goto fail;
3291
4.02M
      }
3292
3.34M
      MOP_OUT;
3293
3.34M
      JUMP;
3294
3295
7.77M
    CASE(OP_NOT_ASCII_WORD_BOUND)  MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3296
7.77M
      if (ON_STR_BEGIN(s)) {
3297
11.3k
        if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3298
1.20k
          goto fail;
3299
11.3k
      }
3300
7.76M
      else if (ON_STR_END(s)) {
3301
550k
        if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3302
539k
          goto fail;
3303
550k
      }
3304
7.21M
      else {
3305
7.21M
        if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3306
7.21M
            != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3307
1.52M
          goto fail;
3308
7.21M
      }
3309
5.71M
      MOP_OUT;
3310
5.71M
      JUMP;
3311
3312
5.71M
#ifdef USE_WORD_BEGIN_END
3313
5.71M
    CASE(OP_WORD_BEGIN)  MOP_IN(OP_WORD_BEGIN);
3314
0
      if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3315
0
        if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3316
0
          MOP_OUT;
3317
0
          JUMP;
3318
0
  }
3319
0
      }
3320
0
      goto fail;
3321
3322
0
    CASE(OP_ASCII_WORD_BEGIN)  MOP_IN(OP_ASCII_WORD_BEGIN);
3323
0
      if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3324
0
        if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3325
0
          MOP_OUT;
3326
0
          JUMP;
3327
0
  }
3328
0
      }
3329
0
      goto fail;
3330
3331
0
    CASE(OP_WORD_END)  MOP_IN(OP_WORD_END);
3332
0
      if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3333
0
        if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3334
0
          MOP_OUT;
3335
0
          JUMP;
3336
0
  }
3337
0
      }
3338
0
      goto fail;
3339
3340
0
    CASE(OP_ASCII_WORD_END)  MOP_IN(OP_ASCII_WORD_END);
3341
0
      if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3342
0
        if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3343
0
          MOP_OUT;
3344
0
          JUMP;
3345
0
  }
3346
0
      }
3347
0
      goto fail;
3348
0
#endif
3349
3350
7.57M
    CASE(OP_BEGIN_BUF)  MOP_IN(OP_BEGIN_BUF);
3351
7.57M
      if (! ON_STR_BEGIN(s)) goto fail;
3352
2.35k
      if (IS_NOTBOS(msa->options)) goto fail;
3353
3354
2.35k
      MOP_OUT;
3355
2.35k
      JUMP;
3356
3357
74.3k
    CASE(OP_END_BUF)  MOP_IN(OP_END_BUF);
3358
74.3k
      if (! ON_STR_END(s)) goto fail;
3359
369
      if (IS_NOTEOS(msa->options)) goto fail;
3360
3361
369
      MOP_OUT;
3362
369
      JUMP;
3363
3364
11.2M
    CASE(OP_BEGIN_LINE)  MOP_IN(OP_BEGIN_LINE);
3365
11.2M
      if (ON_STR_BEGIN(s)) {
3366
2.65k
        if (IS_NOTBOL(msa->options)) goto fail;
3367
2.65k
        MOP_OUT;
3368
2.65k
        JUMP;
3369
2.65k
      }
3370
11.2M
      else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3371
35.3k
#ifdef USE_CRNL_AS_LINE_TERMINATOR
3372
35.3k
                && !(IS_NEWLINE_CRLF(option)
3373
0
                     && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3374
35.3k
#endif
3375
35.3k
                && !ON_STR_END(s)) {
3376
35.2k
        MOP_OUT;
3377
35.2k
  JUMP;
3378
35.2k
      }
3379
11.2M
      goto fail;
3380
3381
11.2M
    CASE(OP_END_LINE)  MOP_IN(OP_END_LINE);
3382
427k
      if (ON_STR_END(s)) {
3383
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3384
        if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3385
#endif
3386
5.09k
          if (IS_NOTEOL(msa->options)) goto fail;
3387
5.09k
          MOP_OUT;
3388
5.09k
          JUMP;
3389
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3390
        }
3391
#endif
3392
5.09k
      }
3393
422k
      else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3394
247
        MOP_OUT;
3395
247
  JUMP;
3396
247
      }
3397
427k
      goto fail;
3398
3399
427k
    CASE(OP_SEMI_END_BUF)  MOP_IN(OP_SEMI_END_BUF);
3400
390k
      if (ON_STR_END(s)) {
3401
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3402
        if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3403
#endif
3404
3.00k
          if (IS_NOTEOL(msa->options)) goto fail;
3405
3.00k
          MOP_OUT;
3406
3.00k
          JUMP;
3407
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3408
        }
3409
#endif
3410
3.00k
      }
3411
387k
      else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3412
125k
        UChar* ss = s + enclen(encode, s, end);
3413
125k
        if (ON_STR_END(ss)) {
3414
294
          MOP_OUT;
3415
294
          JUMP;
3416
294
        }
3417
125k
#ifdef USE_CRNL_AS_LINE_TERMINATOR
3418
125k
        else if (IS_NEWLINE_CRLF(option)
3419
0
            && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3420
0
          ss += enclen(encode, ss, end);
3421
0
          if (ON_STR_END(ss)) {
3422
0
            MOP_OUT;
3423
0
            JUMP;
3424
0
          }
3425
0
        }
3426
125k
#endif
3427
125k
      }
3428
390k
      goto fail;
3429
3430
390k
    CASE(OP_BEGIN_POSITION)  MOP_IN(OP_BEGIN_POSITION);
3431
346k
      if (s != msa->gpos)
3432
313k
        goto fail;
3433
3434
32.8k
      MOP_OUT;
3435
32.8k
      JUMP;
3436
3437
151M
    CASE(OP_MEMORY_START_PUSH)  MOP_IN(OP_MEMORY_START_PUSH);
3438
151M
      GET_MEMNUM_INC(mem, p);
3439
151M
      STACK_PUSH_MEM_START(mem, s);
3440
151M
      MOP_OUT;
3441
151M
      JUMP;
3442
3443
151M
    CASE(OP_MEMORY_START)  MOP_IN(OP_MEMORY_START);
3444
1.10M
      GET_MEMNUM_INC(mem, p);
3445
1.10M
      mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3446
1.10M
      mem_end_stk[mem] = INVALID_STACK_INDEX;
3447
1.10M
      MOP_OUT;
3448
1.10M
      JUMP;
3449
3450
31.3M
    CASE(OP_MEMORY_END_PUSH)  MOP_IN(OP_MEMORY_END_PUSH);
3451
31.3M
      GET_MEMNUM_INC(mem, p);
3452
31.3M
      STACK_PUSH_MEM_END(mem, s);
3453
31.3M
      MOP_OUT;
3454
31.3M
      JUMP;
3455
3456
125M
    CASE(OP_MEMORY_END)  MOP_IN(OP_MEMORY_END);
3457
125M
      GET_MEMNUM_INC(mem, p);
3458
125M
      mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3459
125M
      MOP_OUT;
3460
125M
      JUMP;
3461
3462
125M
    CASE(OP_KEEP)  MOP_IN(OP_KEEP);
3463
183k
      pkeep = s;
3464
183k
      MOP_OUT;
3465
183k
      JUMP;
3466
3467
183k
#ifdef USE_SUBEXP_CALL
3468
4.37M
    CASE(OP_MEMORY_END_PUSH_REC)  MOP_IN(OP_MEMORY_END_PUSH_REC);
3469
4.37M
      GET_MEMNUM_INC(mem, p);
3470
4.37M
      STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3471
4.37M
      mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3472
4.37M
      STACK_PUSH_MEM_END(mem, s);
3473
4.37M
      MOP_OUT;
3474
4.37M
      JUMP;
3475
3476
29.4M
    CASE(OP_MEMORY_END_REC)  MOP_IN(OP_MEMORY_END_REC);
3477
29.4M
      GET_MEMNUM_INC(mem, p);
3478
29.4M
      mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3479
29.4M
      STACK_GET_MEM_START(mem, stkp);
3480
3481
29.4M
      if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3482
29.4M
        mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3483
0
      else
3484
0
        mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3485
3486
29.4M
      STACK_PUSH_MEM_END_MARK(mem);
3487
29.4M
      MOP_OUT;
3488
29.4M
      JUMP;
3489
29.4M
#endif
3490
3491
31.0M
    CASE(OP_BACKREF1)  MOP_IN(OP_BACKREF1);
3492
31.0M
      mem = 1;
3493
31.0M
      goto backref;
3494
3495
9.74M
    CASE(OP_BACKREF2)  MOP_IN(OP_BACKREF2);
3496
9.74M
      mem = 2;
3497
9.74M
      goto backref;
3498
3499
50.3M
    CASE(OP_BACKREFN)  MOP_IN(OP_BACKREFN);
3500
50.3M
      GET_MEMNUM_INC(mem, p);
3501
91.1M
    backref:
3502
91.1M
      {
3503
91.1M
        int len;
3504
91.1M
        UChar *pstart, *pend;
3505
3506
        /* if you want to remove following line,
3507
           you should check in parse and compile time. */
3508
91.1M
        if (mem > num_mem) goto fail;
3509
91.1M
        if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3510
30.5M
        if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3511
3512
30.5M
        if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3513
30.5M
          pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3514
0
        else
3515
0
          pstart = (UChar* )((void* )mem_start_stk[mem]);
3516
3517
30.5M
        pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3518
30.5M
                ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3519
30.5M
                : (UChar* )((void* )mem_end_stk[mem]));
3520
30.5M
        n = pend - pstart;
3521
30.5M
        DATA_ENSURE(n);
3522
30.4M
        sprev = s;
3523
30.4M
        STRING_CMP(pstart, s, n);
3524
15.8M
        while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3525
569
          sprev += len;
3526
3527
15.8M
        MOP_OUT;
3528
15.8M
        JUMP;
3529
15.8M
      }
3530
3531
4.22M
    CASE(OP_BACKREFN_IC)  MOP_IN(OP_BACKREFN_IC);
3532
4.22M
      GET_MEMNUM_INC(mem, p);
3533
4.22M
      {
3534
4.22M
        int len;
3535
4.22M
        UChar *pstart, *pend;
3536
3537
        /* if you want to remove following line,
3538
           you should check in parse and compile time. */
3539
4.22M
        if (mem > num_mem) goto fail;
3540
4.22M
        if (mem_end_stk[mem]   == INVALID_STACK_INDEX) goto fail;
3541
4.19M
        if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3542
3543
4.19M
        if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3544
4.19M
          pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3545
0
        else
3546
0
          pstart = (UChar* )((void* )mem_start_stk[mem]);
3547
3548
4.19M
        pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3549
4.19M
                ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3550
4.19M
                : (UChar* )((void* )mem_end_stk[mem]));
3551
4.19M
        n = pend - pstart;
3552
4.19M
        DATA_ENSURE(n);
3553
4.19M
        sprev = s;
3554
4.19M
        STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
3555
2.29M
        while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3556
16.6k
          sprev += len;
3557
3558
2.28M
        MOP_OUT;
3559
2.28M
        JUMP;
3560
2.28M
      }
3561
2.28M
      NEXT;
3562
3563
4.75M
    CASE(OP_BACKREF_MULTI)  MOP_IN(OP_BACKREF_MULTI);
3564
4.75M
      {
3565
4.75M
        int len, is_fail;
3566
4.75M
        UChar *pstart, *pend, *swork;
3567
3568
4.75M
        GET_LENGTH_INC(tlen, p);
3569
12.0M
        for (i = 0; i < tlen; i++) {
3570
8.68M
          GET_MEMNUM_INC(mem, p);
3571
3572
8.68M
          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3573
1.56M
          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3574
3575
1.56M
          if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3576
1.56M
            pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3577
0
          else
3578
0
            pstart = (UChar* )((void* )mem_start_stk[mem]);
3579
3580
1.56M
          pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3581
1.56M
                  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3582
1.56M
                  : (UChar* )((void* )mem_end_stk[mem]));
3583
1.56M
          n = pend - pstart;
3584
1.56M
          DATA_ENSURE_CONTINUE(n);
3585
1.55M
          sprev = s;
3586
1.55M
          swork = s;
3587
1.55M
          STRING_CMP_VALUE(pstart, swork, n, is_fail);
3588
1.55M
          if (is_fail) continue;
3589
1.35M
          s = swork;
3590
1.35M
          while (sprev + (len = enclen_approx(encode, sprev, end)) < s)
3591
3.43k
            sprev += len;
3592
3593
1.35M
          p += (SIZE_MEMNUM * (tlen - i - 1));
3594
1.35M
          break; /* success */
3595
1.55M
        }
3596
4.75M
        if (i == tlen) goto fail;
3597
1.35M
        MOP_OUT;
3598
1.35M
        JUMP;
3599
1.35M
      }
3600
1.35M
      NEXT;
3601
3602
6.61M
    CASE(OP_BACKREF_MULTI_IC)  MOP_IN(OP_BACKREF_MULTI_IC);
3603
6.61M
      {
3604
6.61M
        int len, is_fail;
3605
6.61M
        UChar *pstart, *pend, *swork;
3606
3607
6.61M
        GET_LENGTH_INC(tlen, p);
3608
19.4M
        for (i = 0; i < tlen; i++) {
3609
16.3M
          GET_MEMNUM_INC(mem, p);
3610
3611
16.3M
          if (mem_end_stk[mem]   == INVALID_STACK_INDEX) continue;
3612
4.18M
          if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3613
3614
4.18M
          if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3615
4.18M
            pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3616
0
          else
3617
0
            pstart = (UChar* )((void* )mem_start_stk[mem]);
3618
3619
4.18M
          pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3620
4.18M
                  ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3621
4.18M
                  : (UChar* )((void* )mem_end_stk[mem]));
3622
4.18M
          n = pend - pstart;
3623
4.18M
          DATA_ENSURE_CONTINUE(n);
3624
4.18M
          sprev = s;
3625
4.18M
          swork = s;
3626
4.18M
          STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3627
4.18M
          if (is_fail) continue;
3628
3.46M
          s = swork;
3629
3.60M
          while (sprev + (len = enclen(encode, sprev, end)) < s)
3630
139k
            sprev += len;
3631
3632
3.46M
          p += (SIZE_MEMNUM * (tlen - i - 1));
3633
3.46M
          break; /* success */
3634
4.18M
        }
3635
6.61M
        if (i == tlen) goto fail;
3636
3.46M
        MOP_OUT;
3637
3.46M
        JUMP;
3638
3.46M
      }
3639
3640
0
#ifdef USE_BACKREF_WITH_LEVEL
3641
20.4M
    CASE(OP_BACKREF_WITH_LEVEL)
3642
20.4M
      {
3643
20.4M
        int len;
3644
20.4M
        OnigOptionType ic;
3645
20.4M
        LengthType level;
3646
3647
20.4M
        GET_OPTION_INC(ic,    p);
3648
20.4M
        GET_LENGTH_INC(level, p);
3649
20.4M
        GET_LENGTH_INC(tlen,  p);
3650
3651
20.4M
        sprev = s;
3652
20.4M
        if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3653
20.4M
                  case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3654
19.7M
          while (sprev + (len = enclen(encode, sprev, end)) < s)
3655
6.98k
            sprev += len;
3656
3657
19.7M
          p += (SIZE_MEMNUM * tlen);
3658
19.7M
        }
3659
706k
        else
3660
706k
          goto fail;
3661
3662
19.7M
        MOP_OUT;
3663
19.7M
        JUMP;
3664
19.7M
      }
3665
3666
0
#endif
3667
3668
#if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */
3669
    CASE(OP_SET_OPTION_PUSH)  MOP_IN(OP_SET_OPTION_PUSH);
3670
      GET_OPTION_INC(option, p);
3671
      STACK_PUSH_ALT(p, s, sprev, pkeep);
3672
      p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3673
      MOP_OUT;
3674
      JUMP;
3675
3676
    CASE(OP_SET_OPTION)  MOP_IN(OP_SET_OPTION);
3677
      GET_OPTION_INC(option, p);
3678
      MOP_OUT;
3679
      JUMP;
3680
#endif
3681
3682
158M
    CASE(OP_NULL_CHECK_START)  MOP_IN(OP_NULL_CHECK_START);
3683
158M
      GET_MEMNUM_INC(mem, p);    /* mem: null check id */
3684
158M
      STACK_PUSH_NULL_CHECK_START(mem, s);
3685
158M
      MOP_OUT;
3686
158M
      JUMP;
3687
3688
158M
    CASE(OP_NULL_CHECK_END)  MOP_IN(OP_NULL_CHECK_END);
3689
89.5M
      {
3690
89.5M
        int isnull;
3691
3692
89.5M
        GET_MEMNUM_INC(mem, p); /* mem: null check id */
3693
89.5M
        STACK_NULL_CHECK(isnull, mem, s);
3694
89.5M
        if (isnull) {
3695
#ifdef ONIG_DEBUG_MATCH
3696
          fprintf(stderr, "NULL_CHECK_END: skip  id:%d, s:%"PRIuPTR" (%p)\n",
3697
                  (int )mem, (uintptr_t )s, s);
3698
#endif
3699
130M
        null_check_found:
3700
          /* empty loop found, skip next instruction */
3701
130M
          switch (*p++) {
3702
105M
          case OP_JUMP:
3703
110M
          case OP_PUSH:
3704
110M
            p += SIZE_RELADDR;
3705
110M
            break;
3706
5.58M
          case OP_REPEAT_INC:
3707
8.54M
          case OP_REPEAT_INC_NG:
3708
14.2M
          case OP_REPEAT_INC_SG:
3709
19.9M
          case OP_REPEAT_INC_NG_SG:
3710
19.9M
            p += SIZE_MEMNUM;
3711
19.9M
            break;
3712
0
          default:
3713
0
            goto unexpected_bytecode_error;
3714
0
            break;
3715
130M
          }
3716
130M
        }
3717
89.5M
      }
3718
159M
      MOP_OUT;
3719
159M
      JUMP;
3720
3721
159M
#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3722
159M
    CASE(OP_NULL_CHECK_END_MEMST)  MOP_IN(OP_NULL_CHECK_END_MEMST);
3723
130M
      {
3724
130M
        int isnull;
3725
3726
130M
        GET_MEMNUM_INC(mem, p); /* mem: null check id */
3727
130M
        STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
3728
130M
        if (isnull) {
3729
# ifdef ONIG_DEBUG_MATCH
3730
          fprintf(stderr, "NULL_CHECK_END_MEMST: skip  id:%d, s:%"PRIuPTR" (%p)\n",
3731
                  (int )mem, (uintptr_t )s, s);
3732
# endif
3733
67.6M
          if (isnull == -1) goto fail;
3734
65.3M
          goto null_check_found;
3735
67.6M
        }
3736
130M
      }
3737
63.0M
      MOP_OUT;
3738
63.0M
      JUMP;
3739
63.0M
#endif
3740
3741
63.0M
#ifdef USE_SUBEXP_CALL
3742
63.0M
    CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3743
9.67M
      MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3744
9.67M
      {
3745
9.67M
        int isnull;
3746
3747
9.67M
        GET_MEMNUM_INC(mem, p); /* mem: null check id */
3748
9.67M
# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3749
9.67M
        STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3750
# else
3751
        STACK_NULL_CHECK_REC(isnull, mem, s);
3752
# endif
3753
9.67M
        if (isnull) {
3754
# ifdef ONIG_DEBUG_MATCH
3755
          fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip  id:%d, s:%"PRIuPTR" (%p)\n",
3756
                  (int )mem, (uintptr_t )s, s);
3757
# endif
3758
4.14M
          if (isnull == -1) goto fail;
3759
4.14M
          goto null_check_found;
3760
4.14M
        }
3761
5.53M
        else {
3762
5.53M
          STACK_PUSH_NULL_CHECK_END(mem);
3763
5.53M
        }
3764
9.67M
      }
3765
5.53M
      MOP_OUT;
3766
5.53M
      JUMP;
3767
5.53M
#endif
3768
3769
771M
    CASE(OP_JUMP)  MOP_IN(OP_JUMP);
3770
771M
      GET_RELADDR_INC(addr, p);
3771
771M
      p += addr;
3772
771M
      MOP_OUT;
3773
771M
      CHECK_INTERRUPT_IN_MATCH_AT;
3774
771M
      JUMP;
3775
3776
936M
    CASE(OP_PUSH)  MOP_IN(OP_PUSH);
3777
936M
      GET_RELADDR_INC(addr, p);
3778
936M
      CHECK_MATCH_CACHE;
3779
932M
      STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3780
932M
      MOP_OUT;
3781
932M
      JUMP;
3782
3783
#ifdef USE_COMBINATION_EXPLOSION_CHECK
3784
    CASE(OP_STATE_CHECK_PUSH)  MOP_IN(OP_STATE_CHECK_PUSH);
3785
      GET_STATE_CHECK_NUM_INC(mem, p);
3786
      STATE_CHECK_VAL(scv, mem);
3787
      if (scv) goto fail;
3788
3789
      GET_RELADDR_INC(addr, p);
3790
      STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3791
      MOP_OUT;
3792
      JUMP;
3793
3794
    CASE(OP_STATE_CHECK_PUSH_OR_JUMP)  MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3795
      GET_STATE_CHECK_NUM_INC(mem, p);
3796
      GET_RELADDR_INC(addr, p);
3797
      STATE_CHECK_VAL(scv, mem);
3798
      if (scv) {
3799
        p += addr;
3800
      }
3801
      else {
3802
        STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3803
      }
3804
      MOP_OUT;
3805
      JUMP;
3806
3807
    CASE(OP_STATE_CHECK)  MOP_IN(OP_STATE_CHECK);
3808
      GET_STATE_CHECK_NUM_INC(mem, p);
3809
      STATE_CHECK_VAL(scv, mem);
3810
      if (scv) goto fail;
3811
3812
      STACK_PUSH_STATE_CHECK(s, mem);
3813
      MOP_OUT;
3814
      JUMP;
3815
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3816
3817
932M
    CASE(OP_POP)  MOP_IN(OP_POP);
3818
0
      STACK_POP_ONE;
3819
0
#ifdef USE_MATCH_CACHE
3820
      /* We need to increment num_fails here, for invoking a cache optimization correctly, */
3821
      /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3822
0
      msa->num_fails++;
3823
0
#endif
3824
0
      MOP_OUT;
3825
0
      JUMP;
3826
3827
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3828
    CASE(OP_PUSH_OR_JUMP_EXACT1)  MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3829
      GET_RELADDR_INC(addr, p);
3830
      if (*p == *s && DATA_ENSURE_CHECK1) {
3831
        p++;
3832
        CHECK_MATCH_CACHE;
3833
        STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3834
        MOP_OUT;
3835
        JUMP;
3836
      }
3837
      p += (addr + 1);
3838
      MOP_OUT;
3839
      JUMP;
3840
#endif
3841
3842
403M
    CASE(OP_PUSH_IF_PEEK_NEXT)  MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3843
403M
      GET_RELADDR_INC(addr, p);
3844
403M
      CHECK_MATCH_CACHE;
3845
402M
      if (*p == *s) {
3846
21.2M
        p++;
3847
21.2M
        STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3848
21.2M
        MOP_OUT;
3849
21.2M
        JUMP;
3850
21.2M
      }
3851
402M
      p++;
3852
402M
      INC_NUM_FAILS;
3853
402M
      MOP_OUT;
3854
402M
      JUMP;
3855
3856
402M
    CASE(OP_REPEAT)  MOP_IN(OP_REPEAT);
3857
24.9M
      {
3858
24.9M
        GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
3859
24.9M
        GET_RELADDR_INC(addr, p);
3860
3861
24.9M
        STACK_ENSURE(1);
3862
24.9M
        repeat_stk[mem] = GET_STACK_INDEX(stk);
3863
24.9M
        STACK_PUSH_REPEAT(mem, p);
3864
3865
24.9M
        if (reg->repeat_range[mem].lower == 0) {
3866
24.1M
          CHECK_MATCH_CACHE;
3867
24.0M
          STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3868
24.0M
        }
3869
24.9M
      }
3870
24.8M
      MOP_OUT;
3871
24.8M
      JUMP;
3872
3873
77.4M
    CASE(OP_REPEAT_NG)  MOP_IN(OP_REPEAT_NG);
3874
77.4M
      {
3875
77.4M
        GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
3876
77.4M
        GET_RELADDR_INC(addr, p);
3877
3878
77.4M
        STACK_ENSURE(1);
3879
77.4M
        repeat_stk[mem] = GET_STACK_INDEX(stk);
3880
77.4M
        STACK_PUSH_REPEAT(mem, p);
3881
3882
77.4M
        if (reg->repeat_range[mem].lower == 0) {
3883
77.2M
          CHECK_MATCH_CACHE;
3884
75.6M
          STACK_PUSH_ALT(p, s, sprev, pkeep);
3885
75.6M
          p += addr;
3886
75.6M
        }
3887
77.4M
      }
3888
75.8M
      MOP_OUT;
3889
75.8M
      JUMP;
3890
3891
75.8M
    CASE(OP_REPEAT_INC)  MOP_IN(OP_REPEAT_INC);
3892
27.8M
      GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3893
27.8M
      si = repeat_stk[mem];
3894
27.8M
      stkp = STACK_AT(si);
3895
3896
75.3M
    repeat_inc:
3897
75.3M
      stkp->u.repeat.count++;
3898
75.3M
      if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3899
        /* end of repeat. Nothing to do. */
3900
6.16M
      }
3901
69.1M
      else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3902
61.8M
#ifdef USE_MATCH_CACHE
3903
61.8M
        if (*pbegin == OP_REPEAT_INC) {
3904
26.1M
#undef MATCH_CACHE_HIT
3905
26.1M
#define MATCH_CACHE_HIT stkp->u.repeat.count--;
3906
26.1M
          CHECK_MATCH_CACHE;
3907
26.1M
#undef MATCH_CACHE_HIT
3908
26.1M
#define MATCH_CACHE_HIT ((void) 0)
3909
26.1M
        }
3910
61.8M
#endif
3911
61.8M
        STACK_PUSH_ALT(p, s, sprev, pkeep);
3912
61.8M
        p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3913
61.8M
      }
3914
7.30M
      else {
3915
7.30M
        p = stkp->u.repeat.pcode;
3916
7.30M
      }
3917
75.3M
      STACK_PUSH_REPEAT_INC(si);
3918
75.3M
      MOP_OUT;
3919
75.3M
      CHECK_INTERRUPT_IN_MATCH_AT;
3920
75.3M
      JUMP;
3921
3922
75.3M
    CASE(OP_REPEAT_INC_SG)  MOP_IN(OP_REPEAT_INC_SG);
3923
47.4M
      GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3924
47.4M
      STACK_GET_REPEAT(mem, stkp);
3925
47.4M
      si = GET_STACK_INDEX(stkp);
3926
47.4M
      goto repeat_inc;
3927
3928
101M
    CASE(OP_REPEAT_INC_NG)  MOP_IN(OP_REPEAT_INC_NG);
3929
101M
      GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3930
101M
      si = repeat_stk[mem];
3931
101M
      stkp = STACK_AT(si);
3932
3933
115M
    repeat_inc_ng:
3934
115M
      stkp->u.repeat.count++;
3935
115M
      if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3936
72.2M
        if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3937
65.7M
          UChar* pcode = stkp->u.repeat.pcode;
3938
3939
65.7M
          STACK_PUSH_REPEAT_INC(si);
3940
65.7M
          if (*pbegin == OP_REPEAT_INC_NG) {
3941
63.1M
            CHECK_MATCH_CACHE;
3942
63.1M
          }
3943
65.7M
          STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3944
65.7M
        }
3945
6.46M
        else {
3946
6.46M
          p = stkp->u.repeat.pcode;
3947
6.46M
          STACK_PUSH_REPEAT_INC(si);
3948
6.46M
        }
3949
72.2M
      }
3950
43.6M
      else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3951
28.8M
        STACK_PUSH_REPEAT_INC(si);
3952
28.8M
      }
3953
115M
      MOP_OUT;
3954
115M
      CHECK_INTERRUPT_IN_MATCH_AT;
3955
115M
      JUMP;
3956
3957
115M
    CASE(OP_REPEAT_INC_NG_SG)  MOP_IN(OP_REPEAT_INC_NG_SG);
3958
14.6M
      GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3959
14.6M
      STACK_GET_REPEAT(mem, stkp);
3960
14.6M
      si = GET_STACK_INDEX(stkp);
3961
14.6M
      goto repeat_inc_ng;
3962
3963
7.19M
    CASE(OP_PUSH_POS)  MOP_IN(OP_PUSH_POS);
3964
7.19M
      STACK_PUSH_POS(s, sprev, pkeep);
3965
7.19M
      MOP_OUT;
3966
7.19M
      JUMP;
3967
3968
7.19M
    CASE(OP_POP_POS)  MOP_IN(OP_POP_POS);
3969
6.71M
      {
3970
6.71M
        STACK_POS_END(stkp);
3971
6.71M
        s     = stkp->u.state.pstr;
3972
6.71M
        sprev = stkp->u.state.pstr_prev;
3973
6.71M
      }
3974
6.71M
      MOP_OUT;
3975
6.71M
      JUMP;
3976
3977
10.4M
    CASE(OP_PUSH_POS_NOT)  MOP_IN(OP_PUSH_POS_NOT);
3978
10.4M
      GET_RELADDR_INC(addr, p);
3979
10.4M
      STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3980
10.4M
      MOP_OUT;
3981
10.4M
      JUMP;
3982
3983
10.4M
    CASE(OP_FAIL_POS)  MOP_IN(OP_FAIL_POS);
3984
700k
      STACK_POP_TIL_POS_NOT;
3985
700k
      goto fail;
3986
3987
295M
    CASE(OP_PUSH_STOP_BT)  MOP_IN(OP_PUSH_STOP_BT);
3988
295M
      STACK_PUSH_STOP_BT;
3989
295M
      MOP_OUT;
3990
295M
      JUMP;
3991
3992
295M
    CASE(OP_POP_STOP_BT)  MOP_IN(OP_POP_STOP_BT);
3993
210M
      STACK_STOP_BT_END;
3994
210M
      MOP_OUT;
3995
210M
      JUMP;
3996
3997
210M
    CASE(OP_LOOK_BEHIND)  MOP_IN(OP_LOOK_BEHIND);
3998
84.2k
      GET_LENGTH_INC(tlen, p);
3999
84.2k
      s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4000
84.2k
      if (IS_NULL(s)) goto fail;
4001
83.6k
      sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4002
83.6k
      MOP_OUT;
4003
83.6k
      JUMP;
4004
4005
9.60M
    CASE(OP_PUSH_LOOK_BEHIND_NOT)  MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
4006
9.60M
      GET_RELADDR_INC(addr, p);
4007
9.60M
      GET_LENGTH_INC(tlen, p);
4008
9.60M
      q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
4009
9.60M
      if (IS_NULL(q)) {
4010
        /* too short case -> success. ex. /(?<!XXX)a/.match("a")
4011
           If you want to change to fail, replace following line. */
4012
3.98k
        p += addr;
4013
        /* goto fail; */
4014
3.98k
      }
4015
9.59M
      else {
4016
9.59M
        STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
4017
9.59M
        s = q;
4018
9.59M
        sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
4019
9.59M
      }
4020
9.60M
      MOP_OUT;
4021
9.60M
      JUMP;
4022
4023
9.60M
    CASE(OP_FAIL_LOOK_BEHIND_NOT)  MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
4024
4.65M
      STACK_POP_TIL_LOOK_BEHIND_NOT;
4025
4.65M
      goto fail;
4026
4027
6.26M
    CASE(OP_PUSH_ABSENT_POS)  MOP_IN(OP_PUSH_ABSENT_POS);
4028
      /* Save the absent-start-pos and the original end-pos. */
4029
6.26M
      STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
4030
6.26M
      MOP_OUT;
4031
6.26M
      JUMP;
4032
4033
80.8M
    CASE(OP_ABSENT)  MOP_IN(OP_ABSENT);
4034
80.8M
      {
4035
80.8M
        const UChar* aend = ABSENT_END_POS;
4036
80.8M
        UChar* absent;
4037
80.8M
        UChar* selfp = p - 1;
4038
4039
80.8M
        STACK_POP_ABSENT_POS(absent, ABSENT_END_POS);  /* Restore end-pos. */
4040
80.8M
        GET_RELADDR_INC(addr, p);
4041
#ifdef ONIG_DEBUG_MATCH
4042
        fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
4043
#endif
4044
80.8M
        if ((absent > aend) && (s > absent)) {
4045
          /* An empty match occurred in (?~...) at the start point.
4046
           * Never match. */
4047
1.57M
          STACK_POP;
4048
1.57M
          goto fail;
4049
1.57M
        }
4050
79.2M
        else if ((s >= aend) && (s > absent)) {
4051
4.51M
          if (s > aend) {
4052
            /* Only one (or less) character matched in the last iteration.
4053
             * This is not a possible point. */
4054
3.19M
            goto fail;
4055
3.19M
          }
4056
          /* All possible points were found. Try matching after (?~...). */
4057
1.32M
          DATA_ENSURE(0);
4058
1.32M
          p += addr;
4059
1.32M
        }
4060
74.7M
        else if (s == end) {
4061
          /* At the end of the string, just match with it */
4062
162k
          DATA_ENSURE(0);
4063
162k
          p += addr;
4064
162k
        }
4065
74.5M
        else {
4066
74.5M
          STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
4067
74.5M
          n = enclen(encode, s, end);
4068
74.5M
          STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
4069
74.5M
          STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
4070
74.5M
          STACK_PUSH_ABSENT;
4071
74.5M
          ABSENT_END_POS = aend;
4072
74.5M
        }
4073
80.8M
      }
4074
76.0M
      MOP_OUT;
4075
76.0M
      JUMP;
4076
4077
76.0M
    CASE(OP_ABSENT_END)  MOP_IN(OP_ABSENT_END);
4078
      /* The pattern inside (?~...) was matched.
4079
       * Set the end-pos temporarily and go to the next iteration. */
4080
7.95M
      if (sprev < ABSENT_END_POS)
4081
7.95M
        ABSENT_END_POS = sprev;
4082
#ifdef ONIG_DEBUG_MATCH
4083
      fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
4084
#endif
4085
7.95M
      STACK_POP_TIL_ABSENT;
4086
7.95M
      goto fail;
4087
4088
0
#ifdef USE_SUBEXP_CALL
4089
32.2M
    CASE(OP_CALL)  MOP_IN(OP_CALL);
4090
32.2M
      GET_ABSADDR_INC(addr, p);
4091
32.2M
      STACK_PUSH_CALL_FRAME(p);
4092
32.2M
      p = reg->p + addr;
4093
32.2M
      MOP_OUT;
4094
32.2M
      JUMP;
4095
4096
32.2M
    CASE(OP_RETURN)  MOP_IN(OP_RETURN);
4097
12.0M
      STACK_RETURN(p);
4098
12.0M
      STACK_PUSH_RETURN;
4099
12.0M
      MOP_OUT;
4100
12.0M
      JUMP;
4101
12.0M
#endif
4102
4103
12.0M
    CASE(OP_CONDITION)  MOP_IN(OP_CONDITION);
4104
6.72M
      GET_MEMNUM_INC(mem, p);
4105
6.72M
      GET_RELADDR_INC(addr, p);
4106
6.72M
      if ((mem > num_mem) ||
4107
6.72M
          (mem_end_stk[mem]   == INVALID_STACK_INDEX) ||
4108
6.72M
          (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
4109
936
        p += addr;
4110
936
      }
4111
6.72M
      MOP_OUT;
4112
6.72M
      JUMP;
4113
4114
6.72M
    CASE(OP_FINISH)
4115
6.50M
      goto finish;
4116
4117
0
    CASE(OP_FAIL)
4118
0
      if (0) {
4119
        /* fall */
4120
1.51G
      fail:
4121
1.51G
        MOP_OUT;
4122
1.51G
      }
4123
1.51G
      MOP_IN(OP_FAIL);
4124
1.51G
      STACK_POP;
4125
1.51G
      p     = stk->u.state.pcode;
4126
1.51G
      s     = stk->u.state.pstr;
4127
1.51G
      sprev = stk->u.state.pstr_prev;
4128
1.51G
      pkeep = stk->u.state.pkeep;
4129
4130
1.51G
#ifdef USE_MATCH_CACHE
4131
1.51G
      if (
4132
1.51G
          msa->match_cache_status != MATCH_CACHE_STATUS_DISABLED &&
4133
574M
          ++msa->num_fails >= (long)(end - str) * msa->num_cache_opcodes
4134
1.51G
      ) {
4135
199M
        if (msa->match_cache_status == MATCH_CACHE_STATUS_UNINIT) {
4136
119k
          msa->match_cache_status = MATCH_CACHE_STATUS_INIT;
4137
119k
          OnigPosition r = count_num_cache_opcodes(reg, &msa->num_cache_opcodes);
4138
119k
          if (r < 0) goto bytecode_error;
4139
119k
        }
4140
199M
        if (msa->num_cache_opcodes == NUM_CACHE_OPCODES_IMPOSSIBLE || msa->num_cache_opcodes == 0) {
4141
41.5k
          msa->match_cache_status = MATCH_CACHE_STATUS_DISABLED;
4142
41.5k
          goto fail_match_cache;
4143
41.5k
        }
4144
199M
        if (msa->num_fails < (long)(end - str) * msa->num_cache_opcodes) {
4145
75.8k
          goto fail_match_cache;
4146
75.8k
        }
4147
199M
        if (msa->cache_opcodes == NULL) {
4148
42.4k
          msa->match_cache_status = MATCH_CACHE_STATUS_ENABLED;
4149
42.4k
          OnigCacheOpcode* cache_opcodes = (OnigCacheOpcode*)xmalloc(msa->num_cache_opcodes * sizeof(OnigCacheOpcode));
4150
42.4k
          if (cache_opcodes == NULL) {
4151
0
            return ONIGERR_MEMORY;
4152
0
          }
4153
42.4k
          OnigPosition r = init_cache_opcodes(reg, cache_opcodes, &msa->num_cache_points);
4154
42.4k
          if (r < 0) {
4155
0
            if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
4156
0
            else goto bytecode_error;
4157
0
          }
4158
42.4k
          msa->cache_opcodes = cache_opcodes;
4159
#ifdef ONIG_DEBUG_MATCH_CACHE
4160
          fprintf(stderr, "MATCH CACHE: #cache opcodes = %ld\n", msa->num_cache_opcodes);
4161
          fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points);
4162
          fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes);
4163
          for (int i = 0; i < msa->num_cache_opcodes; i++) {
4164
            fprintf(stderr, "MATCH CACHE:   [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr);
4165
          }
4166
#endif
4167
42.4k
        }
4168
199M
        if (msa->match_cache_buf == NULL) {
4169
42.4k
          size_t length = (end - str) + 1;
4170
42.4k
          size_t num_match_cache_points = (size_t)msa->num_cache_points * length;
4171
#ifdef ONIG_DEBUG_MATCH_CACHE
4172
          fprintf(stderr, "MATCH CACHE: #match cache points = %zu (length = %zu)\n", num_match_cache_points, length);
4173
#endif
4174
          /* Overflow check */
4175
42.4k
          if (num_match_cache_points / length != (size_t)msa->num_cache_points) {
4176
0
            return ONIGERR_MEMORY;
4177
0
          }
4178
42.4k
          if (num_match_cache_points >= LONG_MAX_LIMIT) {
4179
0
            return ONIGERR_MEMORY;
4180
0
          }
4181
42.4k
          size_t match_cache_buf_length = (num_match_cache_points >> 3) + (num_match_cache_points & 7 ? 1 : 0) + 1;
4182
42.4k
          uint8_t* match_cache_buf = (uint8_t*)xmalloc(match_cache_buf_length * sizeof(uint8_t));
4183
42.4k
          if (match_cache_buf == NULL) {
4184
0
            return ONIGERR_MEMORY;
4185
0
          }
4186
42.4k
          xmemset(match_cache_buf, 0, match_cache_buf_length * sizeof(uint8_t));
4187
42.4k
          msa->match_cache_buf = match_cache_buf;
4188
42.4k
        }
4189
199M
      }
4190
1.51G
      fail_match_cache:
4191
1.51G
#endif
4192
4193
#ifdef USE_COMBINATION_EXPLOSION_CHECK
4194
      if (stk->u.state.state_check != 0) {
4195
        stk->type = STK_STATE_CHECK_MARK;
4196
        stk++;
4197
      }
4198
#endif
4199
4200
1.51G
      MOP_OUT;
4201
1.51G
      CHECK_INTERRUPT_IN_MATCH_AT;
4202
1.51G
      JUMP;
4203
4204
1.51G
    DEFAULT
4205
0
      goto bytecode_error;
4206
1.51G
  } VM_LOOP_END
4207
4208
7.67M
 finish:
4209
7.67M
  STACK_SAVE;
4210
7.67M
  xfree(xmalloc_base);
4211
7.67M
  return best_len;
4212
4213
#ifdef ONIG_DEBUG
4214
 stack_error:
4215
  STACK_SAVE;
4216
  xfree(xmalloc_base);
4217
  return ONIGERR_STACK_BUG;
4218
#endif
4219
4220
695
 bytecode_error:
4221
695
  STACK_SAVE;
4222
695
  xfree(xmalloc_base);
4223
695
  return ONIGERR_UNDEFINED_BYTECODE;
4224
4225
0
 unexpected_bytecode_error:
4226
0
  STACK_SAVE;
4227
0
  xfree(xmalloc_base);
4228
0
  return ONIGERR_UNEXPECTED_BYTECODE;
4229
4230
0
 timeout:
4231
0
  STACK_SAVE;
4232
0
  xfree(xmalloc_base);
4233
0
  return ONIGERR_TIMEOUT;
4234
1.51G
}
4235
4236
4237
static UChar*
4238
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
4239
            const UChar* text, const UChar* text_end, UChar* text_range)
4240
171k
{
4241
171k
  UChar *t, *p, *s, *end;
4242
4243
171k
  end = (UChar* )text_end;
4244
171k
  end -= target_end - target - 1;
4245
171k
  if (end > text_range)
4246
249
    end = text_range;
4247
4248
171k
  s = (UChar* )text;
4249
4250
171k
  if (enc->max_enc_len == enc->min_enc_len) {
4251
136k
    int n = enc->max_enc_len;
4252
4253
1.58M
    while (s < end) {
4254
1.58M
      if (*s == *target) {
4255
136k
        p = s + 1;
4256
136k
        t = target + 1;
4257
136k
        if (target_end == t || memcmp(t, p, target_end - t) == 0)
4258
136k
          return s;
4259
136k
      }
4260
1.45M
      s += n;
4261
1.45M
    }
4262
487
    return (UChar* )NULL;
4263
136k
  }
4264
151k
  while (s < end) {
4265
151k
    if (*s == *target) {
4266
34.2k
      p = s + 1;
4267
34.2k
      t = target + 1;
4268
34.2k
      if (target_end == t || memcmp(t, p, target_end - t) == 0)
4269
34.2k
        return s;
4270
34.2k
    }
4271
117k
    s += enclen(enc, s, text_end);
4272
117k
  }
4273
4274
45
  return (UChar* )NULL;
4275
34.3k
}
4276
4277
static int
4278
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4279
                     const UChar* t, const UChar* tend,
4280
                     const UChar* p, const UChar* end)
4281
557k
{
4282
557k
  int lowlen;
4283
557k
  UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4284
4285
2.06M
  while (t < tend) {
4286
1.96M
    lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4287
1.96M
    q = lowbuf;
4288
3.47M
    while (lowlen > 0) {
4289
1.96M
      if (*t++ != *q++)  return 0;
4290
1.50M
      lowlen--;
4291
1.50M
    }
4292
1.96M
  }
4293
4294
100k
  return 1;
4295
557k
}
4296
4297
static UChar*
4298
slow_search_ic(OnigEncoding enc, int case_fold_flag,
4299
               UChar* target, UChar* target_end,
4300
               const UChar* text, const UChar* text_end, UChar* text_range)
4301
40.0k
{
4302
40.0k
  UChar *s, *end;
4303
4304
40.0k
  end = (UChar* )text_end;
4305
40.0k
  end -= target_end - target - 1;
4306
40.0k
  if (end > text_range)
4307
564
    end = text_range;
4308
4309
40.0k
  s = (UChar* )text;
4310
4311
450k
  while (s < end) {
4312
450k
    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4313
450k
                             s, text_end))
4314
39.9k
      return s;
4315
4316
410k
    s += enclen(enc, s, text_end);
4317
410k
  }
4318
4319
125
  return (UChar* )NULL;
4320
40.0k
}
4321
4322
static UChar*
4323
slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4324
                     const UChar* text, const UChar* adjust_text,
4325
                     const UChar* text_end, const UChar* text_start)
4326
24
{
4327
24
  UChar *t, *p, *s;
4328
4329
24
  s = (UChar* )text_end;
4330
24
  s -= (target_end - target);
4331
24
  if (s > text_start)
4332
0
    s = (UChar* )text_start;
4333
24
  else
4334
24
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4335
4336
24
  while (s >= text) {
4337
24
    if (*s == *target) {
4338
24
      p = s + 1;
4339
24
      t = target + 1;
4340
24
      while (t < target_end) {
4341
0
        if (*t != *p++)
4342
0
          break;
4343
0
        t++;
4344
0
      }
4345
24
      if (t == target_end)
4346
24
        return s;
4347
24
    }
4348
0
    s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4349
0
  }
4350
4351
0
  return (UChar* )NULL;
4352
24
}
4353
4354
static UChar*
4355
slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4356
                        UChar* target, UChar* target_end,
4357
                        const UChar* text, const UChar* adjust_text,
4358
                        const UChar* text_end, const UChar* text_start)
4359
10
{
4360
10
  UChar *s;
4361
4362
10
  s = (UChar* )text_end;
4363
10
  s -= (target_end - target);
4364
10
  if (s > text_start)
4365
0
    s = (UChar* )text_start;
4366
10
  else
4367
10
    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4368
4369
12
  while (s >= text) {
4370
10
    if (str_lower_case_match(enc, case_fold_flag,
4371
10
                             target, target_end, s, text_end))
4372
8
      return s;
4373
4374
2
    s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4375
2
  }
4376
4377
2
  return (UChar* )NULL;
4378
10
}
4379
4380
/* Sunday's quick search applied to a multibyte string */
4381
static UChar*
4382
bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4383
                 const UChar* text, const UChar* text_end,
4384
                 const UChar* text_range)
4385
0
{
4386
0
  const UChar *s, *se, *t, *p, *end;
4387
0
  const UChar *tail;
4388
0
  ptrdiff_t skip, tlen1;
4389
0
  OnigEncoding enc = reg->enc;
4390
4391
# ifdef ONIG_DEBUG_SEARCH
4392
  fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4393
          (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4394
# endif
4395
4396
0
  tail = target_end - 1;
4397
0
  tlen1 = tail - target;
4398
0
  end = text_range;
4399
0
  if (end + tlen1 > text_end)
4400
0
    end = text_end - tlen1;
4401
4402
0
  s = text;
4403
4404
0
  while (s < end) {
4405
0
    p = se = s + tlen1;
4406
0
    t = tail;
4407
0
    while (*p == *t) {
4408
0
      if (t == target) return (UChar* )s;
4409
0
      p--; t--;
4410
0
    }
4411
0
    if (s + 1 >= end) break;
4412
0
    skip = reg->map[se[1]];
4413
0
    t = s;
4414
0
    do {
4415
0
      s += enclen(enc, s, end);
4416
0
    } while ((s - t) < skip && s < end);
4417
0
  }
4418
4419
0
  return (UChar* )NULL;
4420
0
}
4421
4422
/* Sunday's quick search */
4423
static UChar*
4424
bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4425
          const UChar* text, const UChar* text_end, const UChar* text_range)
4426
65.8k
{
4427
65.8k
  const UChar *s, *t, *p, *end;
4428
65.8k
  const UChar *tail;
4429
65.8k
  ptrdiff_t tlen1;
4430
4431
# ifdef ONIG_DEBUG_SEARCH
4432
  fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4433
          (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4434
# endif
4435
4436
65.8k
  tail = target_end - 1;
4437
65.8k
  tlen1 = tail - target;
4438
65.8k
  end = text_range + tlen1;
4439
65.8k
  if (end > text_end)
4440
62.8k
    end = text_end;
4441
4442
65.8k
  s = text + tlen1;
4443
269k
  while (s < end) {
4444
268k
    p = s;
4445
268k
    t = tail;
4446
2.00M
    while (*p == *t) {
4447
1.79M
      if (t == target) return (UChar* )p;
4448
1.73M
      p--; t--;
4449
1.73M
    }
4450
203k
    if (s + 1 >= end) break;
4451
203k
    s += reg->map[s[1]];
4452
203k
  }
4453
4454
639
  return (UChar* )NULL;
4455
65.8k
}
4456
4457
/* Sunday's quick search applied to a multibyte string (ignore case) */
4458
static UChar*
4459
bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4460
                    const UChar* text, const UChar* text_end,
4461
                    const UChar* text_range)
4462
0
{
4463
0
  const UChar *s, *se, *t, *end;
4464
0
  const UChar *tail;
4465
0
  ptrdiff_t skip, tlen1;
4466
0
  OnigEncoding enc = reg->enc;
4467
0
  int case_fold_flag = reg->case_fold_flag;
4468
4469
# ifdef ONIG_DEBUG_SEARCH
4470
  fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4471
          (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4472
# endif
4473
4474
0
  tail = target_end - 1;
4475
0
  tlen1 = tail - target;
4476
0
  end = text_range;
4477
0
  if (end + tlen1 > text_end)
4478
0
    end = text_end - tlen1;
4479
4480
0
  s = text;
4481
4482
0
  while (s < end) {
4483
0
    se = s + tlen1;
4484
0
    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4485
0
                             s, se + 1))
4486
0
      return (UChar* )s;
4487
0
    if (s + 1 >= end) break;
4488
0
    skip = reg->map[se[1]];
4489
0
    t = s;
4490
0
    do {
4491
0
      s += enclen(enc, s, end);
4492
0
    } while ((s - t) < skip && s < end);
4493
0
  }
4494
4495
0
  return (UChar* )NULL;
4496
0
}
4497
4498
/* Sunday's quick search (ignore case) */
4499
static UChar*
4500
bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4501
             const UChar* text, const UChar* text_end, const UChar* text_range)
4502
60.8k
{
4503
60.8k
  const UChar *s, *p, *end;
4504
60.8k
  const UChar *tail;
4505
60.8k
  ptrdiff_t tlen1;
4506
60.8k
  OnigEncoding enc = reg->enc;
4507
60.8k
  int case_fold_flag = reg->case_fold_flag;
4508
4509
# ifdef ONIG_DEBUG_SEARCH
4510
  fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4511
          (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4512
# endif
4513
4514
60.8k
  tail = target_end - 1;
4515
60.8k
  tlen1 = tail - target;
4516
60.8k
  end = text_range + tlen1;
4517
60.8k
  if (end > text_end)
4518
58.3k
    end = text_end;
4519
4520
60.8k
  s = text + tlen1;
4521
107k
  while (s < end) {
4522
107k
    p = s - tlen1;
4523
107k
    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4524
107k
                             p, s + 1))
4525
60.7k
      return (UChar* )p;
4526
46.6k
    if (s + 1 >= end) break;
4527
46.5k
    s += reg->map[s[1]];
4528
46.5k
  }
4529
4530
122
  return (UChar* )NULL;
4531
60.8k
}
4532
4533
static UChar*
4534
map_search(OnigEncoding enc, UChar map[],
4535
           const UChar* text, const UChar* text_range, const UChar* text_end)
4536
222k
{
4537
222k
  const UChar *s = text;
4538
4539
2.21M
  while (s < text_range) {
4540
2.21M
    if (map[*s]) return (UChar* )s;
4541
4542
1.99M
    s += enclen(enc, s, text_end);
4543
1.99M
  }
4544
731
  return (UChar* )NULL;
4545
222k
}
4546
4547
static UChar*
4548
map_search_backward(OnigEncoding enc, UChar map[],
4549
                    const UChar* text, const UChar* adjust_text,
4550
                    const UChar* text_start, const UChar* text_end)
4551
7
{
4552
7
  const UChar *s = text_start;
4553
4554
13
  while (s >= text) {
4555
10
    if (map[*s]) return (UChar* )s;
4556
4557
6
    s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4558
6
  }
4559
3
  return (UChar* )NULL;
4560
7
}
4561
4562
extern OnigPosition
4563
onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4564
            OnigOptionType option)
4565
0
{
4566
0
  ptrdiff_t r;
4567
0
  UChar *prev;
4568
0
  OnigMatchArg msa;
4569
4570
0
  MATCH_ARG_INIT(msa, option, region, at, at);
4571
#ifdef USE_COMBINATION_EXPLOSION_CHECK
4572
  {
4573
    ptrdiff_t offset = at - str;
4574
    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4575
  }
4576
#endif
4577
4578
0
  if (region) {
4579
0
    r = onig_region_resize_clear(region, reg->num_mem + 1);
4580
0
  }
4581
0
  else
4582
0
    r = 0;
4583
4584
0
  if (r == 0) {
4585
0
    prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4586
0
    r = match_at(reg, str, end,
4587
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4588
                 end,
4589
#endif
4590
0
                 at, prev, &msa);
4591
0
  }
4592
4593
0
  MATCH_ARG_FREE(msa);
4594
0
  return r;
4595
0
}
4596
4597
static int
4598
forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4599
                     UChar* range, UChar** low, UChar** high, UChar** low_prev)
4600
467k
{
4601
467k
  UChar *p, *pprev = (UChar* )NULL;
4602
467k
  size_t input_len = end - str;
4603
4604
#ifdef ONIG_DEBUG_SEARCH
4605
  fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4606
          (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4607
#endif
4608
4609
467k
  if (reg->dmin > input_len) {
4610
146
    return 0;
4611
146
  }
4612
4613
467k
  p = s;
4614
467k
  if (reg->dmin != 0) {
4615
17.1k
    if ((OnigDistance)(end - p) <= reg->dmin) return 0; /* fail */
4616
17.1k
    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4617
16.7k
      p += reg->dmin;
4618
16.7k
    }
4619
369
    else {
4620
369
      UChar *q = p + reg->dmin;
4621
4622
6.23k
      while (p < q) p += enclen(reg->enc, p, end);
4623
369
    }
4624
17.1k
  }
4625
4626
560k
 retry:
4627
560k
  switch (reg->optimize) {
4628
171k
  case ONIG_OPTIMIZE_EXACT:
4629
171k
    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4630
171k
    break;
4631
40.0k
  case ONIG_OPTIMIZE_EXACT_IC:
4632
40.0k
    p = slow_search_ic(reg->enc, reg->case_fold_flag,
4633
40.0k
                       reg->exact, reg->exact_end, p, end, range);
4634
40.0k
    break;
4635
4636
65.8k
  case ONIG_OPTIMIZE_EXACT_BM:
4637
65.8k
    p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4638
65.8k
    break;
4639
4640
0
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4641
0
    p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4642
0
    break;
4643
4644
60.8k
  case ONIG_OPTIMIZE_EXACT_BM_IC:
4645
60.8k
    p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4646
60.8k
    break;
4647
4648
0
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4649
0
    p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4650
0
    break;
4651
4652
222k
  case ONIG_OPTIMIZE_MAP:
4653
222k
    p = map_search(reg->enc, reg->map, p, range, end);
4654
222k
    break;
4655
560k
  }
4656
4657
560k
  if (p && p < range) {
4658
558k
    if ((OnigDistance)(p - s) < reg->dmin) {
4659
92.7k
    retry_gate:
4660
92.7k
      pprev = p;
4661
92.7k
      p += enclen(reg->enc, p, end);
4662
92.7k
      goto retry;
4663
0
    }
4664
4665
558k
    if (reg->sub_anchor) {
4666
93.5k
      UChar* prev;
4667
4668
93.5k
      switch (reg->sub_anchor) {
4669
93.5k
      case ANCHOR_BEGIN_LINE:
4670
93.5k
        if (!ON_STR_BEGIN(p)) {
4671
93.4k
          prev = onigenc_get_prev_char_head(reg->enc,
4672
93.4k
                                            (pprev ? pprev : str), p, end);
4673
93.4k
          if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4674
92.7k
            goto retry_gate;
4675
93.4k
        }
4676
791
        break;
4677
4678
791
      case ANCHOR_END_LINE:
4679
0
        if (ON_STR_END(p)) {
4680
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4681
          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4682
                                            (pprev ? pprev : str), p);
4683
          if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4684
            goto retry_gate;
4685
#endif
4686
0
        }
4687
0
        else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4688
0
          goto retry_gate;
4689
0
        break;
4690
93.5k
      }
4691
93.5k
    }
4692
4693
465k
    if (reg->dmax == 0) {
4694
330k
      *low = p;
4695
330k
      if (low_prev) {
4696
330k
        if (*low > s)
4697
40.3k
          *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4698
289k
        else
4699
289k
          *low_prev = onigenc_get_prev_char_head(reg->enc,
4700
289k
                                                 (pprev ? pprev : str), p, end);
4701
330k
      }
4702
330k
      *high = p;
4703
330k
    }
4704
135k
    else {
4705
135k
      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4706
76.3k
        if ((OnigDistance)(p - str) < reg->dmax) {
4707
4.62k
          *low = (UChar* )str;
4708
4.62k
          if (low_prev)
4709
4.62k
            *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4710
4.62k
        }
4711
71.7k
        else {
4712
71.7k
          *low = p - reg->dmax;
4713
71.7k
          if (*low > s) {
4714
7.04k
            *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4715
7.04k
                                                                *low, end, (const UChar** )low_prev);
4716
7.04k
            if (low_prev && IS_NULL(*low_prev))
4717
7.04k
              *low_prev = onigenc_get_prev_char_head(reg->enc,
4718
7.04k
                                                     (pprev ? pprev : s), *low, end);
4719
7.04k
          }
4720
64.6k
          else {
4721
64.6k
            if (low_prev)
4722
64.6k
              *low_prev = onigenc_get_prev_char_head(reg->enc,
4723
64.6k
                                                 (pprev ? pprev : str), *low, end);
4724
64.6k
          }
4725
71.7k
        }
4726
76.3k
      }
4727
      /* no needs to adjust *high, *high is used as range check only */
4728
135k
      if ((OnigDistance)(p - str) < reg->dmin)
4729
0
        *high = (UChar* )str;
4730
135k
      else
4731
135k
        *high = p - reg->dmin;
4732
135k
    }
4733
4734
#ifdef ONIG_DEBUG_SEARCH
4735
    fprintf(stderr,
4736
    "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4737
            *low - str, *high - str, reg->dmin, reg->dmax);
4738
#endif
4739
465k
    return 1; /* success */
4740
558k
  }
4741
4742
2.14k
  return 0; /* fail */
4743
560k
}
4744
4745
/* Length threshold named for the BM backward search.
   NOTE(review): no use of this macro is visible in this part of the file;
   confirm whether it is still referenced. */
#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100
4746
4747
static int
4748
backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4749
                      UChar* s, const UChar* range, UChar* adjrange,
4750
                      UChar** low, UChar** high)
4751
41
{
4752
41
  UChar *p;
4753
41
  size_t input_len = end - str;
4754
4755
41
  if (reg->dmin > input_len) {
4756
0
    return 0;
4757
0
  }
4758
4759
41
  p = s;
4760
4761
41
 retry:
4762
41
  switch (reg->optimize) {
4763
24
  case ONIG_OPTIMIZE_EXACT:
4764
24
  exact_method:
4765
24
    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4766
24
                             range, adjrange, end, p);
4767
24
    break;
4768
4769
10
  case ONIG_OPTIMIZE_EXACT_IC:
4770
10
  case ONIG_OPTIMIZE_EXACT_BM_IC:
4771
10
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4772
10
    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4773
10
                                reg->exact, reg->exact_end,
4774
10
                                range, adjrange, end, p);
4775
10
    break;
4776
4777
0
  case ONIG_OPTIMIZE_EXACT_BM:
4778
0
  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4779
0
    goto exact_method;
4780
0
    break;
4781
4782
7
  case ONIG_OPTIMIZE_MAP:
4783
7
    p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4784
7
    break;
4785
41
  }
4786
4787
41
  if (p) {
4788
36
    if (reg->sub_anchor) {
4789
0
      UChar* prev;
4790
4791
0
      switch (reg->sub_anchor) {
4792
0
      case ANCHOR_BEGIN_LINE:
4793
0
        if (!ON_STR_BEGIN(p)) {
4794
0
          prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4795
0
          if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4796
0
            p = prev;
4797
0
            goto retry;
4798
0
          }
4799
0
        }
4800
0
        break;
4801
4802
0
      case ANCHOR_END_LINE:
4803
0
        if (ON_STR_END(p)) {
4804
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4805
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4806
          if (IS_NULL(prev)) goto fail;
4807
          if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4808
            p = prev;
4809
            goto retry;
4810
          }
4811
#endif
4812
0
        }
4813
0
        else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4814
0
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4815
0
          if (IS_NULL(p)) goto fail;
4816
0
          goto retry;
4817
0
        }
4818
0
        break;
4819
0
      }
4820
0
    }
4821
4822
36
    if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4823
35
      if ((OnigDistance)(p - str) < reg->dmax)
4824
0
        *low = (UChar* )str;
4825
35
      else
4826
35
        *low = p - reg->dmax;
4827
4828
35
      if (reg->dmin != 0) {
4829
0
        if ((OnigDistance)(p - str) < reg->dmin)
4830
0
          *high = (UChar* )str;
4831
0
        else
4832
0
          *high = p - reg->dmin;
4833
0
      }
4834
35
      else {
4835
35
        *high = p;
4836
35
      }
4837
4838
35
      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4839
35
    }
4840
4841
#ifdef ONIG_DEBUG_SEARCH
4842
    fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4843
            (int )(*low - str), (int )(*high - str));
4844
#endif
4845
36
    return 1; /* success */
4846
36
  }
4847
4848
5
 fail:
4849
#ifdef ONIG_DEBUG_SEARCH
4850
  fprintf(stderr, "backward_search_range: fail.\n");
4851
#endif
4852
5
  return 0; /* fail */
4853
41
}
4854
4855
4856
extern OnigPosition
4857
onig_search(regex_t* reg, const UChar* str, const UChar* end,
4858
            const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4859
1.17M
{
4860
1.17M
  return onig_search_gpos(reg, str, end, start, start, range, region, option);
4861
1.17M
}
4862
4863
/*
 * Search [str, end) for a match of reg.
 *
 * start/range select the scan: when range > start the candidates run
 * forward from start, otherwise they run backward from start down to range.
 * global_pos is handed to MATCH_ARG_INIT and, for ANCHOR_BEGIN_POSITION,
 * is used to narrow range.
 *
 * Returns the offset (s - str) of the match start on success,
 * ONIG_MISMATCH when nothing matched, ONIGERR_TIMEOUT when match_at()
 * reported a timeout, or another negative value propagated from
 * onig_region_resize_clear() / match_at().
 */
extern OnigPosition
4864
onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4865
            const UChar* global_pos,
4866
            const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4867
1.17M
{
4868
1.17M
  ptrdiff_t r;
4869
1.17M
  UChar *s, *prev;
4870
1.17M
  OnigMatchArg msa;
4871
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4872
  const UChar *orig_start = start;
4873
  const UChar *orig_range = range;
4874
#endif
4875
4876
#ifdef ONIG_DEBUG_SEARCH
4877
  fprintf(stderr,
4878
     "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4879
     (uintptr_t )str, str, end - str, start - str, range - str);
4880
#endif
4881
4882
1.17M
  if (region) {
4883
1.17M
    r = onig_region_resize_clear(region, reg->num_mem + 1);
4884
1.17M
    if (r) goto finish_no_msa;
4885
1.17M
  }
4886
4887
1.17M
  /* start must lie inside [str, end]; msa is not initialized yet here */
  if (start > end || start < str) goto mismatch_no_msa;
4888
4889
4890
  /* MATCH_AND_RETURN_CHECK runs match_at() at s and jumps to match,
     timeout or finish depending on the result; on ONIG_MISMATCH (and, in
     the FIND_LONGEST variants, on a match while IS_FIND_LONGEST) execution
     continues after the macro. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4891
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4892
#  define MATCH_AND_RETURN_CHECK(upper_range) \
4893
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4894
  switch (r) { \
4895
    case ONIG_MISMATCH: \
4896
      break; \
4897
    case ONIGERR_TIMEOUT: \
4898
      goto timeout; \
4899
    default: \
4900
      if (r >= 0) { \
4901
        if (! IS_FIND_LONGEST(reg->options)) { \
4902
          goto match; \
4903
        }\
4904
      }\
4905
      else goto finish; /* error */ \
4906
  }
4907
# else
4908
#  define MATCH_AND_RETURN_CHECK(upper_range) \
4909
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4910
  switch (r) { \
4911
    case ONIG_MISMATCH: \
4912
      break; \
4913
    case ONIGERR_TIMEOUT: \
4914
      goto timeout; \
4915
    default: \
4916
      if (r >= 0) { \
4917
        goto match; \
4918
      }\
4919
      else goto finish; /* error */ \
4920
  }
4921
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4922
#else
4923
1.17M
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4924
1.17M
#  define MATCH_AND_RETURN_CHECK(none) \
4925
7.67M
  r = match_at(reg, str, end, s, prev, &msa);\
4926
7.67M
  switch (r) { \
4927
6.50M
    case ONIG_MISMATCH: \
4928
6.50M
      break; \
4929
0
    case ONIGERR_TIMEOUT: \
4930
0
      goto timeout; \
4931
1.16M
    default: \
4932
1.16M
      if (r >= 0) { \
4933
1.16M
        if (! IS_FIND_LONGEST(reg->options)) { \
4934
1.16M
          goto match; \
4935
1.16M
        } \
4936
1.16M
      } \
4937
1.16M
      else goto finish; /* error */ \
4938
7.67M
  }
4939
# else
4940
#  define MATCH_AND_RETURN_CHECK(none) \
4941
  r = match_at(reg, str, end, s, prev, &msa);\
4942
  switch (r) { \
4943
    case ONIG_MISMATCH: \
4944
      break; \
4945
    case ONIGERR_TIMEOUT: \
4946
      goto timeout; \
4947
    default: \
4948
    if (r >= 0) { \
4949
        goto match; \
4950
      } \
4951
      else goto finish; /* error */ \
4952
  }
4953
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4954
1.17M
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4955
4956
4957
  /* anchor optimize: resume search range */
4958
1.17M
  if (reg->anchor != 0 && str < end) {
4959
19.9k
    UChar *min_semi_end, *max_semi_end;
4960
4961
19.9k
    if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4962
      /* search start-position only */
4963
3.97k
    begin_position:
4964
3.97k
      if (range > start)
4965
3.96k
      {
4966
3.96k
        if (global_pos > start)
4967
0
        {
4968
0
          if (global_pos < range)
4969
0
            range = global_pos + 1;
4970
0
        }
4971
3.96k
        else
4972
3.96k
          range = start + 1;
4973
3.96k
      }
4974
13
      else
4975
13
        range = start;
4976
3.97k
    }
4977
15.9k
    else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4978
      /* search str-position only */
4979
20
      if (range > start) {
4980
17
        if (start != str) goto mismatch_no_msa;
4981
17
        range = str + 1;
4982
17
      }
4983
3
      else {
4984
3
        if (range <= str) {
4985
0
          start = str;
4986
0
          range = str;
4987
0
        }
4988
3
        else
4989
3
          goto mismatch_no_msa;
4990
3
      }
4991
20
    }
4992
15.9k
    else if (reg->anchor & ANCHOR_END_BUF) {
4993
21
      min_semi_end = max_semi_end = (UChar* )end;
4994
4995
284
      /* shared with ANCHOR_SEMI_END_BUF: clip start/range using
         anchor_dmin / anchor_dmax measured from the (semi) end */
    end_buf:
4996
284
      if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
4997
0
        goto mismatch_no_msa;
4998
4999
284
      if (range > start) {
5000
276
        if ((OnigDistance)(min_semi_end - start) > reg->anchor_dmax) {
5001
50
          start = min_semi_end - reg->anchor_dmax;
5002
50
          if (start < end)
5003
40
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5004
50
        }
5005
276
        if ((OnigDistance)(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5006
255
          if ((OnigDistance)(max_semi_end - str + 1) < reg->anchor_dmin)
5007
0
            goto mismatch_no_msa;
5008
255
          else
5009
255
            range = max_semi_end - reg->anchor_dmin + 1;
5010
255
        }
5011
5012
276
        if (start > range) goto mismatch_no_msa;
5013
        /* If start == range, match with empty at end.
5014
           Backward search is used. */
5015
276
      }
5016
8
      else {
5017
8
        if ((OnigDistance)(min_semi_end - range) > reg->anchor_dmax) {
5018
0
          range = min_semi_end - reg->anchor_dmax;
5019
0
        }
5020
8
        if ((OnigDistance)(max_semi_end - start) < reg->anchor_dmin) {
5021
6
          if ((OnigDistance)(max_semi_end - str) < reg->anchor_dmin)
5022
0
            goto mismatch_no_msa;
5023
6
          else {
5024
6
            start = max_semi_end - reg->anchor_dmin;
5025
6
            start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5026
6
          }
5027
6
        }
5028
8
        if (range > start) goto mismatch_no_msa;
5029
8
      }
5030
284
    }
5031
15.9k
    else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5032
265
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5033
5034
265
      max_semi_end = (UChar* )end;
5035
265
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5036
200
        min_semi_end = pre_end;
5037
5038
200
#ifdef USE_CRNL_AS_LINE_TERMINATOR
5039
200
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5040
200
        if (IS_NOT_NULL(pre_end) &&
5041
200
            IS_NEWLINE_CRLF(reg->options) &&
5042
0
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5043
0
          min_semi_end = pre_end;
5044
0
        }
5045
200
#endif
5046
200
        if (min_semi_end > str && start <= min_semi_end) {
5047
198
          goto end_buf;
5048
198
        }
5049
200
      }
5050
65
      else {
5051
65
        min_semi_end = (UChar* )end;
5052
65
        goto end_buf;
5053
65
      }
5054
265
    }
5055
15.6k
    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5056
11
      goto begin_position;
5057
11
    }
5058
19.9k
  }
5059
1.15M
  else if (str == end) { /* empty string */
5060
8.80k
    static const UChar address_for_empty_string[] = "";
5061
5062
#ifdef ONIG_DEBUG_SEARCH
5063
    fprintf(stderr, "onig_search: empty string.\n");
5064
#endif
5065
5066
8.80k
    /* an empty subject can only match when the pattern needs no input */
    if (reg->threshold_len == 0) {
5067
3.56k
      start = end = str = address_for_empty_string;
5068
3.56k
      s = (UChar* )start;
5069
3.56k
      prev = (UChar* )NULL;
5070
5071
3.56k
      MATCH_ARG_INIT(msa, option, region, start, start);
5072
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5073
      msa.state_check_buff = (void* )0;
5074
      msa.state_check_buff_size = 0;   /* NO NEED, for valgrind */
5075
#endif
5076
3.56k
      MATCH_AND_RETURN_CHECK(end);
5077
1.43k
      goto mismatch;
5078
3.56k
    }
5079
5.23k
    goto mismatch_no_msa;
5080
8.80k
  }
5081
5082
#ifdef ONIG_DEBUG_SEARCH
5083
  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5084
          (int )(end - str), (int )(start - str), (int )(range - str));
5085
#endif
5086
5087
1.16M
  /* from here on every exit must go through a label that frees msa */
  MATCH_ARG_INIT(msa, option, region, start, global_pos);
5088
#ifdef USE_COMBINATION_EXPLOSION_CHECK
5089
  {
5090
    ptrdiff_t offset = (MIN(start, range) - str);
5091
    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5092
  }
5093
#endif
5094
5095
1.16M
  s = (UChar* )start;
5096
1.16M
  if (range > start) {   /* forward search */
5097
1.16M
    if (s > str)
5098
1.15M
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5099
12.7k
    else
5100
12.7k
      prev = (UChar* )NULL;
5101
5102
1.16M
    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5103
188k
      UChar *sch_range, *low, *high, *low_prev;
5104
5105
188k
      /* sch_range: how far the optimizer search may look, i.e. range
         extended by dmax and capped at end */
      if (reg->dmax != 0) {
5106
82.1k
        if (reg->dmax == ONIG_INFINITE_DISTANCE)
5107
59.5k
          sch_range = (UChar* )end;
5108
22.5k
        else {
5109
22.5k
          if ((OnigDistance)(end - range) < reg->dmax)
5110
19.4k
            sch_range = (UChar* )end;
5111
3.13k
          else {
5112
3.13k
            sch_range = (UChar* )range + reg->dmax;
5113
3.13k
          }
5114
22.5k
        }
5115
82.1k
      }
5116
106k
      else
5117
106k
        sch_range = (UChar* )range;
5118
5119
188k
      if ((end - start) < reg->threshold_len)
5120
171
        goto mismatch;
5121
5122
188k
      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5123
408k
        do {
5124
408k
          if (! forward_search_range(reg, str, end, s, sch_range,
5125
408k
                                     &low, &high, &low_prev)) goto mismatch;
5126
406k
          if (s < low) {
5127
47.4k
            s    = low;
5128
47.4k
            prev = low_prev;
5129
47.4k
          }
5130
761k
          while (s <= high) {
5131
481k
            MATCH_AND_RETURN_CHECK(orig_range);
5132
354k
            prev = s;
5133
354k
            s += enclen(reg->enc, s, end);
5134
354k
          }
5135
406k
        } while (s < range);
5136
133
        goto mismatch;
5137
128k
      }
5138
59.5k
      else { /* check only. */
5139
59.5k
        if (! forward_search_range(reg, str, end, s, sch_range,
5140
59.5k
                                   &low, &high, (UChar** )NULL)) goto mismatch;
5141
5142
59.1k
        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5143
20.2k
          do {
5144
20.2k
            MATCH_AND_RETURN_CHECK(orig_range);
5145
18.5k
            prev = s;
5146
18.5k
            s += enclen(reg->enc, s, end);
5147
5148
18.5k
            if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5149
58.0k
              /* skip ahead until prev is a newline or range is reached */
              while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5150
56.8k
                  && s < range) {
5151
56.8k
                prev = s;
5152
56.8k
                s += enclen(reg->enc, s, end);
5153
56.8k
              }
5154
1.29k
            }
5155
18.5k
          } while (s < range);
5156
57
          goto mismatch;
5157
1.73k
        }
5158
59.1k
      }
5159
188k
    }
5160
5161
7.16M
    /* plain forward scan: try every character position in [s, range) */
    do {
5162
7.16M
      MATCH_AND_RETURN_CHECK(orig_range);
5163
6.13M
      prev = s;
5164
6.13M
      s += enclen(reg->enc, s, end);
5165
6.13M
    } while (s < range);
5166
5167
2.64k
    if (s == range) { /* because empty match with /$/. */
5168
2.64k
      MATCH_AND_RETURN_CHECK(orig_range);
5169
2.61k
    }
5170
2.64k
  }
5171
3.30k
  else {  /* backward search */
5172
3.30k
    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5173
226
      UChar *low, *high, *adjrange, *sch_start;
5174
226
      const UChar *min_range;
5175
5176
226
      if (range < end)
5177
41
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5178
185
      else
5179
185
        adjrange = (UChar* )end;
5180
5181
226
      if ((OnigDistance)(end - range) > reg->dmin)
5182
41
        min_range = range + reg->dmin;
5183
185
      else
5184
185
        min_range = end;
5185
5186
226
      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5187
146
          end - range >= reg->threshold_len) {
5188
40
        do {
5189
40
          if ((OnigDistance)(end - s) > reg->dmax)
5190
14
            sch_start = s + reg->dmax;
5191
26
          else
5192
26
            sch_start = (UChar* )end;
5193
5194
40
          if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
5195
40
                                    &low, &high) <= 0)
5196
5
            goto mismatch;
5197
5198
35
          if (s > high)
5199
0
            s = high;
5200
5201
106
          while (s >= low) {
5202
71
            prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5203
71
            MATCH_AND_RETURN_CHECK(orig_start);
5204
71
            s = prev;
5205
71
          }
5206
35
        } while (s >= range);
5207
35
        goto mismatch;
5208
40
      }
5209
186
      else { /* check only. */
5210
186
        if (end - range < reg->threshold_len) goto mismatch;
5211
5212
1
        if (reg->dmax != 0) {
5213
1
          if (reg->dmax == ONIG_INFINITE_DISTANCE)
5214
1
            sch_start = (UChar* )end;
5215
0
          else {
5216
0
            if ((OnigDistance)(end - s) > reg->dmax) {
5217
0
              sch_start = s + reg->dmax;
5218
0
              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5219
0
                                                    start, sch_start, end);
5220
0
            } else
5221
0
              sch_start = (UChar* )end;
5222
0
          }
5223
1
        }
5224
0
        else
5225
0
          sch_start = (UChar* )s;
5226
5227
1
        if (backward_search_range(reg, str, end, sch_start, min_range, adjrange,
5228
1
                                  &low, &high) <= 0) goto mismatch;
5229
1
      }
5230
226
    }
5231
5232
3.08k
    /* plain backward scan: try s, then each previous character head,
       while s >= range */
    do {
5233
3.08k
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5234
3.08k
      MATCH_AND_RETURN_CHECK(orig_start);
5235
224
      s = prev;
5236
224
    } while (s >= range);
5237
3.08k
  }
5238
5239
7.16k
  /* no position matched; with FIND_LONGEST the best match recorded in msa
     (best_len >= 0) is still reported */
 mismatch:
5240
7.16k
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5241
7.16k
  if (IS_FIND_LONGEST(reg->options)) {
5242
0
    if (msa.best_len >= 0) {
5243
0
      s = msa.best_s;
5244
0
      goto match;
5245
0
    }
5246
0
  }
5247
7.16k
#endif
5248
7.16k
  r = ONIG_MISMATCH;
5249
5250
7.86k
 finish:
5251
7.86k
  MATCH_ARG_FREE(msa);
5252
5253
  /* If result is mismatch and no FIND_NOT_EMPTY option,
5254
     then the region is not set in match_at(). */
5255
7.86k
  if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5256
0
    onig_region_clear(region);
5257
0
  }
5258
5259
#ifdef ONIG_DEBUG
5260
  if (r != ONIG_MISMATCH)
5261
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5262
#endif
5263
7.86k
  return r;
5264
5265
5.24k
  /* exits taken before MATCH_ARG_INIT: msa is not freed on these paths */
 mismatch_no_msa:
5266
5.24k
  r = ONIG_MISMATCH;
5267
5.24k
 finish_no_msa:
5268
#ifdef ONIG_DEBUG
5269
  if (r != ONIG_MISMATCH)
5270
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5271
#endif
5272
5.24k
  return r;
5273
5274
1.16M
  /* success: the result is the byte offset of the match start */
 match:
5275
1.16M
  MATCH_ARG_FREE(msa);
5276
1.16M
  return s - str;
5277
5278
0
timeout:
5279
0
  MATCH_ARG_FREE(msa);
5280
0
  return ONIGERR_TIMEOUT;
5281
5.24k
}
5282
5283
extern OnigPosition
5284
onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5285
          OnigRegion* region, OnigOptionType option,
5286
          int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5287
          void* callback_arg)
5288
0
{
5289
0
  OnigPosition r;
5290
0
  OnigPosition n;
5291
0
  int rs;
5292
0
  const UChar* start;
5293
5294
0
  n = 0;
5295
0
  start = str;
5296
0
  while (1) {
5297
0
    r = onig_search(reg, str, end, start, end, region, option);
5298
0
    if (r >= 0) {
5299
0
      rs = scan_callback(n, r, region, callback_arg);
5300
0
      n++;
5301
0
      if (rs != 0)
5302
0
        return rs;
5303
5304
0
      if (region->end[0] == start - str) {
5305
0
        if (start >= end) break;
5306
0
        start += enclen(reg->enc, start, end);
5307
0
      }
5308
0
      else
5309
0
        start = str + region->end[0];
5310
5311
0
      if (start > end)
5312
0
        break;
5313
0
    }
5314
0
    else if (r == ONIG_MISMATCH) {
5315
0
      break;
5316
0
    }
5317
0
    else { /* error */
5318
0
      return r;
5319
0
    }
5320
0
  }
5321
5322
0
  return n;
5323
0
}
5324
5325
extern OnigEncoding
5326
onig_get_encoding(const regex_t* reg)
5327
0
{
5328
0
  return reg->enc;
5329
0
}
5330
5331
extern OnigOptionType
5332
onig_get_options(const regex_t* reg)
5333
0
{
5334
0
  return reg->options;
5335
0
}
5336
5337
extern  OnigCaseFoldType
5338
onig_get_case_fold_flag(const regex_t* reg)
5339
0
{
5340
0
  return reg->case_fold_flag;
5341
0
}
5342
5343
extern const OnigSyntaxType*
5344
onig_get_syntax(const regex_t* reg)
5345
0
{
5346
0
  return reg->syntax;
5347
0
}
5348
5349
extern int
5350
onig_number_of_captures(const regex_t* reg)
5351
0
{
5352
0
  return reg->num_mem;
5353
0
}
5354
5355
extern int
5356
onig_number_of_capture_histories(const regex_t* reg)
5357
0
{
5358
#ifdef USE_CAPTURE_HISTORY
5359
  int i, n;
5360
5361
  n = 0;
5362
  for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5363
    if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5364
      n++;
5365
  }
5366
  return n;
5367
#else
5368
0
  return 0;
5369
0
#endif
5370
0
}
5371
5372
extern void
5373
onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5374
0
{
5375
0
  *to = *from;
5376
0
}