Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/pcre/pcre2lib/pcre2_match.c
Line
Count
Source (jump to first uncovered line)
1
/*************************************************
2
*      Perl-Compatible Regular Expressions       *
3
*************************************************/
4
5
/* PCRE is a library of functions to support regular expressions whose syntax
6
and semantics are as close as possible to those of the Perl 5 language.
7
8
                       Written by Philip Hazel
9
     Original API code Copyright (c) 1997-2012 University of Cambridge
10
          New API code Copyright (c) 2015-2024 University of Cambridge
11
12
-----------------------------------------------------------------------------
13
Redistribution and use in source and binary forms, with or without
14
modification, are permitted provided that the following conditions are met:
15
16
    * Redistributions of source code must retain the above copyright notice,
17
      this list of conditions and the following disclaimer.
18
19
    * Redistributions in binary form must reproduce the above copyright
20
      notice, this list of conditions and the following disclaimer in the
21
      documentation and/or other materials provided with the distribution.
22
23
    * Neither the name of the University of Cambridge nor the names of its
24
      contributors may be used to endorse or promote products derived from
25
      this software without specific prior written permission.
26
27
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
POSSIBILITY OF SUCH DAMAGE.
38
-----------------------------------------------------------------------------
39
*/
40
41
42
#ifdef HAVE_CONFIG_H
43
#include "config.h"
44
#endif
45
46
#include "pcre2_internal.h"
47
48
/* These defines enable debugging code */
49
50
/* #define DEBUG_FRAMES_DISPLAY */
51
/* #define DEBUG_SHOW_OPS */
52
/* #define DEBUG_SHOW_RMATCH */
53
54
#ifdef DEBUG_FRAMES_DISPLAY
55
#include <stdarg.h>
56
#endif
57
58
#ifdef DEBUG_SHOW_OPS
59
static const char *OP_names[] = { OP_NAME_LIST };
60
#endif
61
62
/* These defines identify the name of the block containing "static"
63
information, and fields within it. */
64
65
64.6M
#define NLBLOCK mb              /* Block containing newline information */
66
7.92k
#define PSSTART start_subject   /* Field containing processed string start */
67
16.1M
#define PSEND   end_subject     /* Field containing processed string end */
68
69
379k
#define RECURSE_UNSET 0xffffffffu  /* Bigger than max group number */
70
71
/* Masks for identifying the public options that are permitted at match time. */
72
73
#define PUBLIC_MATCH_OPTIONS \
74
4.32k
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
75
4.32k
   PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
76
4.32k
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
77
4.32k
   PCRE2_DISABLE_RECURSELOOP_CHECK)
78
79
#define PUBLIC_JIT_MATCH_OPTIONS \
80
   (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
81
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
82
    PCRE2_COPY_MATCHED_SUBJECT)
83
84
/* Non-error returns from and within the match() function. Error returns are
85
externally defined PCRE2_ERROR_xxx codes, which are all negative. */
86
87
4.77k
#define MATCH_MATCH        1
88
428M
#define MATCH_NOMATCH      0
89
90
/* Special internal returns used in the match() function. Make them
91
sufficiently negative to avoid the external error codes. */
92
93
1.42k
#define MATCH_ACCEPT       (-999)
94
1.96k
#define MATCH_KETRPOS      (-998)
95
/* The next 5 must be kept together and in sequence so that a test that checks
96
for any one of them can use a range. */
97
2.00M
#define MATCH_COMMIT       (-997)
98
360k
#define MATCH_PRUNE        (-996)
99
0
#define MATCH_SKIP         (-995)
100
2.94k
#define MATCH_SKIP_ARG     (-994)
101
9.61M
#define MATCH_THEN         (-993)
102
1.00M
#define MATCH_BACKTRACK_MAX MATCH_THEN
103
1.00M
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
104
105
/* Group frame type values. Zero means the frame is not a group frame. The
106
lower 16 bits are used for data (e.g. the capture number). Group frames are
107
used for most groups so that information about the start is easily available at
108
the end without having to scan back through intermediate frames (backtrack
109
points). */
110
111
2.04M
#define GF_CAPTURE     0x00010000u
112
462
#define GF_NOCAPTURE   0x00020000u
113
2.48M
#define GF_CONDASSERT  0x00030000u
114
7.06M
#define GF_RECURSE     0x00040000u
115
116
/* Masks for the identity and data parts of the group frame type. */
117
118
9.51M
#define GF_IDMASK(a)   ((a) & 0xffff0000u)
119
1.00M
#define GF_DATAMASK(a) ((a) & 0x0000ffffu)
120
121
/* Repetition types */
122
123
enum {
  REPTYPE_MIN,   /* used for the "?" forms: *? +? ?? and OP_CRMINRANGE */
  REPTYPE_MAX,   /* used for the plain forms: * + ? and OP_CRRANGE */
  REPTYPE_POS    /* used for the OP_CRPOSxxx forms */
};

/* Minimum and maximum repeat counts for the common repeats, one entry per
repeat kind, in matching order in both tables. A maximum of UINT32_MAX means
"no upper limit". The two range slots are placeholders only. */

static const uint32_t rep_min[] = {
  0,            /* *  */
  0,            /* *? */
  1,            /* +  */
  1,            /* +? */
  0,            /* ?  */
  0,            /* ?? */
  0,            /* dummy placefiller for OP_CRRANGE */
  0,            /* dummy placefiller for OP_CRMINRANGE */
  0,            /* OP_CRPOSSTAR */
  1,            /* OP_CRPOSPLUS */
  0             /* OP_CRPOSQUERY */
};

static const uint32_t rep_max[] = {
  UINT32_MAX,   /* *  */
  UINT32_MAX,   /* *? */
  UINT32_MAX,   /* +  */
  UINT32_MAX,   /* +? */
  1,            /* ?  */
  1,            /* ?? */
  0,            /* dummy placefiller for OP_CRRANGE */
  0,            /* dummy placefiller for OP_CRMINRANGE */
  UINT32_MAX,   /* OP_CRPOSSTAR */
  UINT32_MAX,   /* OP_CRPOSPLUS */
  1             /* OP_CRPOSQUERY */
};

/* Repetition type for each repeat kind. Unlike the two tables above, this one
has a final entry for OP_CRPOSRANGE. */

static const uint32_t rep_typ[] = {
  REPTYPE_MAX,  /* *  */
  REPTYPE_MIN,  /* *? */
  REPTYPE_MAX,  /* +  */
  REPTYPE_MIN,  /* +? */
  REPTYPE_MAX,  /* ?  */
  REPTYPE_MIN,  /* ?? */
  REPTYPE_MAX,  /* OP_CRRANGE */
  REPTYPE_MIN,  /* OP_CRMINRANGE */
  REPTYPE_POS,  /* OP_CRPOSSTAR */
  REPTYPE_POS,  /* OP_CRPOSPLUS */
  REPTYPE_POS,  /* OP_CRPOSQUERY */
  REPTYPE_POS   /* OP_CRPOSRANGE */
};
151
152
/* Numbers for RMATCH calls at backtracking points. When these lists are
153
changed, the code at RETURN_SWITCH below must be updated in sync.  */
154
155
enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
156
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
157
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
158
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39 };
159
160
#ifdef SUPPORT_WIDE_CHARS
161
enum { RM100=100, RM101, RM102, RM103 };
162
#endif
163
164
#ifdef SUPPORT_UNICODE
165
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
166
       RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
167
       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
168
       RM224 };
169
#endif
170
171
/* Define short names for general fields in the current backtrack frame, which
172
is always pointed to by the F variable. Occasional references to fields in
173
other frames are written out explicitly. There are also some fields in the
174
current frame whose names start with "temp" that are used for short-term,
175
localised backtracking memory. These are #defined with Lxxx names at the point
176
of use and undefined afterwards. */
177
178
855M
#define Fback_frame        F->back_frame
179
2.84M
#define Fcapture_last      F->capture_last
180
3.86M
#define Fcurrent_recurse   F->current_recurse
181
1.55G
#define Fecode             F->ecode
182
2.22G
#define Feptr              F->eptr
183
427M
#define Fgroup_frame_type  F->group_frame_type
184
12.3M
#define Flast_group_offset F->last_group_offset
185
322M
#define Flength            F->length
186
362k
#define Fmark              F->mark
187
1.28G
#define Frdepth            F->rdepth
188
374k
#define Fstart_match       F->start_match
189
5.32M
#define Foffset_top        F->offset_top
190
0
#define Foccu              F->occu
191
1.20G
#define Fop                F->op
192
4.96M
#define Fovector           F->ovector
193
855M
#define Freturn_id         F->return_id
194
195
196
#ifdef DEBUG_FRAMES_DISPLAY
197
/*************************************************
198
*      Display current frames and contents       *
199
*************************************************/
200
201
/* This debugging function displays the current set of frames and their
202
contents. It is not called automatically from anywhere, the intention being
203
that calls can be inserted where necessary when debugging frame-related
204
problems.
205
206
Arguments:
207
  f           the file to write to
208
  F           the current top frame
209
  P           a previous frame of interest
210
  frame_size  the frame size
211
  mb          points to the match block
212
  match_data  points to the match data block
213
  s           identification text
214
215
Returns:    nothing
216
*/
217
218
static void
219
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
220
  match_block *mb, pcre2_match_data *match_data, const char *s, ...)
221
{
222
uint32_t i;
223
heapframe *Q;
224
va_list ap;
225
va_start(ap, s);
226
227
fprintf(f, "FRAMES ");
228
vfprintf(f, s, ap);
229
va_end(ap);
230
231
if (P != NULL) fprintf(f, " P=%lu",
232
  ((char *)P - (char *)(match_data->heapframes))/frame_size);
233
fprintf(f, "\n");
234
235
for (i = 0, Q = match_data->heapframes;
236
     Q <= F;
237
     i++, Q = (heapframe *)((char *)Q + frame_size))
238
  {
239
  fprintf(f, "Frame %d type=%x subj=%lu code=%d back=%lu id=%d",
240
    i, Q->group_frame_type, Q->eptr - mb->start_subject, *(Q->ecode),
241
    Q->back_frame, Q->return_id);
242
243
  if (Q->last_group_offset == PCRE2_UNSET)
244
    fprintf(f, " lgoffset=unset\n");
245
  else
246
    fprintf(f, " lgoffset=%lu\n",  Q->last_group_offset/frame_size);
247
  }
248
}
249
250
#endif
251
252
253
254
/*************************************************
255
*                Process a callout               *
256
*************************************************/
257
258
/* This function is called for all callouts, whether "standalone" or at the
259
start of a conditional group. Feptr will be pointing to either OP_CALLOUT or
260
OP_CALLOUT_STR. A callout block is allocated in pcre2_match() and initialized
261
with fixed values.
262
263
Arguments:
264
  F          points to the current backtracking frame
265
  mb         points to the match block
266
  lengthptr  where to return the length of the callout item
267
268
Returns:     the return from the callout
269
             or 0 if no callout function exists
270
*/
271
272
static int
273
do_callout(heapframe *F, match_block *mb, PCRE2_SIZE *lengthptr)
274
0
{
275
0
int rc;
276
0
PCRE2_SIZE save0, save1;
277
0
PCRE2_SIZE *callout_ovector;
278
0
pcre2_callout_block *cb;
279
280
0
*lengthptr = (*Fecode == OP_CALLOUT)?
281
0
  PRIV(OP_lengths)[OP_CALLOUT] : GET(Fecode, 1 + 2*LINK_SIZE);
282
283
0
if (mb->callout == NULL) return 0;   /* No callout function provided */
284
285
/* The original matching code (pre 10.30) worked directly with the ovector
286
passed by the user, and this was passed to callouts. Now that the working
287
ovector is in the backtracking frame, it no longer needs to reserve space for
288
the overall match offsets (which would waste space in the frame). For backward
289
compatibility, however, we pass capture_top and offset_vector to the callout as
290
if for the extended ovector, and we ensure that the first two slots are unset
291
by preserving and restoring their current contents. Picky compilers complain if
292
references such as Fovector[-2] are use directly, so we set up a separate
293
pointer. */
294
295
0
callout_ovector = (PCRE2_SIZE *)(Fovector) - 2;
296
297
/* The cb->version, cb->subject, cb->subject_length, and cb->start_match fields
298
are set externally. The first 3 never change; the last is updated for each
299
bumpalong. */
300
301
0
cb = mb->cb;
302
0
cb->capture_top      = (uint32_t)Foffset_top/2 + 1;
303
0
cb->capture_last     = Fcapture_last;
304
0
cb->offset_vector    = callout_ovector;
305
0
cb->mark             = mb->nomatch_mark;
306
0
cb->current_position = (PCRE2_SIZE)(Feptr - mb->start_subject);
307
0
cb->pattern_position = GET(Fecode, 1);
308
0
cb->next_item_length = GET(Fecode, 1 + LINK_SIZE);
309
310
0
if (*Fecode == OP_CALLOUT)  /* Numerical callout */
311
0
  {
312
0
  cb->callout_number = Fecode[1 + 2*LINK_SIZE];
313
0
  cb->callout_string_offset = 0;
314
0
  cb->callout_string = NULL;
315
0
  cb->callout_string_length = 0;
316
0
  }
317
0
else  /* String callout */
318
0
  {
319
0
  cb->callout_number = 0;
320
0
  cb->callout_string_offset = GET(Fecode, 1 + 3*LINK_SIZE);
321
0
  cb->callout_string = Fecode + (1 + 4*LINK_SIZE) + 1;
322
0
  cb->callout_string_length =
323
0
    *lengthptr - (1 + 4*LINK_SIZE) - 2;
324
0
  }
325
326
0
save0 = callout_ovector[0];
327
0
save1 = callout_ovector[1];
328
0
callout_ovector[0] = callout_ovector[1] = PCRE2_UNSET;
329
0
rc = mb->callout(cb, mb->callout_data);
330
0
callout_ovector[0] = save0;
331
0
callout_ovector[1] = save1;
332
0
cb->callout_flags = 0;
333
0
return rc;
334
0
}
335
336
337
338
/*************************************************
339
*          Match a back-reference                *
340
*************************************************/
341
342
/* This function is called only when it is known that the offset lies within
343
the offsets that have so far been used in the match. Note that in caseless
344
UTF-8 mode, the number of subject bytes matched may be different to the number
345
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
346
seems unlikely.)
347
348
Arguments:
349
  offset      index into the offset vector
350
  caseless    TRUE if caseless
351
  caseopts    bitmask of REFI_FLAG_XYZ values
352
  F           the current backtracking frame pointer
353
  mb          points to match block
354
  lengthptr   pointer for returning the length matched
355
356
Returns:      = 0 sucessful match; number of code units matched is set
357
              < 0 no match
358
              > 0 partial match
359
*/
360
361
static int
362
match_ref(PCRE2_SIZE offset, BOOL caseless, int caseopts, heapframe *F,
363
  match_block *mb, PCRE2_SIZE *lengthptr)
364
0
{
365
0
PCRE2_SPTR p;
366
0
PCRE2_SIZE length;
367
0
PCRE2_SPTR eptr;
368
0
PCRE2_SPTR eptr_start;
369
370
/* Deal with an unset group. The default is no match, but there is an option to
371
match an empty string. */
372
373
0
if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
374
0
  {
375
0
  if ((mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
376
0
    {
377
0
    *lengthptr = 0;
378
0
    return 0;      /* Match */
379
0
    }
380
0
  else return -1;  /* No match */
381
0
  }
382
383
/* Separate the caseless and UTF cases for speed. */
384
385
0
eptr = eptr_start = Feptr;
386
0
p = mb->start_subject + Fovector[offset];
387
0
length = Fovector[offset+1] - Fovector[offset];
388
389
0
if (caseless)
390
0
  {
391
0
#if defined SUPPORT_UNICODE
392
0
  BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
393
0
  BOOL caseless_restrict = (caseopts & REFI_FLAG_CASELESS_RESTRICT) != 0;
394
0
  BOOL turkish_casing = !caseless_restrict && (caseopts & REFI_FLAG_TURKISH_CASING) != 0;
395
396
0
  if (utf || (mb->poptions & PCRE2_UCP) != 0)
397
0
    {
398
0
    PCRE2_SPTR endptr = p + length;
399
400
    /* Match characters up to the end of the reference. NOTE: the number of
401
    code units matched may differ, because in UTF-8 there are some characters
402
    whose upper and lower case codes have different numbers of bytes. For
403
    example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65 (3
404
    bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
405
    sequence of two of the latter. It is important, therefore, to check the
406
    length along the reference, not along the subject (earlier code did this
407
    wrong). UCP without uses Unicode properties but without UTF encoding. */
408
409
0
    while (p < endptr)
410
0
      {
411
0
      uint32_t c, d;
412
0
      const ucd_record *ur;
413
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
414
415
0
      if (utf)
416
0
        {
417
0
        GETCHARINC(c, eptr);
418
0
        GETCHARINC(d, p);
419
0
        }
420
0
      else
421
0
        {
422
0
        c = *eptr++;
423
0
        d = *p++;
424
0
        }
425
426
0
      if (turkish_casing && UCD_ANY_I(d))
427
0
        {
428
0
        c = UCD_FOLD_I_TURKISH(c);
429
0
        d = UCD_FOLD_I_TURKISH(d);
430
0
        if (c != d) return -1;  /* No match */
431
0
        }
432
0
      else if (c != d && c != (uint32_t)((int)d + (ur = GET_UCD(d))->other_case))
433
0
        {
434
0
        const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
435
436
        /* When PCRE2_EXTRA_CASELESS_RESTRICT is set, ignore any caseless sets
437
        that start with an ASCII character. */
438
0
        if (caseless_restrict && *pp < 128) return -1;  /* No match */
439
440
0
        for (;;)
441
0
          {
442
0
          if (c < *pp) return -1;  /* No match */
443
0
          if (c == *pp++) break;
444
0
          }
445
0
        }
446
0
      }
447
0
    }
448
0
  else
449
0
#endif
450
451
  /* Not in UTF or UCP mode */
452
0
    {
453
0
    for (; length > 0; length--)
454
0
      {
455
0
      uint32_t cc, cp;
456
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
457
0
      cc = UCHAR21TEST(eptr);
458
0
      cp = UCHAR21TEST(p);
459
0
      if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
460
0
        return -1;  /* No match */
461
0
      p++;
462
0
      eptr++;
463
0
      }
464
0
    }
465
0
  }
466
467
/* In the caseful case, we can just compare the code units, whether or not we
468
are in UTF and/or UCP mode. When partial matching, we have to do this unit by
469
unit. */
470
471
0
else
472
0
  {
473
0
  if (mb->partial != 0)
474
0
    {
475
0
    for (; length > 0; length--)
476
0
      {
477
0
      if (eptr >= mb->end_subject) return 1;   /* Partial match */
478
0
      if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;  /* No match */
479
0
      }
480
0
    }
481
482
  /* Not partial matching */
483
484
0
  else
485
0
    {
486
0
    if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
487
0
    if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1;  /* No match */
488
0
    eptr += length;
489
0
    }
490
0
  }
491
492
0
*lengthptr = eptr - eptr_start;
493
0
return 0;  /* Match */
494
0
}
495
496
497
498
/******************************************************************************
499
*******************************************************************************
500
                   "Recursion" in the match() function
501
502
The original match() function was highly recursive, but this proved to be the
503
source of a number of problems over the years, mostly because of the relatively
504
small system stacks that are commonly found. As new features were added to
505
patterns, various kludges were invented to reduce the amount of stack used,
506
making the code hard to understand in places.
507
508
A version did exist that used individual frames on the heap instead of calling
509
match() recursively, but this ran substantially slower. The current version is
510
a refactoring that uses a vector of frames to remember backtracking points.
511
This runs no slower, and possibly even a bit faster than the original recursive
512
implementation.
513
514
At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
515
frames) was allocated on the system stack. If this was not big enough, the heap
516
was used for a larger vector. However, it turns out that there are environments
517
where taking as little as 20KiB from the system stack is an embarrassment.
518
After another refactoring, the heap is used exclusively, but a pointer to the
519
frames vector and its size are cached in the match_data block, so that there is
520
no new memory allocation if the same match_data block is used for multiple
521
matches (unless the frames vector has to be extended).
522
*******************************************************************************
523
******************************************************************************/
524
525
526
527
528
/*************************************************
529
*       Macros for the match() function          *
530
*************************************************/
531
532
/* These macros pack up tests that are used for partial matching several times
533
in the code. The second one is used when we already know we are past the end of
534
the subject. We set the "hit end" flag if the pointer is at the end of the
535
subject and either (a) the pointer is past the earliest inspected character
536
(i.e. something has been matched, even if not part of the actual matched
537
string), or (b) the pattern contains a lookbehind. These are the conditions for
538
which adding more characters may allow the current match to continue.
539
540
For hard partial matching, we immediately return a partial match. Otherwise,
541
carrying on means that a complete match on the current subject will be sought.
542
A partial match is returned only if no complete match can be found. */
543
544
/* Apply the partial-match test only when the subject pointer has reached (or
passed) the end of the subject. */

#define CHECK_PARTIAL() \
  do \
    { \
    if (Feptr >= mb->end_subject) SCHECK_PARTIAL(); \
    } \
  while (0)

/* The partial-match test itself, for use when it is already known that the
end of the subject has been reached. When partial matching is enabled and
either the pointer is past the earliest inspected character or an empty partial
match is allowed, record that the end was hit; for a partial setting greater
than 1, return a partial match immediately. */

#define SCHECK_PARTIAL() \
  do \
    { \
    if (mb->partial != 0) \
      { \
      if (Feptr > mb->start_used_ptr || mb->allowemptypartial) \
        { \
        mb->hitend = TRUE; \
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
        } \
      } \
    } \
  while (0)
563
564
565
/* These macros are used to implement backtracking. They simulate a recursive
566
call to the match() function by means of a local vector of frames which
567
remember the backtracking points. */
568
569
/* RMATCH(ra,rb) simulates a recursive call: it records the code pointer at
which the new frame is to start and the identity of this call site, jumps to
the frame-creation code, and provides the label L_<rb> for the return. */

#define RMATCH(ra,rb) \
  do \
    { \
    start_ecode = (ra); \
    Freturn_id = (rb); \
    goto MATCH_RECURSE; \
    L_##rb:; \
    } \
  while (0)

/* RRETURN(ra) simulates a return from such a call: it stores the result and
jumps to the code at RETURN_SWITCH. */

#define RRETURN(ra) \
  do \
    { \
    rrc = (ra); \
    goto RETURN_SWITCH; \
    } \
  while (0)
584
585
586
587
/*************************************************
588
*         Match from current position            *
589
*************************************************/
590
591
/* This function is called to run one match attempt at a single starting point
592
in the subject.
593
594
Performance note: It might be tempting to extract commonly used fields from the
595
mb structure (e.g. end_subject) into individual variables to improve
596
performance. Tests using gcc on a SPARC disproved this; in the first case, it
597
made performance worse.
598
599
Arguments:
600
   start_eptr   starting character in subject
601
   start_ecode  starting position in compiled code
602
   top_bracket  number of capturing parentheses in the pattern
603
   frame_size   size of each backtracking frame
604
   match_data   pointer to the match_data block
605
   mb           pointer to "static" variables block
606
607
Returns:        MATCH_MATCH if matched            )  these values are >= 0
608
                MATCH_NOMATCH if failed to match  )
609
                negative MATCH_xxx value for PRUNE, SKIP, etc
610
                negative PCRE2_ERROR_xxx value if aborted by an error condition
611
                (e.g. stopped by repeated call or depth limit)
612
*/
613
614
static int
615
match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
616
  PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
617
360k
{
618
/* Frame-handling variables */
619
620
360k
heapframe *F;           /* Current frame pointer */
621
360k
heapframe *N = NULL;    /* Temporary frame pointers */
622
360k
heapframe *P = NULL;
623
624
360k
heapframe *frames_top;  /* End of frames vector */
625
360k
heapframe *assert_accept_frame = NULL;  /* For passing back a frame with captures */
626
360k
PCRE2_SIZE frame_copy_size;   /* Amount to copy when creating a new frame */
627
628
/* Local variables that do not need to be preserved over calls to RMATCH(). */
629
630
360k
PCRE2_SPTR branch_end = NULL;
631
360k
PCRE2_SPTR branch_start;
632
360k
PCRE2_SPTR bracode;     /* Temp pointer to start of group */
633
360k
PCRE2_SIZE offset;      /* Used for group offsets */
634
360k
PCRE2_SIZE length;      /* Used for various length calculations */
635
636
360k
int rrc;                /* Return from functions & backtracking "recursions" */
637
360k
#ifdef SUPPORT_UNICODE
638
360k
int proptype;           /* Type of character property */
639
360k
#endif
640
641
360k
uint32_t i;             /* Used for local loops */
642
360k
uint32_t fc;            /* Character values */
643
360k
uint32_t number;        /* Used for group and other numbers */
644
360k
uint32_t reptype = 0;   /* Type of repetition (0 to avoid compiler warning) */
645
360k
uint32_t group_frame_type;  /* Specifies type for new group frames */
646
647
360k
BOOL condition;         /* Used in conditional groups */
648
360k
BOOL cur_is_word;       /* Used in "word" tests */
649
360k
BOOL prev_is_word;      /* Used in "word" tests */
650
651
/* UTF and UCP flags */
652
653
360k
#ifdef SUPPORT_UNICODE
654
360k
BOOL utf = (mb->poptions & PCRE2_UTF) != 0;
655
360k
BOOL ucp = (mb->poptions & PCRE2_UCP) != 0;
656
#else
657
BOOL utf = FALSE;  /* Required for convenience even when no Unicode support */
658
#endif
659
660
/* This is the length of the last part of a backtracking frame that must be
661
copied when a new frame is created. */
662
663
360k
frame_copy_size = frame_size - offsetof(heapframe, eptr);
664
665
/* Set up the first frame and the end of the frames vector. */
666
667
360k
F = match_data->heapframes;
668
360k
frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
669
670
360k
Frdepth = 0;                        /* "Recursion" depth */
671
360k
Fcapture_last = 0;                  /* Number of most recent capture */
672
360k
Fcurrent_recurse = RECURSE_UNSET;   /* Not pattern recursing. */
673
360k
Fstart_match = Feptr = start_eptr;  /* Current data pointer and start match */
674
360k
Fmark = NULL;                       /* Most recent mark */
675
360k
Foffset_top = 0;                    /* End of captures within the frame */
676
360k
Flast_group_offset = PCRE2_UNSET;   /* Saved frame of most recent group */
677
360k
group_frame_type = 0;               /* Not a start of group frame */
678
360k
goto NEW_FRAME;                     /* Start processing with this frame */
679
680
/* Come back here when we want to create a new frame for remembering a
681
backtracking point. */
682
683
427M
MATCH_RECURSE:
684
685
/* Set up a new backtracking frame. If the vector is full, get a new one,
686
doubling the size, but constrained by the heap limit (which is in KiB). */
687
688
427M
N = (heapframe *)((char *)F + frame_size);
689
427M
if ((heapframe *)((char *)N + frame_size) >= frames_top)
690
5
  {
691
5
  heapframe *new;
692
5
  PCRE2_SIZE newsize;
693
5
  PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
694
695
5
  if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
696
0
    {
697
0
    if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
698
0
      return PCRE2_ERROR_NOMEMORY;
699
0
    newsize = PCRE2_SIZE_MAX - 1;
700
0
    }
701
5
  else
702
5
    newsize = match_data->heapframes_size * 2;
703
704
5
  if (newsize / 1024 >= mb->heap_limit)
705
0
    {
706
0
    PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
707
0
    if (mb->heap_limit <= old_size)
708
0
      return PCRE2_ERROR_HEAPLIMIT;
709
0
    else
710
0
      {
711
0
      PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
712
0
      int over_bytes = match_data->heapframes_size % 1024;
713
0
      if (over_bytes) max_delta -= (1024 - over_bytes);
714
0
      newsize = match_data->heapframes_size + max_delta;
715
0
      }
716
0
    }
717
718
  /* With a heap limit set, the permitted additional size may not be enough for
719
  another frame, so do a final check. */
720
721
5
  if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
722
5
  new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
723
5
  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
724
5
  memcpy(new, match_data->heapframes, usedsize);
725
726
5
  N = (heapframe *)((char *)new + usedsize);
727
5
  F = (heapframe *)((char *)N - frame_size);
728
729
5
  match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
730
5
  match_data->heapframes = new;
731
5
  match_data->heapframes_size = newsize;
732
5
  frames_top = (heapframe *)((char *)new + newsize);
733
5
  }
734
735
#ifdef DEBUG_SHOW_RMATCH
736
fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
737
if (group_frame_type != 0)
738
  {
739
  fprintf(stderr, " type=%x ", group_frame_type);
740
  switch (GF_IDMASK(group_frame_type))
741
    {
742
    case GF_CAPTURE:
743
    fprintf(stderr, "capture=%d", GF_DATAMASK(group_frame_type));
744
    break;
745
746
    case GF_NOCAPTURE:
747
    fprintf(stderr, "nocapture op=%d", GF_DATAMASK(group_frame_type));
748
    break;
749
750
    case GF_CONDASSERT:
751
    fprintf(stderr, "condassert op=%d", GF_DATAMASK(group_frame_type));
752
    break;
753
754
    case GF_RECURSE:
755
    fprintf(stderr, "recurse=%d", GF_DATAMASK(group_frame_type));
756
    break;
757
758
    default:
759
    fprintf(stderr, "*** unknown ***");
760
    break;
761
    }
762
  }
763
fprintf(stderr, "\n");
764
#endif
765
766
/* Copy those fields that must be copied into the new frame, increase the
767
"recursion" depth (i.e. the new frame's index) and then make the new frame
768
current. */
769
770
427M
memcpy((char *)N + offsetof(heapframe, eptr),
771
427M
       (char *)F + offsetof(heapframe, eptr),
772
427M
       frame_copy_size);
773
774
427M
N->rdepth = Frdepth + 1;
775
427M
F = N;
776
777
/* Carry on processing with a new frame. */
778
779
427M
NEW_FRAME:
780
427M
Fgroup_frame_type = group_frame_type;
781
427M
Fecode = start_ecode;      /* Starting code pointer */
782
427M
Fback_frame = frame_size;  /* Default is go back one frame */
783
784
/* If this is a special type of group frame, remember its offset for quick
785
access at the end of the group. If this is a recursion, set a new current
786
recursion value. */
787
788
427M
if (group_frame_type != 0)
789
7.03M
  {
790
7.03M
  Flast_group_offset = (char *)F - (char *)match_data->heapframes;
791
7.03M
  if (GF_IDMASK(group_frame_type) == GF_RECURSE)
792
1.00M
    Fcurrent_recurse = GF_DATAMASK(group_frame_type);
793
7.03M
  group_frame_type = 0;
794
7.03M
  }
795
796
797
/* ========================================================================= */
798
/* This is the main processing loop. First check that we haven't recorded too
799
many backtracks (search tree is too large), or that we haven't exceeded the
800
recursive depth limit (used too many backtracking frames). If not, process the
801
opcodes. */
802
803
427M
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
804
427M
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
805
806
#ifdef DEBUG_SHOW_OPS
807
fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
808
  GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
809
#endif
810
811
427M
for (;;)
812
572M
  {
813
#ifdef DEBUG_SHOW_OPS
814
fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
815
  OP_names[*Fecode]);
816
#endif
817
818
572M
  Fop = (uint8_t)(*Fecode);  /* Cast needed for 16-bit and 32-bit modes */
819
572M
  switch(Fop)
820
572M
    {
821
    /* ===================================================================== */
822
    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes, to close
823
    any currently open capturing brackets. Unlike reaching the end of a group,
824
    where we know the starting frame is at the top of the chained frames, in
825
    this case we have to search back for the relevant frame in case other types
826
    of group that use chained frames have intervened. Multiple OP_CLOSEs always
827
    come innermost first, which matches the chain order. We can ignore this in
828
    a recursion, because captures are not passed out of recursions. */
829
830
0
    case OP_CLOSE:
831
0
    if (Fcurrent_recurse == RECURSE_UNSET)
832
0
      {
833
0
      number = GET2(Fecode, 1);
834
0
      offset = Flast_group_offset;
835
0
      for(;;)
836
0
        {
837
        /* Corrupted heapframes? Trigger an assert and return an error. */
838
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
839
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
840
841
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
842
0
        P = (heapframe *)((char *)N - frame_size);
843
0
        if (N->group_frame_type == (GF_CAPTURE | number)) break;
844
0
        offset = P->last_group_offset;
845
0
        }
846
0
      offset = (number << 1) - 2;
847
0
      Fcapture_last = number;
848
0
      Fovector[offset] = P->eptr - mb->start_subject;
849
0
      Fovector[offset+1] = Feptr - mb->start_subject;
850
0
      if (offset >= Foffset_top) Foffset_top = offset + 2;
851
0
      }
852
0
    Fecode += PRIV(OP_lengths)[*Fecode];
853
0
    break;
854
855
856
    /* ===================================================================== */
857
    /* Real or forced end of the pattern, assertion, or recursion. In an
858
    assertion ACCEPT, update the last used pointer and remember the current
859
    frame so that the captures and mark can be fished out of it. */
860
861
0
    case OP_ASSERT_ACCEPT:
862
0
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
863
0
    assert_accept_frame = F;
864
0
    RRETURN(MATCH_ACCEPT);
865
866
    /* For ACCEPT within a recursion, we have to find the most recent
867
    recursion. If not in a recursion, fall through to code that is common with
868
    OP_END. */
869
870
0
    case OP_ACCEPT:
871
0
    if (Fcurrent_recurse != RECURSE_UNSET)
872
0
      {
873
#ifdef DEBUG_SHOW_OPS
874
      fprintf(stderr, "++ Accept within recursion\n");
875
#endif
876
0
      offset = Flast_group_offset;
877
0
      for(;;)
878
0
        {
879
        /* Corrupted heapframes? Trigger an assert and return an error. */
880
0
        PCRE2_ASSERT(offset != PCRE2_UNSET);
881
0
        if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
882
883
0
        N = (heapframe *)((char *)match_data->heapframes + offset);
884
0
        P = (heapframe *)((char *)N - frame_size);
885
0
        if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
886
0
        offset = P->last_group_offset;
887
0
        }
888
889
      /* N is now the frame of the recursion; the previous frame is at the
890
      OP_RECURSE position. Go back there, copying the current subject position
891
      and mark, and the start_match position (\K might have changed it), and
892
      then move on past the OP_RECURSE. */
893
894
0
      P->eptr = Feptr;
895
0
      P->mark = Fmark;
896
0
      P->start_match = Fstart_match;
897
0
      F = P;
898
0
      Fecode += 1 + LINK_SIZE;
899
0
      continue;
900
0
      }
901
    /* Fall through */
902
903
    /* OP_END itself can never be reached within a recursion because that is
904
    picked up when the OP_KET that always precedes OP_END is reached. */
905
906
4.40k
    case OP_END:
907
908
    /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
909
    PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
910
    subject. In both cases, backtracking will then try other alternatives, if
911
    any. */
912
913
4.40k
    if (Feptr == Fstart_match &&
914
4.40k
         ((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
915
3.95k
           ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
916
3.95k
             Fstart_match == mb->start_subject + mb->start_offset)))
917
3.84k
      {
918
#ifdef DEBUG_SHOW_OPS
919
      fprintf(stderr, "++ Backtrack because empty string\n");
920
#endif
921
3.84k
      RRETURN(MATCH_NOMATCH);
922
3.84k
      }
923
924
    /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
925
    the end of the subject. After (*ACCEPT) we fail the entire match (at this
926
    position) but backtrack if we've reached the end of the pattern. This
927
    applies whether or not we are in a recursion. */
928
929
559
    if (Feptr < mb->end_subject &&
930
559
        ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
931
0
      {
932
0
      if (Fop == OP_END)
933
0
        {
934
#ifdef DEBUG_SHOW_OPS
935
        fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
936
#endif
937
0
        RRETURN(MATCH_NOMATCH);
938
0
        }
939
940
#ifdef DEBUG_SHOW_OPS
941
      fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
942
#endif
943
0
      return MATCH_NOMATCH;   /* (*ACCEPT) */
944
0
      }
945
946
    /* We have a successful match of the whole pattern. Record the result and
947
    then do a direct return from the function. If there is space in the offset
948
    vector, set any pairs that follow the highest-numbered captured string but
949
    are less than the number of capturing groups in the pattern to PCRE2_UNSET.
950
    It is documented that this happens. "Gaps" are set to PCRE2_UNSET
951
    dynamically. It is only those at the end that need setting here. */
952
953
559
    mb->end_match_ptr = Feptr;           /* Record where we ended */
954
559
    mb->end_offset_top = Foffset_top;    /* and how many extracts were taken */
955
559
    mb->mark = Fmark;                    /* and the last success mark */
956
559
    if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
957
958
559
    match_data->ovector[0] = Fstart_match - mb->start_subject;
959
559
    match_data->ovector[1] = Feptr - mb->start_subject;
960
961
    /* Set i to the smaller of the sizes of the external and frame ovectors. */
962
963
559
    i = 2 * ((top_bracket + 1 > match_data->oveccount)?
964
559
      match_data->oveccount : top_bracket + 1);
965
559
    memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
966
1.07k
    while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
967
559
    return MATCH_MATCH;  /* Note: NOT RRETURN */
968
969
970
    /*===================================================================== */
971
    /* Match any single character type except newline; have to take care with
972
    CRLF newlines and partial matching. */
973
974
6.95M
    case OP_ANY:
975
6.95M
    if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
976
6.77M
    if (mb->partial != 0 &&
977
6.77M
        Feptr == mb->end_subject - 1 &&
978
6.77M
        NLBLOCK->nltype == NLTYPE_FIXED &&
979
6.77M
        NLBLOCK->nllen == 2 &&
980
6.77M
        UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
981
0
      {
982
0
      mb->hitend = TRUE;
983
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
984
0
      }
985
    /* Fall through */
986
987
    /* Match any single character whatsoever. */
988
989
8.13M
    case OP_ALLANY:
990
8.13M
    if (Feptr >= mb->end_subject)  /* DO NOT merge the Feptr++ here; it must */
991
101k
      {                            /* not be updated before SCHECK_PARTIAL. */
992
101k
      SCHECK_PARTIAL();
993
101k
      RRETURN(MATCH_NOMATCH);
994
101k
      }
995
8.03M
    Feptr++;
996
8.03M
#ifdef SUPPORT_UNICODE
997
8.03M
    if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
998
8.03M
#endif
999
8.03M
    Fecode++;
1000
8.03M
    break;
1001
1002
1003
    /* ===================================================================== */
1004
    /* Match a single code unit, even in UTF mode. This opcode really does
1005
    match any code unit, even newline. (It really should be called ANYCODEUNIT,
1006
    of course - the byte name is from pre-16 bit days.) */
1007
1008
302
    case OP_ANYBYTE:
1009
302
    if (Feptr >= mb->end_subject)   /* DO NOT merge the Feptr++ here; it must */
1010
3
      {                             /* not be updated before SCHECK_PARTIAL. */
1011
3
      SCHECK_PARTIAL();
1012
3
      RRETURN(MATCH_NOMATCH);
1013
3
      }
1014
299
    Feptr++;
1015
299
    Fecode++;
1016
299
    break;
1017
1018
1019
    /* ===================================================================== */
1020
    /* Match a single character, casefully */
1021
1022
127M
    case OP_CHAR:
1023
127M
#ifdef SUPPORT_UNICODE
1024
127M
    if (utf)
1025
83.4M
      {
1026
83.4M
      Flength = 1;
1027
83.4M
      Fecode++;
1028
83.4M
      GETCHARLEN(fc, Fecode, Flength);
1029
83.4M
      if (Flength > (PCRE2_SIZE)(mb->end_subject - Feptr))
1030
988k
        {
1031
988k
        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
1032
988k
        RRETURN(MATCH_NOMATCH);
1033
988k
        }
1034
82.4M
      for (; Flength > 0; Flength--)
1035
82.4M
        {
1036
82.4M
        if (*Fecode++ != UCHAR21INC(Feptr)) RRETURN(MATCH_NOMATCH);
1037
82.4M
        }
1038
82.4M
      }
1039
43.9M
    else
1040
43.9M
#endif
1041
1042
    /* Not UTF mode */
1043
43.9M
      {
1044
43.9M
      if (mb->end_subject - Feptr < 1)
1045
120k
        {
1046
120k
        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
1047
120k
        RRETURN(MATCH_NOMATCH);
1048
120k
        }
1049
43.8M
      if (Fecode[1] != *Feptr++) RRETURN(MATCH_NOMATCH);
1050
795k
      Fecode += 2;
1051
795k
      }
1052
833k
    break;
1053
1054
1055
    /* ===================================================================== */
1056
    /* Match a single character, caselessly. If we are at the end of the
1057
    subject, give up immediately. We get here only when the pattern character
1058
    has at most one other case. Characters with more than two cases are coded
1059
    as OP_PROP with the pseudo-property PT_CLIST. */
1060
1061
24.0M
    case OP_CHARI:
1062
24.0M
    if (Feptr >= mb->end_subject)
1063
220k
      {
1064
220k
      SCHECK_PARTIAL();
1065
220k
      RRETURN(MATCH_NOMATCH);
1066
220k
      }
1067
1068
23.8M
#ifdef SUPPORT_UNICODE
1069
23.8M
    if (utf)
1070
5.09M
      {
1071
5.09M
      Flength = 1;
1072
5.09M
      Fecode++;
1073
5.09M
      GETCHARLEN(fc, Fecode, Flength);
1074
1075
      /* If the pattern character's value is < 128, we know that its other case
1076
      (if any) is also < 128 (and therefore only one code unit long in all
1077
      code-unit widths), so we can use the fast lookup table. We checked above
1078
      that there is at least one character left in the subject. */
1079
1080
5.09M
      if (fc < 128)
1081
5.06M
        {
1082
5.06M
        uint32_t cc = UCHAR21(Feptr);
1083
5.06M
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1084
194k
        Fecode++;
1085
194k
        Feptr++;
1086
194k
        }
1087
1088
      /* Otherwise we must pick up the subject character and use Unicode
1089
      property support to test its other case. Note that we cannot use the
1090
      value of "Flength" to check for sufficient bytes left, because the other
1091
      case of the character may have more or fewer code units. */
1092
1093
22.6k
      else
1094
22.6k
        {
1095
22.6k
        uint32_t dc;
1096
22.6k
        GETCHARINC(dc, Feptr);
1097
22.6k
        Fecode += Flength;
1098
22.6k
        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1099
22.6k
        }
1100
5.09M
      }
1101
1102
    /* If UCP is set without UTF we must do the same as above, but with one
1103
    character per code unit. */
1104
1105
18.7M
    else if (ucp)
1106
0
      {
1107
0
      uint32_t cc = UCHAR21(Feptr);
1108
0
      fc = Fecode[1];
1109
0
      if (fc < 128)
1110
0
        {
1111
0
        if (mb->lcc[fc] != TABLE_GET(cc, mb->lcc, cc)) RRETURN(MATCH_NOMATCH);
1112
0
        }
1113
0
      else
1114
0
        {
1115
0
        if (cc != fc && cc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
1116
0
        }
1117
0
      Feptr++;
1118
0
      Fecode += 2;
1119
0
      }
1120
1121
18.7M
    else
1122
18.7M
#endif   /* SUPPORT_UNICODE */
1123
1124
    /* Not UTF or UCP mode; use the table for characters < 256. */
1125
18.7M
      {
1126
18.7M
      if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
1127
18.7M
          != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);
1128
266k
      Feptr++;
1129
266k
      Fecode += 2;
1130
266k
      }
1131
461k
    break;
1132
1133
1134
    /* ===================================================================== */
1135
    /* Match not a single character. */
1136
1137
461k
    case OP_NOT:
1138
686k
    case OP_NOTI:
1139
686k
    if (Feptr >= mb->end_subject)
1140
2.14k
      {
1141
2.14k
      SCHECK_PARTIAL();
1142
2.14k
      RRETURN(MATCH_NOMATCH);
1143
2.14k
      }
1144
1145
684k
#ifdef SUPPORT_UNICODE
1146
684k
    if (utf)
1147
3.10k
      {
1148
3.10k
      uint32_t ch;
1149
3.10k
      Fecode++;
1150
3.10k
      GETCHARINC(ch, Fecode);
1151
3.10k
      GETCHARINC(fc, Feptr);
1152
3.10k
      if (ch == fc)
1153
120
        {
1154
120
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1155
120
        }
1156
2.98k
      else if (Fop == OP_NOTI)   /* If caseless */
1157
2.97k
        {
1158
2.97k
        if (ch > 127)
1159
0
          ch = UCD_OTHERCASE(ch);
1160
2.97k
        else
1161
2.97k
          ch = (mb->fcc)[ch];
1162
2.97k
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1163
2.97k
        }
1164
3.10k
      }
1165
1166
    /* UCP without UTF is as above, but with one character per code unit. */
1167
1168
681k
    else if (ucp)
1169
0
      {
1170
0
      uint32_t ch;
1171
0
      fc = UCHAR21INC(Feptr);
1172
0
      ch = Fecode[1];
1173
0
      Fecode += 2;
1174
1175
0
      if (ch == fc)
1176
0
        {
1177
0
        RRETURN(MATCH_NOMATCH);  /* Caseful match */
1178
0
        }
1179
0
      else if (Fop == OP_NOTI)   /* If caseless */
1180
0
        {
1181
0
        if (ch > 127)
1182
0
          ch = UCD_OTHERCASE(ch);
1183
0
        else
1184
0
          ch = (mb->fcc)[ch];
1185
0
        if (ch == fc) RRETURN(MATCH_NOMATCH);
1186
0
        }
1187
0
      }
1188
1189
681k
    else
1190
681k
#endif  /* SUPPORT_UNICODE */
1191
1192
    /* Neither UTF nor UCP is set */
1193
1194
681k
      {
1195
681k
      uint32_t ch = Fecode[1];
1196
681k
      fc = UCHAR21INC(Feptr);
1197
681k
      if (ch == fc || (Fop == OP_NOTI && TABLE_GET(ch, mb->fcc, ch) == fc))
1198
5.24k
        RRETURN(MATCH_NOMATCH);
1199
676k
      Fecode += 2;
1200
676k
      }
1201
679k
    break;
1202
1203
1204
    /* ===================================================================== */
1205
    /* Match a single character repeatedly. */
1206
1207
679k
#define Loclength    F->temp_size
1208
45.3M
#define Lstart_eptr  F->temp_sptr[0]
1209
22.6M
#define Lcharptr     F->temp_sptr[1]
1210
152M
#define Lmin         F->temp_32[0]
1211
114M
#define Lmax         F->temp_32[1]
1212
83.1M
#define Lc           F->temp_32[2]
1213
13.1M
#define Loc          F->temp_32[3]
1214
1215
679k
    case OP_EXACT:
1216
0
    case OP_EXACTI:
1217
0
    Lmin = Lmax = GET2(Fecode, 1);
1218
0
    Fecode += 1 + IMM2_SIZE;
1219
0
    goto REPEATCHAR;
1220
1221
0
    case OP_POSUPTO:
1222
0
    case OP_POSUPTOI:
1223
0
    reptype = REPTYPE_POS;
1224
0
    Lmin = 0;
1225
0
    Lmax = GET2(Fecode, 1);
1226
0
    Fecode += 1 + IMM2_SIZE;
1227
0
    goto REPEATCHAR;
1228
1229
0
    case OP_UPTO:
1230
0
    case OP_UPTOI:
1231
0
    reptype = REPTYPE_MAX;
1232
0
    Lmin = 0;
1233
0
    Lmax = GET2(Fecode, 1);
1234
0
    Fecode += 1 + IMM2_SIZE;
1235
0
    goto REPEATCHAR;
1236
1237
0
    case OP_MINUPTO:
1238
0
    case OP_MINUPTOI:
1239
0
    reptype = REPTYPE_MIN;
1240
0
    Lmin = 0;
1241
0
    Lmax = GET2(Fecode, 1);
1242
0
    Fecode += 1 + IMM2_SIZE;
1243
0
    goto REPEATCHAR;
1244
1245
12.1k
    case OP_POSSTAR:
1246
60.3k
    case OP_POSSTARI:
1247
60.3k
    reptype = REPTYPE_POS;
1248
60.3k
    Lmin = 0;
1249
60.3k
    Lmax = UINT32_MAX;
1250
60.3k
    Fecode++;
1251
60.3k
    goto REPEATCHAR;
1252
1253
153k
    case OP_POSPLUS:
1254
215k
    case OP_POSPLUSI:
1255
215k
    reptype = REPTYPE_POS;
1256
215k
    Lmin = 1;
1257
215k
    Lmax = UINT32_MAX;
1258
215k
    Fecode++;
1259
215k
    goto REPEATCHAR;
1260
1261
20.2M
    case OP_POSQUERY:
1262
23.1M
    case OP_POSQUERYI:
1263
23.1M
    reptype = REPTYPE_POS;
1264
23.1M
    Lmin = 0;
1265
23.1M
    Lmax = 1;
1266
23.1M
    Fecode++;
1267
23.1M
    goto REPEATCHAR;
1268
1269
410
    case OP_STAR:
1270
6.80k
    case OP_STARI:
1271
7.78k
    case OP_MINSTAR:
1272
18.2k
    case OP_MINSTARI:
1273
20.9k
    case OP_PLUS:
1274
23.7k
    case OP_PLUSI:
1275
24.1k
    case OP_MINPLUS:
1276
55.3k
    case OP_MINPLUSI:
1277
9.22M
    case OP_QUERY:
1278
11.0M
    case OP_QUERYI:
1279
13.2M
    case OP_MINQUERY:
1280
14.8M
    case OP_MINQUERYI:
1281
14.8M
    fc = *Fecode++ - ((Fop < OP_STARI)? OP_STAR : OP_STARI);
1282
14.8M
    Lmin = rep_min[fc];
1283
14.8M
    Lmax = rep_max[fc];
1284
14.8M
    reptype = rep_typ[fc];
1285
1286
    /* Common code for all repeated single-character matches. We first check
1287
    for the minimum number of characters. If the minimum equals the maximum, we
1288
    are done. Otherwise, if minimizing, check the rest of the pattern for a
1289
    match; if there isn't one, advance up to the maximum, one character at a
1290
    time.
1291
1292
    If maximizing, advance up to the maximum number of matching characters,
1293
    until Feptr is past the end of the maximum run. If possessive, we are
1294
    then done (no backing up). Otherwise, match at this position; anything
1295
    other than no match is immediately returned. For nomatch, back up one
1296
    character, unless we are matching \R and the last thing matched was
1297
    \r\n, in which case, back up two code units until we reach the first
1298
    optional character position.
1299
1300
    The various UTF/non-UTF and caseful/caseless cases are handled separately,
1301
    for speed. */
1302
1303
38.3M
    REPEATCHAR:
1304
38.3M
#ifdef SUPPORT_UNICODE
1305
38.3M
    if (utf)
1306
22.6M
      {
1307
22.6M
      Flength = 1;
1308
22.6M
      Lcharptr = Fecode;
1309
22.6M
      GETCHARLEN(fc, Fecode, Flength);
1310
22.6M
      Fecode += Flength;
1311
1312
      /* Handle multi-code-unit character matching, caseful and caseless. */
1313
1314
22.6M
      if (Flength > 1)
1315
4.37k
        {
1316
4.37k
        uint32_t othercase;
1317
1318
4.37k
        if (Fop >= OP_STARI &&     /* Caseless */
1319
4.37k
            (othercase = UCD_OTHERCASE(fc)) != fc)
1320
0
          Loclength = PRIV(ord2utf)(othercase, Foccu);
1321
4.37k
        else Loclength = 0;
1322
1323
4.37k
        for (i = 1; i <= Lmin; i++)
1324
4.37k
          {
1325
4.37k
          if (Feptr <= mb->end_subject - Flength &&
1326
4.37k
            memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1327
4.37k
          else if (Loclength > 0 &&
1328
4.37k
                   Feptr <= mb->end_subject - Loclength &&
1329
4.37k
                   memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1330
0
            Feptr += Loclength;
1331
4.37k
          else
1332
4.37k
            {
1333
4.37k
            CHECK_PARTIAL();
1334
4.37k
            RRETURN(MATCH_NOMATCH);
1335
4.37k
            }
1336
4.37k
          }
1337
1338
0
        if (Lmin == Lmax) continue;
1339
1340
0
        if (reptype == REPTYPE_MIN)
1341
0
          {
1342
0
          for (;;)
1343
0
            {
1344
0
            RMATCH(Fecode, RM202);
1345
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1346
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1347
0
            if (Feptr <= mb->end_subject - Flength &&
1348
0
              memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0) Feptr += Flength;
1349
0
            else if (Loclength > 0 &&
1350
0
                     Feptr <= mb->end_subject - Loclength &&
1351
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1352
0
              Feptr += Loclength;
1353
0
            else
1354
0
              {
1355
0
              CHECK_PARTIAL();
1356
0
              RRETURN(MATCH_NOMATCH);
1357
0
              }
1358
0
            }
1359
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
1360
0
          }
1361
1362
0
        else  /* Maximize */
1363
0
          {
1364
0
          Lstart_eptr = Feptr;
1365
0
          for (i = Lmin; i < Lmax; i++)
1366
0
            {
1367
0
            if (Feptr <= mb->end_subject - Flength &&
1368
0
                memcmp(Feptr, Lcharptr, CU2BYTES(Flength)) == 0)
1369
0
              Feptr += Flength;
1370
0
            else if (Loclength > 0 &&
1371
0
                     Feptr <= mb->end_subject - Loclength &&
1372
0
                     memcmp(Feptr, Foccu, CU2BYTES(Loclength)) == 0)
1373
0
              Feptr += Loclength;
1374
0
            else
1375
0
              {
1376
0
              CHECK_PARTIAL();
1377
0
              break;
1378
0
              }
1379
0
            }
1380
1381
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1382
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1383
          go too far. */
1384
1385
0
          if (reptype != REPTYPE_POS) for(;;)
1386
0
            {
1387
0
            if (Feptr <= Lstart_eptr) break;
1388
0
            RMATCH(Fecode, RM203);
1389
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1390
0
            Feptr--;
1391
0
            BACKCHAR(Feptr);
1392
0
            }
1393
0
          }
1394
0
        break;   /* End of repeated wide character handling */
1395
0
        }
1396
1397
      /* Length of UTF character is 1. Put it into the preserved variable and
1398
      fall through to the non-UTF code. */
1399
1400
22.6M
      Lc = fc;
1401
22.6M
      }
1402
15.7M
    else
1403
15.7M
#endif  /* SUPPORT_UNICODE */
1404
1405
    /* When not in UTF mode, load a single-code-unit character. Then proceed as
1406
    above, using Unicode casing if either UTF or UCP is set. */
1407
1408
15.7M
    Lc = *Fecode++;
1409
1410
    /* Caseless comparison */
1411
1412
38.3M
    if (Fop >= OP_STARI)
1413
6.61M
      {
1414
6.61M
#if PCRE2_CODE_UNIT_WIDTH == 8
1415
6.61M
#ifdef SUPPORT_UNICODE
1416
6.61M
      if (ucp && !utf && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1417
6.61M
      else
1418
6.61M
#endif  /* SUPPORT_UNICODE */
1419
      /* Lc will be < 128 in UTF-8 mode. */
1420
6.61M
      Loc = mb->fcc[Lc];
1421
#else /* 16-bit & 32-bit */
1422
#ifdef SUPPORT_UNICODE
1423
      if ((utf || ucp) && Lc > 127) Loc = UCD_OTHERCASE(Lc);
1424
      else
1425
#endif  /* SUPPORT_UNICODE */
1426
      Loc = TABLE_GET(Lc, mb->fcc, Lc);
1427
#endif  /* PCRE2_CODE_UNIT_WIDTH == 8 */
1428
1429
6.61M
      for (i = 1; i <= Lmin; i++)
1430
91.7k
        {
1431
91.7k
        uint32_t cc;                 /* Faster than PCRE2_UCHAR */
1432
91.7k
        if (Feptr >= mb->end_subject)
1433
2.91k
          {
1434
2.91k
          SCHECK_PARTIAL();
1435
2.91k
          RRETURN(MATCH_NOMATCH);
1436
2.91k
          }
1437
88.8k
        cc = UCHAR21TEST(Feptr);
1438
88.8k
        if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1439
2.51k
        Feptr++;
1440
2.51k
        }
1441
6.52M
      if (Lmin == Lmax) continue;
1442
1443
6.52M
      if (reptype == REPTYPE_MIN)
1444
1.65M
        {
1445
1.65M
        for (;;)
1446
1.69M
          {
1447
1.69M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1448
1.69M
          RMATCH(Fecode, RM25);
1449
1.69M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1450
1.69M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1451
1.66M
          if (Feptr >= mb->end_subject)
1452
14.9k
            {
1453
14.9k
            SCHECK_PARTIAL();
1454
14.9k
            RRETURN(MATCH_NOMATCH);
1455
14.9k
            }
1456
1.64M
          cc = UCHAR21TEST(Feptr);
1457
1.64M
          if (Lc != cc && Loc != cc) RRETURN(MATCH_NOMATCH);
1458
40.5k
          Feptr++;
1459
40.5k
          }
1460
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1461
0
        }
1462
1463
4.86M
      else  /* Maximize */
1464
4.86M
        {
1465
4.86M
        Lstart_eptr = Feptr;
1466
4.89M
        for (i = Lmin; i < Lmax; i++)
1467
4.87M
          {
1468
4.87M
          uint32_t cc;               /* Faster than PCRE2_UCHAR */
1469
4.87M
          if (Feptr >= mb->end_subject)
1470
23.2k
            {
1471
23.2k
            SCHECK_PARTIAL();
1472
23.2k
            break;
1473
23.2k
            }
1474
4.85M
          cc = UCHAR21TEST(Feptr);
1475
4.85M
          if (Lc != cc && Loc != cc) break;
1476
22.5k
          Feptr++;
1477
22.5k
          }
1478
4.86M
        if (reptype != REPTYPE_POS) for (;;)
1479
1.87M
          {
1480
1.87M
          if (Feptr == Lstart_eptr) break;
1481
2.40k
          RMATCH(Fecode, RM26);
1482
2.40k
          Feptr--;
1483
2.40k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1484
2.40k
          }
1485
4.86M
        }
1486
6.52M
      }
1487
1488
    /* Caseful comparisons (includes all multi-byte characters) */
1489
1490
31.7M
    else
1491
31.7M
      {
1492
31.7M
      for (i = 1; i <= Lmin; i++)
1493
156k
        {
1494
156k
        if (Feptr >= mb->end_subject)
1495
949
          {
1496
949
          SCHECK_PARTIAL();
1497
949
          RRETURN(MATCH_NOMATCH);
1498
949
          }
1499
155k
        if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1500
155k
        }
1501
1502
31.5M
      if (Lmin == Lmax) continue;
1503
1504
31.5M
      if (reptype == REPTYPE_MIN)
1505
2.15M
        {
1506
2.15M
        for (;;)
1507
2.15M
          {
1508
2.15M
          RMATCH(Fecode, RM27);
1509
2.15M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1510
2.15M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1511
2.15M
          if (Feptr >= mb->end_subject)
1512
8.64k
            {
1513
8.64k
            SCHECK_PARTIAL();
1514
8.64k
            RRETURN(MATCH_NOMATCH);
1515
8.64k
            }
1516
2.14M
          if (Lc != UCHAR21INCTEST(Feptr)) RRETURN(MATCH_NOMATCH);
1517
2.14M
          }
1518
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1519
0
        }
1520
29.4M
      else  /* Maximize */
1521
29.4M
        {
1522
29.4M
        Lstart_eptr = Feptr;
1523
29.5M
        for (i = Lmin; i < Lmax; i++)
1524
29.4M
          {
1525
29.4M
          if (Feptr >= mb->end_subject)
1526
153k
            {
1527
153k
            SCHECK_PARTIAL();
1528
153k
            break;
1529
153k
            }
1530
1531
29.3M
          if (Lc != UCHAR21TEST(Feptr)) break;
1532
102k
          Feptr++;
1533
102k
          }
1534
1535
29.4M
        if (reptype != REPTYPE_POS) for (;;)
1536
9.17M
          {
1537
9.17M
          if (Feptr <= Lstart_eptr) break;
1538
8.95k
          RMATCH(Fecode, RM28);
1539
8.93k
          Feptr--;
1540
8.93k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541
8.93k
          }
1542
29.4M
        }
1543
31.5M
      }
1544
34.2M
    break;
1545
1546
34.2M
#undef Loclength
1547
34.2M
#undef Lstart_eptr
1548
34.2M
#undef Lcharptr
1549
34.2M
#undef Lmin
1550
34.2M
#undef Lmax
1551
34.2M
#undef Lc
1552
34.2M
#undef Loc
1553
1554
1555
    /* ===================================================================== */
1556
    /* Match a negated single one-byte character repeatedly. This is almost a
1557
    repeat of the code for a repeated single character, but I haven't found a
1558
    nice way of commoning these up that doesn't require a test of the
1559
    positive/negative option for each character match. Maybe that wouldn't add
1560
    very much to the time taken, but character matching *is* what this is all
1561
    about... */
1562
1563
34.2M
#define Lstart_eptr  F->temp_sptr[0]
1564
34.2M
#define Lmin         F->temp_32[0]
1565
34.2M
#define Lmax         F->temp_32[1]
1566
34.2M
#define Lc           F->temp_32[2]
1567
34.2M
#define Loc          F->temp_32[3]
1568
1569
34.2M
    case OP_NOTEXACT:
1570
0
    case OP_NOTEXACTI:
1571
0
    Lmin = Lmax = GET2(Fecode, 1);
1572
0
    Fecode += 1 + IMM2_SIZE;
1573
0
    goto REPEATNOTCHAR;
1574
1575
0
    case OP_NOTUPTO:
1576
0
    case OP_NOTUPTOI:
1577
0
    Lmin = 0;
1578
0
    Lmax = GET2(Fecode, 1);
1579
0
    reptype = REPTYPE_MAX;
1580
0
    Fecode += 1 + IMM2_SIZE;
1581
0
    goto REPEATNOTCHAR;
1582
1583
0
    case OP_NOTMINUPTO:
1584
0
    case OP_NOTMINUPTOI:
1585
0
    Lmin = 0;
1586
0
    Lmax = GET2(Fecode, 1);
1587
0
    reptype = REPTYPE_MIN;
1588
0
    Fecode += 1 + IMM2_SIZE;
1589
0
    goto REPEATNOTCHAR;
1590
1591
0
    case OP_NOTPOSSTAR:
1592
0
    case OP_NOTPOSSTARI:
1593
0
    reptype = REPTYPE_POS;
1594
0
    Lmin = 0;
1595
0
    Lmax = UINT32_MAX;
1596
0
    Fecode++;
1597
0
    goto REPEATNOTCHAR;
1598
1599
345
    case OP_NOTPOSPLUS:
1600
1.23k
    case OP_NOTPOSPLUSI:
1601
1.23k
    reptype = REPTYPE_POS;
1602
1.23k
    Lmin = 1;
1603
1.23k
    Lmax = UINT32_MAX;
1604
1.23k
    Fecode++;
1605
1.23k
    goto REPEATNOTCHAR;
1606
1607
32
    case OP_NOTPOSQUERY:
1608
1.20k
    case OP_NOTPOSQUERYI:
1609
1.20k
    reptype = REPTYPE_POS;
1610
1.20k
    Lmin = 0;
1611
1.20k
    Lmax = 1;
1612
1.20k
    Fecode++;
1613
1.20k
    goto REPEATNOTCHAR;
1614
1615
0
    case OP_NOTPOSUPTO:
1616
0
    case OP_NOTPOSUPTOI:
1617
0
    reptype = REPTYPE_POS;
1618
0
    Lmin = 0;
1619
0
    Lmax = GET2(Fecode, 1);
1620
0
    Fecode += 1 + IMM2_SIZE;
1621
0
    goto REPEATNOTCHAR;
1622
1623
157
    case OP_NOTSTAR:
1624
970
    case OP_NOTSTARI:
1625
970
    case OP_NOTMINSTAR:
1626
1.28k
    case OP_NOTMINSTARI:
1627
2.37k
    case OP_NOTPLUS:
1628
122k
    case OP_NOTPLUSI:
1629
124k
    case OP_NOTMINPLUS:
1630
135k
    case OP_NOTMINPLUSI:
1631
136k
    case OP_NOTQUERY:
1632
149k
    case OP_NOTQUERYI:
1633
149k
    case OP_NOTMINQUERY:
1634
285k
    case OP_NOTMINQUERYI:
1635
285k
    fc = *Fecode++ - ((Fop >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
1636
285k
    Lmin = rep_min[fc];
1637
285k
    Lmax = rep_max[fc];
1638
285k
    reptype = rep_typ[fc];
1639
1640
    /* Common code for all repeated single-character non-matches. */
1641
1642
288k
    REPEATNOTCHAR:
1643
288k
    GETCHARINCTEST(Lc, Fecode);
1644
1645
    /* The code is duplicated for the caseless and caseful cases, for speed,
1646
    since matching characters is likely to be quite common. First, ensure the
1647
    minimum number of matches are present. If Lmin = Lmax, we are done.
1648
    Otherwise, if minimizing, keep trying the rest of the expression and
1649
    advancing one matching character if failing, up to the maximum.
1650
    Alternatively, if maximizing, find the maximum number of characters and
1651
    work backwards. */
1652
1653
288k
    if (Fop >= OP_NOTSTARI)     /* Caseless */
1654
284k
      {
1655
284k
#ifdef SUPPORT_UNICODE
1656
284k
      if ((utf || ucp) && Lc > 127)
1657
0
        Loc = UCD_OTHERCASE(Lc);
1658
284k
      else
1659
284k
#endif /* SUPPORT_UNICODE */
1660
1661
284k
      Loc = TABLE_GET(Lc, mb->fcc, Lc);  /* Other case from table */
1662
1663
284k
#ifdef SUPPORT_UNICODE
1664
284k
      if (utf)
1665
143k
        {
1666
143k
        uint32_t d;
1667
143k
        for (i = 1; i <= Lmin; i++)
1668
696
          {
1669
696
          if (Feptr >= mb->end_subject)
1670
54
            {
1671
54
            SCHECK_PARTIAL();
1672
54
            RRETURN(MATCH_NOMATCH);
1673
54
            }
1674
642
          GETCHARINC(d, Feptr);
1675
642
          if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1676
642
          }
1677
143k
        }
1678
140k
      else
1679
140k
#endif  /* SUPPORT_UNICODE */
1680
1681
      /* Not UTF mode */
1682
140k
        {
1683
271k
        for (i = 1; i <= Lmin; i++)
1684
132k
          {
1685
132k
          if (Feptr >= mb->end_subject)
1686
682
            {
1687
682
            SCHECK_PARTIAL();
1688
682
            RRETURN(MATCH_NOMATCH);
1689
682
            }
1690
131k
          if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1691
130k
          Feptr++;
1692
130k
          }
1693
140k
        }
1694
1695
281k
      if (Lmin == Lmax) continue;  /* Finished for exact count */
1696
1697
281k
      if (reptype == REPTYPE_MIN)
1698
147k
        {
1699
147k
#ifdef SUPPORT_UNICODE
1700
147k
        if (utf)
1701
135k
          {
1702
135k
          uint32_t d;
1703
135k
          for (;;)
1704
269k
            {
1705
269k
            RMATCH(Fecode, RM204);
1706
269k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1707
269k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1708
142k
            if (Feptr >= mb->end_subject)
1709
1.39k
              {
1710
1.39k
              SCHECK_PARTIAL();
1711
1.39k
              RRETURN(MATCH_NOMATCH);
1712
1.39k
              }
1713
141k
            GETCHARINC(d, Feptr);
1714
141k
            if (Lc == d || Loc == d) RRETURN(MATCH_NOMATCH);
1715
141k
            }
1716
135k
          }
1717
11.9k
        else
1718
11.9k
#endif  /*SUPPORT_UNICODE */
1719
1720
        /* Not UTF mode */
1721
11.9k
          {
1722
11.9k
          for (;;)
1723
500k
            {
1724
500k
            RMATCH(Fecode, RM29);
1725
500k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1726
500k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1727
500k
            if (Feptr >= mb->end_subject)
1728
1.80k
              {
1729
1.80k
              SCHECK_PARTIAL();
1730
1.80k
              RRETURN(MATCH_NOMATCH);
1731
1.80k
              }
1732
498k
            if (Lc == *Feptr || Loc == *Feptr) RRETURN(MATCH_NOMATCH);
1733
488k
            Feptr++;
1734
488k
            }
1735
11.9k
          }
1736
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1737
0
        }
1738
1739
      /* Maximize case */
1740
1741
134k
      else
1742
134k
        {
1743
134k
        Lstart_eptr = Feptr;
1744
1745
134k
#ifdef SUPPORT_UNICODE
1746
134k
        if (utf)
1747
7.90k
          {
1748
7.90k
          uint32_t d;
1749
17.1k
          for (i = Lmin; i < Lmax; i++)
1750
10.2k
            {
1751
10.2k
            int len = 1;
1752
10.2k
            if (Feptr >= mb->end_subject)
1753
447
              {
1754
447
              SCHECK_PARTIAL();
1755
447
              break;
1756
447
              }
1757
9.80k
            GETCHARLEN(d, Feptr, len);
1758
9.80k
            if (Lc == d || Loc == d) break;
1759
9.28k
            Feptr += len;
1760
9.28k
            }
1761
1762
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1763
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1764
          go too far. */
1765
1766
7.90k
          if (reptype != REPTYPE_POS) for(;;)
1767
15.4k
            {
1768
15.4k
            if (Feptr <= Lstart_eptr) break;
1769
8.41k
            RMATCH(Fecode, RM205);
1770
8.41k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1771
8.41k
            Feptr--;
1772
8.41k
            BACKCHAR(Feptr);
1773
8.41k
            }
1774
7.90k
          }
1775
126k
        else
1776
126k
#endif  /* SUPPORT_UNICODE */
1777
1778
        /* Not UTF mode */
1779
126k
          {
1780
6.72M
          for (i = Lmin; i < Lmax; i++)
1781
6.71M
            {
1782
6.71M
            if (Feptr >= mb->end_subject)
1783
35.9k
              {
1784
35.9k
              SCHECK_PARTIAL();
1785
35.9k
              break;
1786
35.9k
              }
1787
6.68M
            if (Lc == *Feptr || Loc == *Feptr) break;
1788
6.59M
            Feptr++;
1789
6.59M
            }
1790
126k
          if (reptype != REPTYPE_POS) for (;;)
1791
6.71M
            {
1792
6.71M
            if (Feptr == Lstart_eptr) break;
1793
6.58M
            RMATCH(Fecode, RM30);
1794
6.58M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1795
6.58M
            Feptr--;
1796
6.58M
            }
1797
126k
          }
1798
134k
        }
1799
281k
      }
1800
1801
    /* Caseful comparisons */
1802
1803
3.86k
    else
1804
3.86k
      {
1805
3.86k
#ifdef SUPPORT_UNICODE
1806
3.86k
      if (utf)
1807
186
        {
1808
186
        uint32_t d;
1809
186
        for (i = 1; i <= Lmin; i++)
1810
0
          {
1811
0
          if (Feptr >= mb->end_subject)
1812
0
            {
1813
0
            SCHECK_PARTIAL();
1814
0
            RRETURN(MATCH_NOMATCH);
1815
0
            }
1816
0
          GETCHARINC(d, Feptr);
1817
0
          if (Lc == d) RRETURN(MATCH_NOMATCH);
1818
0
          }
1819
186
        }
1820
3.67k
      else
1821
3.67k
#endif
1822
      /* Not UTF mode */
1823
3.67k
        {
1824
6.30k
        for (i = 1; i <= Lmin; i++)
1825
2.70k
          {
1826
2.70k
          if (Feptr >= mb->end_subject)
1827
0
            {
1828
0
            SCHECK_PARTIAL();
1829
0
            RRETURN(MATCH_NOMATCH);
1830
0
            }
1831
2.70k
          if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1832
2.70k
          }
1833
3.67k
        }
1834
1835
3.77k
      if (Lmin == Lmax) continue;
1836
1837
3.77k
      if (reptype == REPTYPE_MIN)
1838
1.25k
        {
1839
1.25k
#ifdef SUPPORT_UNICODE
1840
1.25k
        if (utf)
1841
0
          {
1842
0
          uint32_t d;
1843
0
          for (;;)
1844
0
            {
1845
0
            RMATCH(Fecode, RM206);
1846
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1847
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1848
0
            if (Feptr >= mb->end_subject)
1849
0
              {
1850
0
              SCHECK_PARTIAL();
1851
0
              RRETURN(MATCH_NOMATCH);
1852
0
              }
1853
0
            GETCHARINC(d, Feptr);
1854
0
            if (Lc == d) RRETURN(MATCH_NOMATCH);
1855
0
            }
1856
0
          }
1857
1.25k
        else
1858
1.25k
#endif
1859
        /* Not UTF mode */
1860
1.25k
          {
1861
1.25k
          for (;;)
1862
78.2k
            {
1863
78.2k
            RMATCH(Fecode, RM31);
1864
78.2k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1865
78.2k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
1866
78.2k
            if (Feptr >= mb->end_subject)
1867
351
              {
1868
351
              SCHECK_PARTIAL();
1869
351
              RRETURN(MATCH_NOMATCH);
1870
351
              }
1871
77.9k
            if (Lc == *Feptr++) RRETURN(MATCH_NOMATCH);
1872
77.9k
            }
1873
1.25k
          }
1874
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
1875
0
        }
1876
1877
      /* Maximize case */
1878
1879
2.52k
      else
1880
2.52k
        {
1881
2.52k
        Lstart_eptr = Feptr;
1882
1883
2.52k
#ifdef SUPPORT_UNICODE
1884
2.52k
        if (utf)
1885
186
          {
1886
186
          uint32_t d;
1887
369
          for (i = Lmin; i < Lmax; i++)
1888
186
            {
1889
186
            int len = 1;
1890
186
            if (Feptr >= mb->end_subject)
1891
0
              {
1892
0
              SCHECK_PARTIAL();
1893
0
              break;
1894
0
              }
1895
186
            GETCHARLEN(d, Feptr, len);
1896
186
            if (Lc == d) break;
1897
183
            Feptr += len;
1898
183
            }
1899
1900
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
1901
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
1902
          go too far. */
1903
1904
186
          if (reptype != REPTYPE_POS) for(;;)
1905
369
            {
1906
369
            if (Feptr <= Lstart_eptr) break;
1907
183
            RMATCH(Fecode, RM207);
1908
183
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1909
183
            Feptr--;
1910
183
            BACKCHAR(Feptr);
1911
183
            }
1912
186
          }
1913
2.34k
        else
1914
2.34k
#endif
1915
        /* Not UTF mode */
1916
2.34k
          {
1917
31.4k
          for (i = Lmin; i < Lmax; i++)
1918
30.6k
            {
1919
30.6k
            if (Feptr >= mb->end_subject)
1920
490
              {
1921
490
              SCHECK_PARTIAL();
1922
490
              break;
1923
490
              }
1924
30.1k
            if (Lc == *Feptr) break;
1925
29.0k
            Feptr++;
1926
29.0k
            }
1927
2.34k
          if (reptype != REPTYPE_POS) for (;;)
1928
28.6k
            {
1929
28.6k
            if (Feptr == Lstart_eptr) break;
1930
26.6k
            RMATCH(Fecode, RM32);
1931
26.6k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1932
26.6k
            Feptr--;
1933
26.6k
            }
1934
2.34k
          }
1935
2.52k
        }
1936
3.77k
      }
1937
137k
    break;
1938
1939
137k
#undef Lstart_eptr
1940
137k
#undef Lmin
1941
137k
#undef Lmax
1942
137k
#undef Lc
1943
137k
#undef Loc
1944
1945
1946
    /* ===================================================================== */
1947
    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
1948
    are used when all the characters in the class have values in the range
1949
    0-255, and either the matching is caseful, or the characters are in the
1950
    range 0-127 when UTF processing is enabled. The only difference between
1951
    OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1952
    encountered. */
1953
1954
9.49M
#define Lmin               F->temp_32[0]
1955
8.63M
#define Lmax               F->temp_32[1]
1956
6.35M
#define Lstart_eptr        F->temp_sptr[0]
1957
6.46M
#define Lbyte_map_address  F->temp_sptr[1]
1958
4.16M
#define Lbyte_map          ((const unsigned char *)Lbyte_map_address)
1959
1960
369k
    case OP_NCLASS:
1961
2.29M
    case OP_CLASS:
1962
2.29M
      {
1963
2.29M
      Lbyte_map_address = Fecode + 1;           /* Save for matching */
1964
2.29M
      Fecode += 1 + (32 / sizeof(PCRE2_UCHAR)); /* Advance past the item */
1965
1966
      /* Look past the end of the item to see if there is repeat information
1967
      following. Then obey similar code to character type repeats. */
1968
1969
2.29M
      switch (*Fecode)
1970
2.29M
        {
1971
1.29M
        case OP_CRSTAR:
1972
1.33M
        case OP_CRMINSTAR:
1973
1.36M
        case OP_CRPLUS:
1974
1.36M
        case OP_CRMINPLUS:
1975
1.63M
        case OP_CRQUERY:
1976
1.71M
        case OP_CRMINQUERY:
1977
2.03M
        case OP_CRPOSSTAR:
1978
2.04M
        case OP_CRPOSPLUS:
1979
2.06M
        case OP_CRPOSQUERY:
1980
2.06M
        fc = *Fecode++ - OP_CRSTAR;
1981
2.06M
        Lmin = rep_min[fc];
1982
2.06M
        Lmax = rep_max[fc];
1983
2.06M
        reptype = rep_typ[fc];
1984
2.06M
        break;
1985
1986
0
        case OP_CRRANGE:
1987
0
        case OP_CRMINRANGE:
1988
0
        case OP_CRPOSRANGE:
1989
0
        Lmin = GET2(Fecode, 1);
1990
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
1991
0
        if (Lmax == 0) Lmax = UINT32_MAX;       /* Max 0 => infinity */
1992
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
1993
0
        Fecode += 1 + 2 * IMM2_SIZE;
1994
0
        break;
1995
1996
233k
        default:               /* No repeat follows */
1997
233k
        Lmin = Lmax = 1;
1998
233k
        break;
1999
2.29M
        }
2000
2001
      /* First, ensure the minimum number of matches are present. */
2002
2003
2.29M
#ifdef SUPPORT_UNICODE
2004
2.29M
      if (utf)
2005
113k
        {
2006
146k
        for (i = 1; i <= Lmin; i++)
2007
54.6k
          {
2008
54.6k
          if (Feptr >= mb->end_subject)
2009
612
            {
2010
612
            SCHECK_PARTIAL();
2011
612
            RRETURN(MATCH_NOMATCH);
2012
612
            }
2013
54.0k
          GETCHARINC(fc, Feptr);
2014
54.0k
          if (fc > 255)
2015
1.07k
            {
2016
1.07k
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2017
1.07k
            }
2018
52.9k
          else
2019
52.9k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2020
54.0k
          }
2021
113k
        }
2022
2.17M
      else
2023
2.17M
#endif
2024
      /* Not UTF mode */
2025
2.17M
        {
2026
2.31M
        for (i = 1; i <= Lmin; i++)
2027
217k
          {
2028
217k
          if (Feptr >= mb->end_subject)
2029
1.43k
            {
2030
1.43k
            SCHECK_PARTIAL();
2031
1.43k
            RRETURN(MATCH_NOMATCH);
2032
1.43k
            }
2033
215k
          fc = *Feptr++;
2034
#if PCRE2_CODE_UNIT_WIDTH != 8
2035
          if (fc > 255)
2036
            {
2037
            if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2038
            }
2039
          else
2040
#endif
2041
215k
          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2042
215k
          }
2043
2.17M
        }
2044
2045
      /* If Lmax == Lmin we are done. Continue with main loop. */
2046
2047
2.19M
      if (Lmin == Lmax) continue;
2048
2049
      /* If minimizing, keep testing the rest of the expression and advancing
2050
      the pointer while it matches the class. */
2051
2052
2.03M
      if (reptype == REPTYPE_MIN)
2053
124k
        {
2054
124k
#ifdef SUPPORT_UNICODE
2055
124k
        if (utf)
2056
38.4k
          {
2057
38.4k
          for (;;)
2058
59.7k
            {
2059
59.7k
            RMATCH(Fecode, RM200);
2060
59.7k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2061
59.7k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2062
48.1k
            if (Feptr >= mb->end_subject)
2063
1.00k
              {
2064
1.00k
              SCHECK_PARTIAL();
2065
1.00k
              RRETURN(MATCH_NOMATCH);
2066
1.00k
              }
2067
47.1k
            GETCHARINC(fc, Feptr);
2068
47.1k
            if (fc > 255)
2069
1.26k
              {
2070
1.26k
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2071
1.26k
              }
2072
45.8k
            else
2073
45.8k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2074
47.1k
            }
2075
38.4k
          }
2076
86.5k
        else
2077
86.5k
#endif
2078
        /* Not UTF mode */
2079
86.5k
          {
2080
86.5k
          for (;;)
2081
573k
            {
2082
573k
            RMATCH(Fecode, RM23);
2083
573k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2084
573k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2085
537k
            if (Feptr >= mb->end_subject)
2086
6.62k
              {
2087
6.62k
              SCHECK_PARTIAL();
2088
6.62k
              RRETURN(MATCH_NOMATCH);
2089
6.62k
              }
2090
530k
            fc = *Feptr++;
2091
#if PCRE2_CODE_UNIT_WIDTH != 8
2092
            if (fc > 255)
2093
              {
2094
              if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
2095
              }
2096
            else
2097
#endif
2098
530k
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
2099
530k
            }
2100
86.5k
          }
2101
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2102
0
        }
2103
2104
      /* If maximizing, find the longest possible run, then work backwards. */
2105
2106
1.91M
      else
2107
1.91M
        {
2108
1.91M
        Lstart_eptr = Feptr;
2109
2110
1.91M
#ifdef SUPPORT_UNICODE
2111
1.91M
        if (utf)
2112
26.8k
          {
2113
243k
          for (i = Lmin; i < Lmax; i++)
2114
241k
            {
2115
241k
            int len = 1;
2116
241k
            if (Feptr >= mb->end_subject)
2117
2.39k
              {
2118
2.39k
              SCHECK_PARTIAL();
2119
2.39k
              break;
2120
2.39k
              }
2121
238k
            GETCHARLEN(fc, Feptr, len);
2122
238k
            if (fc > 255)
2123
9.18k
              {
2124
9.18k
              if (Fop == OP_CLASS) break;
2125
9.18k
              }
2126
229k
            else
2127
229k
              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2128
216k
            Feptr += len;
2129
216k
            }
2130
2131
26.8k
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2132
2133
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2134
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2135
          go too far. */
2136
2137
24.4k
          for (;;)
2138
235k
            {
2139
235k
            RMATCH(Fecode, RM201);
2140
235k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2141
235k
            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2142
211k
            BACKCHAR(Feptr);
2143
211k
            }
2144
24.4k
          }
2145
1.88M
        else
2146
1.88M
#endif
2147
          /* Not UTF mode */
2148
1.88M
          {
2149
3.27M
          for (i = Lmin; i < Lmax; i++)
2150
3.11M
            {
2151
3.11M
            if (Feptr >= mb->end_subject)
2152
22.6k
              {
2153
22.6k
              SCHECK_PARTIAL();
2154
22.6k
              break;
2155
22.6k
              }
2156
3.09M
            fc = *Feptr;
2157
#if PCRE2_CODE_UNIT_WIDTH != 8
2158
            if (fc > 255)
2159
              {
2160
              if (Fop == OP_CLASS) break;
2161
              }
2162
            else
2163
#endif
2164
3.09M
            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
2165
1.38M
            Feptr++;
2166
1.38M
            }
2167
2168
1.88M
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2169
2170
4.20M
          while (Feptr >= Lstart_eptr)
2171
2.64M
            {
2172
2.64M
            RMATCH(Fecode, RM24);
2173
2.64M
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2174
2.64M
            Feptr--;
2175
2.64M
            }
2176
1.55M
          }
2177
2178
1.58M
        RRETURN(MATCH_NOMATCH);
2179
1.58M
        }
2180
2.03M
      }
2181
2182
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
2183
2184
0
#undef Lbyte_map_address
2185
0
#undef Lbyte_map
2186
0
#undef Lstart_eptr
2187
0
#undef Lmin
2188
0
#undef Lmax
2189
2190
2191
    /* ===================================================================== */
2192
    /* Match an extended character class. In the 8-bit library, this opcode is
2193
    encountered only when UTF-8 mode is supported. In the 16-bit and
2194
    32-bit libraries, codepoints greater than 255 may be encountered even when
2195
    UTF is not supported. */
2196
2197
751k
#define Lstart_eptr  F->temp_sptr[0]
2198
1.13M
#define Lxclass_data F->temp_sptr[1]
2199
1.11M
#define Lmin         F->temp_32[0]
2200
1.40M
#define Lmax         F->temp_32[1]
2201
2202
0
#ifdef SUPPORT_WIDE_CHARS
2203
270k
    case OP_XCLASS:
2204
270k
      {
2205
270k
      Lxclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2206
270k
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2207
2208
270k
      switch (*Fecode)
2209
270k
        {
2210
9.59k
        case OP_CRSTAR:
2211
13.0k
        case OP_CRMINSTAR:
2212
20.0k
        case OP_CRPLUS:
2213
20.9k
        case OP_CRMINPLUS:
2214
120k
        case OP_CRQUERY:
2215
157k
        case OP_CRMINQUERY:
2216
164k
        case OP_CRPOSSTAR:
2217
167k
        case OP_CRPOSPLUS:
2218
172k
        case OP_CRPOSQUERY:
2219
172k
        fc = *Fecode++ - OP_CRSTAR;
2220
172k
        Lmin = rep_min[fc];
2221
172k
        Lmax = rep_max[fc];
2222
172k
        reptype = rep_typ[fc];
2223
172k
        break;
2224
2225
0
        case OP_CRRANGE:
2226
0
        case OP_CRMINRANGE:
2227
0
        case OP_CRPOSRANGE:
2228
0
        Lmin = GET2(Fecode, 1);
2229
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2230
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2231
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2232
0
        Fecode += 1 + 2 * IMM2_SIZE;
2233
0
        break;
2234
2235
97.9k
        default:               /* No repeat follows */
2236
97.9k
        Lmin = Lmax = 1;
2237
97.9k
        break;
2238
270k
        }
2239
2240
      /* First, ensure the minimum number of matches are present. */
2241
2242
351k
      for (i = 1; i <= Lmin; i++)
2243
108k
        {
2244
108k
        if (Feptr >= mb->end_subject)
2245
1.66k
          {
2246
1.66k
          SCHECK_PARTIAL();
2247
1.66k
          RRETURN(MATCH_NOMATCH);
2248
1.66k
          }
2249
107k
        GETCHARINCTEST(fc, Feptr);
2250
107k
        if (!PRIV(xclass)(fc, Lxclass_data,
2251
107k
            (const uint8_t*)mb->start_code, utf))
2252
26.0k
          RRETURN(MATCH_NOMATCH);
2253
107k
        }
2254
2255
      /* If Lmax == Lmin we can just continue with the main loop. */
2256
2257
242k
      if (Lmin == Lmax) continue;
2258
2259
      /* If minimizing, keep testing the rest of the expression and advancing
2260
      the pointer while it matches the class. */
2261
2262
167k
      if (reptype == REPTYPE_MIN)
2263
41.4k
        {
2264
41.4k
        for (;;)
2265
128k
          {
2266
128k
          RMATCH(Fecode, RM100);
2267
128k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2268
128k
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2269
100k
          if (Feptr >= mb->end_subject)
2270
485
            {
2271
485
            SCHECK_PARTIAL();
2272
485
            RRETURN(MATCH_NOMATCH);
2273
485
            }
2274
100k
          GETCHARINCTEST(fc, Feptr);
2275
100k
          if (!PRIV(xclass)(fc, Lxclass_data,
2276
100k
              (const uint8_t*)mb->start_code, utf))
2277
12.8k
            RRETURN(MATCH_NOMATCH);
2278
100k
          }
2279
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2280
0
        }
2281
2282
      /* If maximizing, find the longest possible run, then work backwards. */
2283
2284
125k
      else
2285
125k
        {
2286
125k
        Lstart_eptr = Feptr;
2287
758k
        for (i = Lmin; i < Lmax; i++)
2288
661k
          {
2289
661k
          int len = 1;
2290
661k
          if (Feptr >= mb->end_subject)
2291
8.65k
            {
2292
8.65k
            SCHECK_PARTIAL();
2293
8.65k
            break;
2294
8.65k
            }
2295
652k
#ifdef SUPPORT_UNICODE
2296
652k
          GETCHARLENTEST(fc, Feptr, len);
2297
#else
2298
          fc = *Feptr;
2299
#endif
2300
652k
          if (!PRIV(xclass)(fc, Lxclass_data,
2301
652k
              (const uint8_t*)mb->start_code, utf)) break;
2302
633k
          Feptr += len;
2303
633k
          }
2304
2305
125k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2306
2307
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2308
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2309
        go too far. */
2310
2311
113k
        for(;;)
2312
625k
          {
2313
625k
          RMATCH(Fecode, RM101);
2314
625k
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315
625k
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2316
512k
#ifdef SUPPORT_UNICODE
2317
512k
          if (utf) BACKCHAR(Feptr);
2318
512k
#endif
2319
512k
          }
2320
113k
        RRETURN(MATCH_NOMATCH);
2321
113k
        }
2322
2323
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2324
0
      }
2325
0
#endif  /* SUPPORT_WIDE_CHARS: end of XCLASS */
2326
2327
0
#undef Lstart_eptr
2328
0
#undef Lxclass_data
2329
0
#undef Lmin
2330
0
#undef Lmax
2331
2332
2333
    /* ===================================================================== */
2334
    /* Match a complex, set-based character class. These opcodes are used when
2335
    there is complex nesting or logical operations within the character
2336
    class. */
2337
2338
0
#define Lstart_eptr  F->temp_sptr[0]
2339
0
#define Leclass_data F->temp_sptr[1]
2340
0
#define Leclass_len  F->temp_size
2341
0
#define Lmin         F->temp_32[0]
2342
0
#define Lmax         F->temp_32[1]
2343
2344
0
#ifdef SUPPORT_WIDE_CHARS
2345
0
    case OP_ECLASS:
2346
0
      {
2347
0
      Leclass_data = Fecode + 1 + LINK_SIZE;  /* Save for matching */
2348
0
      Fecode += GET(Fecode, 1);               /* Advance past the item */
2349
0
      Leclass_len = (PCRE2_SIZE)(Fecode - Leclass_data);
2350
2351
0
      switch (*Fecode)
2352
0
        {
2353
0
        case OP_CRSTAR:
2354
0
        case OP_CRMINSTAR:
2355
0
        case OP_CRPLUS:
2356
0
        case OP_CRMINPLUS:
2357
0
        case OP_CRQUERY:
2358
0
        case OP_CRMINQUERY:
2359
0
        case OP_CRPOSSTAR:
2360
0
        case OP_CRPOSPLUS:
2361
0
        case OP_CRPOSQUERY:
2362
0
        fc = *Fecode++ - OP_CRSTAR;
2363
0
        Lmin = rep_min[fc];
2364
0
        Lmax = rep_max[fc];
2365
0
        reptype = rep_typ[fc];
2366
0
        break;
2367
2368
0
        case OP_CRRANGE:
2369
0
        case OP_CRMINRANGE:
2370
0
        case OP_CRPOSRANGE:
2371
0
        Lmin = GET2(Fecode, 1);
2372
0
        Lmax = GET2(Fecode, 1 + IMM2_SIZE);
2373
0
        if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
2374
0
        reptype = rep_typ[*Fecode - OP_CRSTAR];
2375
0
        Fecode += 1 + 2 * IMM2_SIZE;
2376
0
        break;
2377
2378
0
        default:               /* No repeat follows */
2379
0
        Lmin = Lmax = 1;
2380
0
        break;
2381
0
        }
2382
2383
      /* First, ensure the minimum number of matches are present. */
2384
2385
0
      for (i = 1; i <= Lmin; i++)
2386
0
        {
2387
0
        if (Feptr >= mb->end_subject)
2388
0
          {
2389
0
          SCHECK_PARTIAL();
2390
0
          RRETURN(MATCH_NOMATCH);
2391
0
          }
2392
0
        GETCHARINCTEST(fc, Feptr);
2393
0
        if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2394
0
                          (const uint8_t*)mb->start_code, utf))
2395
0
          RRETURN(MATCH_NOMATCH);
2396
0
        }
2397
2398
      /* If Lmax == Lmin we can just continue with the main loop. */
2399
2400
0
      if (Lmin == Lmax) continue;
2401
2402
      /* If minimizing, keep testing the rest of the expression and advancing
2403
      the pointer while it matches the class. */
2404
2405
0
      if (reptype == REPTYPE_MIN)
2406
0
        {
2407
0
        for (;;)
2408
0
          {
2409
0
          RMATCH(Fecode, RM102);
2410
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2411
0
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
2412
0
          if (Feptr >= mb->end_subject)
2413
0
            {
2414
0
            SCHECK_PARTIAL();
2415
0
            RRETURN(MATCH_NOMATCH);
2416
0
            }
2417
0
          GETCHARINCTEST(fc, Feptr);
2418
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2419
0
                            (const uint8_t*)mb->start_code, utf))
2420
0
            RRETURN(MATCH_NOMATCH);
2421
0
          }
2422
0
        PCRE2_UNREACHABLE(); /* Control never reaches here */
2423
0
        }
2424
2425
      /* If maximizing, find the longest possible run, then work backwards. */
2426
2427
0
      else
2428
0
        {
2429
0
        Lstart_eptr = Feptr;
2430
0
        for (i = Lmin; i < Lmax; i++)
2431
0
          {
2432
0
          int len = 1;
2433
0
          if (Feptr >= mb->end_subject)
2434
0
            {
2435
0
            SCHECK_PARTIAL();
2436
0
            break;
2437
0
            }
2438
0
#ifdef SUPPORT_UNICODE
2439
0
          GETCHARLENTEST(fc, Feptr, len);
2440
#else
2441
          fc = *Feptr;
2442
#endif
2443
0
          if (!PRIV(eclass)(fc, Leclass_data, Leclass_data + Leclass_len,
2444
0
                            (const uint8_t*)mb->start_code, utf))
2445
0
            break;
2446
0
          Feptr += len;
2447
0
          }
2448
2449
0
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
2450
2451
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
2452
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
2453
        go too far. */
2454
2455
0
        for(;;)
2456
0
          {
2457
0
          RMATCH(Fecode, RM103);
2458
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2459
0
          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
2460
0
#ifdef SUPPORT_UNICODE
2461
0
          if (utf) BACKCHAR(Feptr);
2462
0
#endif
2463
0
          }
2464
0
        RRETURN(MATCH_NOMATCH);
2465
0
        }
2466
2467
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
2468
0
      }
2469
0
#endif  /* SUPPORT_WIDE_CHARS: end of ECLASS */
2470
2471
0
#undef Lstart_eptr
2472
0
#undef Leclass_data
2473
0
#undef Leclass_len
2474
0
#undef Lmin
2475
0
#undef Lmax
2476
2477
2478
    /* ===================================================================== */
2479
    /* Match various character types when PCRE2_UCP is not set. These opcodes
2480
    are not generated when PCRE2_UCP is set - instead appropriate property
2481
    tests are compiled. */
2482
2483
221k
    case OP_NOT_DIGIT:
2484
221k
    if (Feptr >= mb->end_subject)
2485
7.72k
      {
2486
7.72k
      SCHECK_PARTIAL();
2487
7.72k
      RRETURN(MATCH_NOMATCH);
2488
7.72k
      }
2489
213k
    GETCHARINCTEST(fc, Feptr);
2490
213k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
2491
251
      RRETURN(MATCH_NOMATCH);
2492
213k
    Fecode++;
2493
213k
    break;
2494
2495
191k
    case OP_DIGIT:
2496
191k
    if (Feptr >= mb->end_subject)
2497
7.42k
      {
2498
7.42k
      SCHECK_PARTIAL();
2499
7.42k
      RRETURN(MATCH_NOMATCH);
2500
7.42k
      }
2501
184k
    GETCHARINCTEST(fc, Feptr);
2502
184k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
2503
184k
      RRETURN(MATCH_NOMATCH);
2504
464
    Fecode++;
2505
464
    break;
2506
2507
397k
    case OP_NOT_WHITESPACE:
2508
397k
    if (Feptr >= mb->end_subject)
2509
1.53k
      {
2510
1.53k
      SCHECK_PARTIAL();
2511
1.53k
      RRETURN(MATCH_NOMATCH);
2512
1.53k
      }
2513
395k
    GETCHARINCTEST(fc, Feptr);
2514
395k
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
2515
14.4k
      RRETURN(MATCH_NOMATCH);
2516
381k
    Fecode++;
2517
381k
    break;
2518
2519
14.8k
    case OP_WHITESPACE:
2520
14.8k
    if (Feptr >= mb->end_subject)
2521
0
      {
2522
0
      SCHECK_PARTIAL();
2523
0
      RRETURN(MATCH_NOMATCH);
2524
0
      }
2525
14.8k
    GETCHARINCTEST(fc, Feptr);
2526
14.8k
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
2527
12.7k
      RRETURN(MATCH_NOMATCH);
2528
2.09k
    Fecode++;
2529
2.09k
    break;
2530
2531
5.82M
    case OP_NOT_WORDCHAR:
2532
5.82M
    if (Feptr >= mb->end_subject)
2533
91.8k
      {
2534
91.8k
      SCHECK_PARTIAL();
2535
91.8k
      RRETURN(MATCH_NOMATCH);
2536
91.8k
      }
2537
5.73M
    GETCHARINCTEST(fc, Feptr);
2538
5.73M
    if (CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
2539
438k
      RRETURN(MATCH_NOMATCH);
2540
5.29M
    Fecode++;
2541
5.29M
    break;
2542
2543
5.77M
    case OP_WORDCHAR:
2544
5.77M
    if (Feptr >= mb->end_subject)
2545
1.10k
      {
2546
1.10k
      SCHECK_PARTIAL();
2547
1.10k
      RRETURN(MATCH_NOMATCH);
2548
1.10k
      }
2549
5.77M
    GETCHARINCTEST(fc, Feptr);
2550
5.77M
    if (!CHMAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
2551
5.07M
      RRETURN(MATCH_NOMATCH);
2552
694k
    Fecode++;
2553
694k
    break;
2554
2555
272M
    case OP_ANYNL:
2556
272M
    if (Feptr >= mb->end_subject)
2557
3.04M
      {
2558
3.04M
      SCHECK_PARTIAL();
2559
3.04M
      RRETURN(MATCH_NOMATCH);
2560
3.04M
      }
2561
269M
    GETCHARINCTEST(fc, Feptr);
2562
269M
    switch(fc)
2563
269M
      {
2564
251M
      default: RRETURN(MATCH_NOMATCH);
2565
2566
52.4k
      case CHAR_CR:
2567
52.4k
      if (Feptr >= mb->end_subject)
2568
474
        {
2569
474
        SCHECK_PARTIAL();
2570
474
        }
2571
51.9k
      else if (UCHAR21TEST(Feptr) == CHAR_LF) Feptr++;
2572
52.4k
      break;
2573
2574
11.7M
      case CHAR_LF:
2575
11.7M
      break;
2576
2577
2.94M
      case CHAR_VT:
2578
5.89M
      case CHAR_FF:
2579
5.93M
      case CHAR_NEL:
2580
5.93M
#ifndef EBCDIC
2581
5.93M
      case 0x2028:
2582
5.93M
      case 0x2029:
2583
5.93M
#endif  /* Not EBCDIC */
2584
5.93M
      if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
2585
5.93M
      break;
2586
269M
      }
2587
17.7M
    Fecode++;
2588
17.7M
    break;
2589
2590
7.47M
    case OP_NOT_HSPACE:
2591
7.47M
    if (Feptr >= mb->end_subject)
2592
145k
      {
2593
145k
      SCHECK_PARTIAL();
2594
145k
      RRETURN(MATCH_NOMATCH);
2595
145k
      }
2596
7.33M
    GETCHARINCTEST(fc, Feptr);
2597
7.33M
    switch(fc)
2598
7.33M
      {
2599
3.48M
      HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2600
7.13M
      default: break;
2601
7.33M
      }
2602
7.13M
    Fecode++;
2603
7.13M
    break;
2604
2605
51.6k
    case OP_HSPACE:
2606
51.6k
    if (Feptr >= mb->end_subject)
2607
580
      {
2608
580
      SCHECK_PARTIAL();
2609
580
      RRETURN(MATCH_NOMATCH);
2610
580
      }
2611
51.1k
    GETCHARINCTEST(fc, Feptr);
2612
51.1k
    switch(fc)
2613
51.1k
      {
2614
880
      HSPACE_CASES: break;  /* Byte and multibyte cases */
2615
50.2k
      default: RRETURN(MATCH_NOMATCH);
2616
51.1k
      }
2617
880
    Fecode++;
2618
880
    break;
2619
2620
3.45M
    case OP_NOT_VSPACE:
2621
3.45M
    if (Feptr >= mb->end_subject)
2622
1.84k
      {
2623
1.84k
      SCHECK_PARTIAL();
2624
1.84k
      RRETURN(MATCH_NOMATCH);
2625
1.84k
      }
2626
3.45M
    GETCHARINCTEST(fc, Feptr);
2627
3.45M
    switch(fc)
2628
3.45M
      {
2629
704k
      VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2630
3.33M
      default: break;
2631
3.45M
      }
2632
3.33M
    Fecode++;
2633
3.33M
    break;
2634
2635
1.84M
    case OP_VSPACE:
2636
1.84M
    if (Feptr >= mb->end_subject)
2637
13.4k
      {
2638
13.4k
      SCHECK_PARTIAL();
2639
13.4k
      RRETURN(MATCH_NOMATCH);
2640
13.4k
      }
2641
1.83M
    GETCHARINCTEST(fc, Feptr);
2642
1.83M
    switch(fc)
2643
1.83M
      {
2644
73.9k
      VSPACE_CASES: break;
2645
1.76M
      default: RRETURN(MATCH_NOMATCH);
2646
1.83M
      }
2647
73.9k
    Fecode++;
2648
73.9k
    break;
2649
2650
2651
0
#ifdef SUPPORT_UNICODE
2652
2653
    /* ===================================================================== */
2654
    /* Check the next character by Unicode property. We will get here only
2655
    if the support is in the binary; otherwise a compile-time error occurs. */
2656
2657
283k
    case OP_PROP:
2658
335k
    case OP_NOTPROP:
2659
335k
    if (Feptr >= mb->end_subject)
2660
2.40k
      {
2661
2.40k
      SCHECK_PARTIAL();
2662
2.40k
      RRETURN(MATCH_NOMATCH);
2663
2.40k
      }
2664
333k
    GETCHARINCTEST(fc, Feptr);
2665
333k
      {
2666
333k
      const uint32_t *cp;
2667
333k
      uint32_t chartype;
2668
333k
      const ucd_record *prop = GET_UCD(fc);
2669
333k
      BOOL notmatch = Fop == OP_NOTPROP;
2670
2671
333k
      switch(Fecode[1])
2672
333k
        {
2673
0
        case PT_LAMP:
2674
0
        chartype = prop->chartype;
2675
0
        if ((chartype == ucp_Lu ||
2676
0
             chartype == ucp_Ll ||
2677
0
             chartype == ucp_Lt) == notmatch)
2678
0
          RRETURN(MATCH_NOMATCH);
2679
0
        break;
2680
2681
4.06k
        case PT_GC:
2682
4.06k
        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
2683
555
          RRETURN(MATCH_NOMATCH);
2684
3.50k
        break;
2685
2686
3.50k
        case PT_PC:
2687
0
        if ((Fecode[2] == prop->chartype) == notmatch)
2688
0
          RRETURN(MATCH_NOMATCH);
2689
0
        break;
2690
2691
0
        case PT_SC:
2692
0
        if ((Fecode[2] == prop->script) == notmatch)
2693
0
          RRETURN(MATCH_NOMATCH);
2694
0
        break;
2695
2696
0
        case PT_SCX:
2697
0
          {
2698
0
          BOOL ok = (Fecode[2] == prop->script ||
2699
0
                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
2700
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2701
0
          }
2702
0
        break;
2703
2704
        /* These are specials */
2705
2706
0
        case PT_ALNUM:
2707
0
        chartype = prop->chartype;
2708
0
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2709
0
             PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
2710
0
          RRETURN(MATCH_NOMATCH);
2711
0
        break;
2712
2713
        /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2714
        which means that Perl space and POSIX space are now identical. PCRE
2715
        was changed at release 8.34. */
2716
2717
52.5k
        case PT_SPACE:    /* Perl space */
2718
52.5k
        case PT_PXSPACE:  /* POSIX space */
2719
52.5k
        switch(fc)
2720
52.5k
          {
2721
122k
          HSPACE_CASES:
2722
122k
          VSPACE_CASES:
2723
63.2k
          if (notmatch) RRETURN(MATCH_NOMATCH);
2724
778
          break;
2725
2726
43.4k
          default:
2727
43.4k
          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
2728
5.51k
            RRETURN(MATCH_NOMATCH);
2729
37.9k
          break;
2730
52.5k
          }
2731
38.7k
        break;
2732
2733
115k
        case PT_WORD:
2734
115k
        chartype = prop->chartype;
2735
115k
        if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
2736
115k
             PRIV(ucp_gentype)[chartype] == ucp_N ||
2737
115k
             chartype == ucp_Mn ||
2738
115k
             chartype == ucp_Pc) == notmatch)
2739
70.4k
          RRETURN(MATCH_NOMATCH);
2740
45.1k
        break;
2741
2742
161k
        case PT_CLIST:
2743
#if PCRE2_CODE_UNIT_WIDTH == 32
2744
            if (fc > MAX_UTF_CODE_POINT)
2745
              {
2746
              if (notmatch) break;;
2747
              RRETURN(MATCH_NOMATCH);
2748
              }
2749
#endif
2750
161k
        cp = PRIV(ucd_caseless_sets) + Fecode[2];
2751
161k
        for (;;)
2752
211k
          {
2753
211k
          if (fc < *cp)
2754
154k
            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
2755
56.5k
          if (fc == *cp++)
2756
6.42k
            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
2757
56.5k
          }
2758
7.61k
        break;
2759
2760
7.61k
        case PT_UCNC:
2761
0
        if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
2762
0
             fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
2763
0
             fc >= 0xe000) == notmatch)
2764
0
          RRETURN(MATCH_NOMATCH);
2765
0
        break;
2766
2767
0
        case PT_BIDICL:
2768
0
        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
2769
0
          RRETURN(MATCH_NOMATCH);
2770
0
        break;
2771
2772
0
        case PT_BOOL:
2773
0
          {
2774
0
          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
2775
0
            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
2776
0
          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
2777
0
          }
2778
0
        break;
2779
2780
        /* This should never occur */
2781
2782
0
        default:
2783
0
        PCRE2_DEBUG_UNREACHABLE();
2784
0
        return PCRE2_ERROR_INTERNAL;
2785
333k
        }
2786
2787
94.9k
      Fecode += 3;
2788
94.9k
      }
2789
0
    break;
2790
2791
2792
    /* ===================================================================== */
2793
    /* Match an extended Unicode sequence. We will get here only if the support
2794
    is in the binary; otherwise a compile-time error occurs. */
2795
2796
61.1k
    case OP_EXTUNI:
2797
61.1k
    if (Feptr >= mb->end_subject)
2798
387
      {
2799
387
      SCHECK_PARTIAL();
2800
387
      RRETURN(MATCH_NOMATCH);
2801
387
      }
2802
60.7k
    else
2803
60.7k
      {
2804
60.7k
      GETCHARINCTEST(fc, Feptr);
2805
60.7k
      Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject, utf,
2806
60.7k
        NULL);
2807
60.7k
      }
2808
60.7k
    CHECK_PARTIAL();
2809
60.7k
    Fecode++;
2810
60.7k
    break;
2811
2812
0
#endif  /* SUPPORT_UNICODE */
2813
2814
2815
    /* ===================================================================== */
2816
    /* Match a single character type repeatedly. Note that the property type
2817
    does not need to be in a stack frame as it is not used within an RMATCH()
2818
    loop. */
2819
2820
834M
#define Lstart_eptr  F->temp_sptr[0]
2821
282M
#define Lmin         F->temp_32[0]
2822
263M
#define Lmax         F->temp_32[1]
2823
739M
#define Lctype       F->temp_32[2]
2824
2.40M
#define Lpropvalue   F->temp_32[3]
2825
2826
0
    case OP_TYPEEXACT:
2827
0
    Lmin = Lmax = GET2(Fecode, 1);
2828
0
    Fecode += 1 + IMM2_SIZE;
2829
0
    goto REPEATTYPE;
2830
2831
0
    case OP_TYPEUPTO:
2832
0
    case OP_TYPEMINUPTO:
2833
0
    Lmin = 0;
2834
0
    Lmax = GET2(Fecode, 1);
2835
0
    reptype = (*Fecode == OP_TYPEMINUPTO)? REPTYPE_MIN : REPTYPE_MAX;
2836
0
    Fecode += 1 + IMM2_SIZE;
2837
0
    goto REPEATTYPE;
2838
2839
7.40k
    case OP_TYPEPOSSTAR:
2840
7.40k
    reptype = REPTYPE_POS;
2841
7.40k
    Lmin = 0;
2842
7.40k
    Lmax = UINT32_MAX;
2843
7.40k
    Fecode++;
2844
7.40k
    goto REPEATTYPE;
2845
2846
3.59M
    case OP_TYPEPOSPLUS:
2847
3.59M
    reptype = REPTYPE_POS;
2848
3.59M
    Lmin = 1;
2849
3.59M
    Lmax = UINT32_MAX;
2850
3.59M
    Fecode++;
2851
3.59M
    goto REPEATTYPE;
2852
2853
47.1M
    case OP_TYPEPOSQUERY:
2854
47.1M
    reptype = REPTYPE_POS;
2855
47.1M
    Lmin = 0;
2856
47.1M
    Lmax = 1;
2857
47.1M
    Fecode++;
2858
47.1M
    goto REPEATTYPE;
2859
2860
0
    case OP_TYPEPOSUPTO:
2861
0
    reptype = REPTYPE_POS;
2862
0
    Lmin = 0;
2863
0
    Lmax = GET2(Fecode, 1);
2864
0
    Fecode += 1 + IMM2_SIZE;
2865
0
    goto REPEATTYPE;
2866
2867
17.0k
    case OP_TYPESTAR:
2868
17.8k
    case OP_TYPEMINSTAR:
2869
6.49M
    case OP_TYPEPLUS:
2870
6.91M
    case OP_TYPEMINPLUS:
2871
13.8M
    case OP_TYPEQUERY:
2872
14.0M
    case OP_TYPEMINQUERY:
2873
14.0M
    fc = *Fecode++ - OP_TYPESTAR;
2874
14.0M
    Lmin = rep_min[fc];
2875
14.0M
    Lmax = rep_max[fc];
2876
14.0M
    reptype = rep_typ[fc];
2877
2878
    /* Common code for all repeated character type matches. */
2879
2880
64.7M
    REPEATTYPE:
2881
64.7M
    Lctype = *Fecode++;      /* Code for the character type */
2882
2883
64.7M
#ifdef SUPPORT_UNICODE
2884
64.7M
    if (Lctype == OP_PROP || Lctype == OP_NOTPROP)
2885
1.83M
      {
2886
1.83M
      proptype = *Fecode++;
2887
1.83M
      Lpropvalue = *Fecode++;
2888
1.83M
      }
2889
62.9M
    else proptype = -1;
2890
64.7M
#endif
2891
2892
    /* First, ensure the minimum number of matches is present. Use inline
2893
    code for maximizing the speed, and do the type test once at the start
2894
    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
2895
    loops, we can use an ordinary variable for "notmatch". The code for UTF
2896
    mode is separated out for tidiness, except for Unicode property tests. */
2897
2898
64.7M
    if (Lmin > 0)
2899
10.4M
      {
2900
10.4M
#ifdef SUPPORT_UNICODE
2901
10.4M
      if (proptype >= 0)  /* Property tests in all modes */
2902
1.80M
        {
2903
1.80M
        BOOL notmatch = Lctype == OP_NOTPROP;
2904
1.80M
        switch(proptype)
2905
1.80M
          {
2906
0
          case PT_LAMP:
2907
0
          for (i = 1; i <= Lmin; i++)
2908
0
            {
2909
0
            int chartype;
2910
0
            if (Feptr >= mb->end_subject)
2911
0
              {
2912
0
              SCHECK_PARTIAL();
2913
0
              RRETURN(MATCH_NOMATCH);
2914
0
              }
2915
0
            GETCHARINCTEST(fc, Feptr);
2916
0
            chartype = UCD_CHARTYPE(fc);
2917
0
            if ((chartype == ucp_Lu ||
2918
0
                 chartype == ucp_Ll ||
2919
0
                 chartype == ucp_Lt) == notmatch)
2920
0
              RRETURN(MATCH_NOMATCH);
2921
0
            }
2922
0
          break;
2923
2924
7.89k
          case PT_GC:
2925
15.2k
          for (i = 1; i <= Lmin; i++)
2926
7.89k
            {
2927
7.89k
            if (Feptr >= mb->end_subject)
2928
0
              {
2929
0
              SCHECK_PARTIAL();
2930
0
              RRETURN(MATCH_NOMATCH);
2931
0
              }
2932
7.89k
            GETCHARINCTEST(fc, Feptr);
2933
7.89k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
2934
566
              RRETURN(MATCH_NOMATCH);
2935
7.89k
            }
2936
7.32k
          break;
2937
2938
337k
          case PT_PC:
2939
350k
          for (i = 1; i <= Lmin; i++)
2940
337k
            {
2941
337k
            if (Feptr >= mb->end_subject)
2942
0
              {
2943
0
              SCHECK_PARTIAL();
2944
0
              RRETURN(MATCH_NOMATCH);
2945
0
              }
2946
337k
            GETCHARINCTEST(fc, Feptr);
2947
337k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
2948
325k
              RRETURN(MATCH_NOMATCH);
2949
337k
            }
2950
12.2k
          break;
2951
2952
12.2k
          case PT_SC:
2953
0
          for (i = 1; i <= Lmin; i++)
2954
0
            {
2955
0
            if (Feptr >= mb->end_subject)
2956
0
              {
2957
0
              SCHECK_PARTIAL();
2958
0
              RRETURN(MATCH_NOMATCH);
2959
0
              }
2960
0
            GETCHARINCTEST(fc, Feptr);
2961
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
2962
0
              RRETURN(MATCH_NOMATCH);
2963
0
            }
2964
0
          break;
2965
2966
0
          case PT_SCX:
2967
0
          for (i = 1; i <= Lmin; i++)
2968
0
            {
2969
0
            BOOL ok;
2970
0
            const ucd_record *prop;
2971
0
            if (Feptr >= mb->end_subject)
2972
0
              {
2973
0
              SCHECK_PARTIAL();
2974
0
              RRETURN(MATCH_NOMATCH);
2975
0
              }
2976
0
            GETCHARINCTEST(fc, Feptr);
2977
0
            prop = GET_UCD(fc);
2978
0
            ok = (prop->script == Lpropvalue ||
2979
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
2980
0
            if (ok == notmatch)
2981
0
              RRETURN(MATCH_NOMATCH);
2982
0
            }
2983
0
          break;
2984
2985
0
          case PT_ALNUM:
2986
0
          for (i = 1; i <= Lmin; i++)
2987
0
            {
2988
0
            int category;
2989
0
            if (Feptr >= mb->end_subject)
2990
0
              {
2991
0
              SCHECK_PARTIAL();
2992
0
              RRETURN(MATCH_NOMATCH);
2993
0
              }
2994
0
            GETCHARINCTEST(fc, Feptr);
2995
0
            category = UCD_CATEGORY(fc);
2996
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
2997
0
              RRETURN(MATCH_NOMATCH);
2998
0
            }
2999
0
          break;
3000
3001
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3002
          which means that Perl space and POSIX space are now identical. PCRE
3003
          was changed at release 8.34. */
3004
3005
1.39M
          case PT_SPACE:    /* Perl space */
3006
1.39M
          case PT_PXSPACE:  /* POSIX space */
3007
2.02M
          for (i = 1; i <= Lmin; i++)
3008
1.39M
            {
3009
1.39M
            if (Feptr >= mb->end_subject)
3010
9.21k
              {
3011
9.21k
              SCHECK_PARTIAL();
3012
9.21k
              RRETURN(MATCH_NOMATCH);
3013
9.21k
              }
3014
1.39M
            GETCHARINCTEST(fc, Feptr);
3015
1.39M
            switch(fc)
3016
1.39M
              {
3017
5.97M
              HSPACE_CASES:
3018
5.97M
              VSPACE_CASES:
3019
2.79M
              if (notmatch) RRETURN(MATCH_NOMATCH);
3020
342k
              break;
3021
3022
988k
              default:
3023
988k
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
3024
703k
                RRETURN(MATCH_NOMATCH);
3025
284k
              break;
3026
1.39M
              }
3027
1.39M
            }
3028
627k
          break;
3029
3030
627k
          case PT_WORD:
3031
99.3k
          for (i = 1; i <= Lmin; i++)
3032
62.9k
            {
3033
62.9k
            int chartype, category;
3034
62.9k
            if (Feptr >= mb->end_subject)
3035
1.15k
              {
3036
1.15k
              SCHECK_PARTIAL();
3037
1.15k
              RRETURN(MATCH_NOMATCH);
3038
1.15k
              }
3039
61.7k
            GETCHARINCTEST(fc, Feptr);
3040
61.7k
            chartype = UCD_CHARTYPE(fc);
3041
61.7k
            category = PRIV(ucp_gentype)[chartype];
3042
61.7k
            if ((category == ucp_L || category == ucp_N ||
3043
61.7k
                 chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
3044
25.2k
              RRETURN(MATCH_NOMATCH);
3045
61.7k
            }
3046
36.4k
          break;
3047
3048
36.4k
          case PT_CLIST:
3049
0
          for (i = 1; i <= Lmin; i++)
3050
0
            {
3051
0
            const uint32_t *cp;
3052
0
            if (Feptr >= mb->end_subject)
3053
0
              {
3054
0
              SCHECK_PARTIAL();
3055
0
              RRETURN(MATCH_NOMATCH);
3056
0
              }
3057
0
            GETCHARINCTEST(fc, Feptr);
3058
#if PCRE2_CODE_UNIT_WIDTH == 32
3059
            if (fc > MAX_UTF_CODE_POINT)
3060
              {
3061
              if (notmatch) continue;
3062
              RRETURN(MATCH_NOMATCH);
3063
              }
3064
#endif
3065
0
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3066
0
            for (;;)
3067
0
              {
3068
0
              if (fc < *cp)
3069
0
                {
3070
0
                if (notmatch) break;
3071
0
                RRETURN(MATCH_NOMATCH);
3072
0
                }
3073
0
              if (fc == *cp++)
3074
0
                {
3075
0
                if (notmatch) RRETURN(MATCH_NOMATCH);
3076
0
                break;
3077
0
                }
3078
0
              }
3079
0
            }
3080
0
          break;
3081
3082
0
          case PT_UCNC:
3083
0
          for (i = 1; i <= Lmin; i++)
3084
0
            {
3085
0
            if (Feptr >= mb->end_subject)
3086
0
              {
3087
0
              SCHECK_PARTIAL();
3088
0
              RRETURN(MATCH_NOMATCH);
3089
0
              }
3090
0
            GETCHARINCTEST(fc, Feptr);
3091
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3092
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3093
0
                 fc >= 0xe000) == notmatch)
3094
0
              RRETURN(MATCH_NOMATCH);
3095
0
            }
3096
0
          break;
3097
3098
0
          case PT_BIDICL:
3099
0
          for (i = 1; i <= Lmin; i++)
3100
0
            {
3101
0
            if (Feptr >= mb->end_subject)
3102
0
              {
3103
0
              SCHECK_PARTIAL();
3104
0
              RRETURN(MATCH_NOMATCH);
3105
0
              }
3106
0
            GETCHARINCTEST(fc, Feptr);
3107
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
3108
0
              RRETURN(MATCH_NOMATCH);
3109
0
            }
3110
0
          break;
3111
3112
0
          case PT_BOOL:
3113
0
          for (i = 1; i <= Lmin; i++)
3114
0
            {
3115
0
            BOOL ok;
3116
0
            const ucd_record *prop;
3117
0
            if (Feptr >= mb->end_subject)
3118
0
              {
3119
0
              SCHECK_PARTIAL();
3120
0
              RRETURN(MATCH_NOMATCH);
3121
0
              }
3122
0
            GETCHARINCTEST(fc, Feptr);
3123
0
            prop = GET_UCD(fc);
3124
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3125
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3126
0
            if (ok == notmatch)
3127
0
              RRETURN(MATCH_NOMATCH);
3128
0
            }
3129
0
          break;
3130
3131
          /* This should not occur */
3132
3133
0
          default:
3134
0
          PCRE2_DEBUG_UNREACHABLE();
3135
0
          return PCRE2_ERROR_INTERNAL;
3136
1.80M
          }
3137
1.80M
        }
3138
3139
      /* Match extended Unicode sequences. We will get here only if the
3140
      support is in the binary; otherwise a compile-time error occurs. */
3141
3142
8.68M
      else if (Lctype == OP_EXTUNI)
3143
45.7k
        {
3144
91.4k
        for (i = 1; i <= Lmin; i++)
3145
45.7k
          {
3146
45.7k
          if (Feptr >= mb->end_subject)
3147
0
            {
3148
0
            SCHECK_PARTIAL();
3149
0
            RRETURN(MATCH_NOMATCH);
3150
0
            }
3151
45.7k
          else
3152
45.7k
            {
3153
45.7k
            GETCHARINCTEST(fc, Feptr);
3154
45.7k
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject,
3155
45.7k
              mb->end_subject, utf, NULL);
3156
45.7k
            }
3157
45.7k
          CHECK_PARTIAL();
3158
45.7k
          }
3159
45.7k
        }
3160
8.64M
      else
3161
8.64M
#endif     /* SUPPORT_UNICODE */
3162
3163
/* Handle all other cases in UTF mode */
3164
3165
8.64M
#ifdef SUPPORT_UNICODE
3166
8.64M
      if (utf) switch(Lctype)
3167
4.15M
        {
3168
773
        case OP_ANY:
3169
1.54k
        for (i = 1; i <= Lmin; i++)
3170
773
          {
3171
773
          if (Feptr >= mb->end_subject)
3172
0
            {
3173
0
            SCHECK_PARTIAL();
3174
0
            RRETURN(MATCH_NOMATCH);
3175
0
            }
3176
773
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3177
769
          if (mb->partial != 0 &&
3178
769
              Feptr + 1 >= mb->end_subject &&
3179
769
              NLBLOCK->nltype == NLTYPE_FIXED &&
3180
769
              NLBLOCK->nllen == 2 &&
3181
769
              UCHAR21(Feptr) == NLBLOCK->nl[0])
3182
0
            {
3183
0
            mb->hitend = TRUE;
3184
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3185
0
            }
3186
769
          Feptr++;
3187
769
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3188
769
          }
3189
769
        break;
3190
3191
4.05M
        case OP_ALLANY:
3192
8.08M
        for (i = 1; i <= Lmin; i++)
3193
4.05M
          {
3194
4.05M
          if (Feptr >= mb->end_subject)
3195
18.3k
            {
3196
18.3k
            SCHECK_PARTIAL();
3197
18.3k
            RRETURN(MATCH_NOMATCH);
3198
18.3k
            }
3199
4.03M
          Feptr++;
3200
4.03M
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3201
4.03M
          }
3202
4.03M
        break;
3203
3204
4.03M
        case OP_ANYBYTE:
3205
49.0k
        if (Feptr > mb->end_subject - Lmin) RRETURN(MATCH_NOMATCH);
3206
49.0k
        Feptr += Lmin;
3207
49.0k
        break;
3208
3209
0
        case OP_ANYNL:
3210
0
        for (i = 1; i <= Lmin; i++)
3211
0
          {
3212
0
          if (Feptr >= mb->end_subject)
3213
0
            {
3214
0
            SCHECK_PARTIAL();
3215
0
            RRETURN(MATCH_NOMATCH);
3216
0
            }
3217
0
          GETCHARINC(fc, Feptr);
3218
0
          switch(fc)
3219
0
            {
3220
0
            default: RRETURN(MATCH_NOMATCH);
3221
3222
0
            case CHAR_CR:
3223
0
            if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
3224
0
            break;
3225
3226
0
            case CHAR_LF:
3227
0
            break;
3228
3229
0
            case CHAR_VT:
3230
0
            case CHAR_FF:
3231
0
            case CHAR_NEL:
3232
0
#ifndef EBCDIC
3233
0
            case 0x2028:
3234
0
            case 0x2029:
3235
0
#endif  /* Not EBCDIC */
3236
0
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3237
0
            break;
3238
0
            }
3239
0
          }
3240
0
        break;
3241
3242
30.3k
        case OP_NOT_HSPACE:
3243
57.9k
        for (i = 1; i <= Lmin; i++)
3244
30.3k
          {
3245
30.3k
          if (Feptr >= mb->end_subject)
3246
5
            {
3247
5
            SCHECK_PARTIAL();
3248
5
            RRETURN(MATCH_NOMATCH);
3249
5
            }
3250
30.3k
          GETCHARINC(fc, Feptr);
3251
30.3k
          switch(fc)
3252
30.3k
            {
3253
44.3k
            HSPACE_CASES: RRETURN(MATCH_NOMATCH);
3254
27.6k
            default: break;
3255
30.3k
            }
3256
30.3k
          }
3257
27.6k
        break;
3258
3259
27.6k
        case OP_HSPACE:
3260
0
        for (i = 1; i <= Lmin; i++)
3261
0
          {
3262
0
          if (Feptr >= mb->end_subject)
3263
0
            {
3264
0
            SCHECK_PARTIAL();
3265
0
            RRETURN(MATCH_NOMATCH);
3266
0
            }
3267
0
          GETCHARINC(fc, Feptr);
3268
0
          switch(fc)
3269
0
            {
3270
0
            HSPACE_CASES: break;
3271
0
            default: RRETURN(MATCH_NOMATCH);
3272
0
            }
3273
0
          }
3274
0
        break;
3275
3276
18.8k
        case OP_NOT_VSPACE:
3277
36.4k
        for (i = 1; i <= Lmin; i++)
3278
18.8k
          {
3279
18.8k
          if (Feptr >= mb->end_subject)
3280
0
            {
3281
0
            SCHECK_PARTIAL();
3282
0
            RRETURN(MATCH_NOMATCH);
3283
0
            }
3284
18.8k
          GETCHARINC(fc, Feptr);
3285
18.8k
          switch(fc)
3286
18.8k
            {
3287
8.45k
            VSPACE_CASES: RRETURN(MATCH_NOMATCH);
3288
17.5k
            default: break;
3289
18.8k
            }
3290
18.8k
          }
3291
17.5k
        break;
3292
3293
17.5k
        case OP_VSPACE:
3294
2.64k
        for (i = 1; i <= Lmin; i++)
3295
2.51k
          {
3296
2.51k
          if (Feptr >= mb->end_subject)
3297
0
            {
3298
0
            SCHECK_PARTIAL();
3299
0
            RRETURN(MATCH_NOMATCH);
3300
0
            }
3301
2.51k
          GETCHARINC(fc, Feptr);
3302
2.51k
          switch(fc)
3303
2.51k
            {
3304
132
            VSPACE_CASES: break;
3305
2.38k
            default: RRETURN(MATCH_NOMATCH);
3306
2.51k
            }
3307
2.51k
          }
3308
132
        break;
3309
3310
132
        case OP_NOT_DIGIT:
3311
0
        for (i = 1; i <= Lmin; i++)
3312
0
          {
3313
0
          if (Feptr >= mb->end_subject)
3314
0
            {
3315
0
            SCHECK_PARTIAL();
3316
0
            RRETURN(MATCH_NOMATCH);
3317
0
            }
3318
0
          GETCHARINC(fc, Feptr);
3319
0
          if (fc < 128 && (mb->ctypes[fc] & ctype_digit) != 0)
3320
0
            RRETURN(MATCH_NOMATCH);
3321
0
          }
3322
0
        break;
3323
3324
0
        case OP_DIGIT:
3325
0
        for (i = 1; i <= Lmin; i++)
3326
0
          {
3327
0
          uint32_t cc;
3328
0
          if (Feptr >= mb->end_subject)
3329
0
            {
3330
0
            SCHECK_PARTIAL();
3331
0
            RRETURN(MATCH_NOMATCH);
3332
0
            }
3333
0
          cc = UCHAR21(Feptr);
3334
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_digit) == 0)
3335
0
            RRETURN(MATCH_NOMATCH);
3336
0
          Feptr++;
3337
          /* No need to skip more code units - we know it has only one. */
3338
0
          }
3339
0
        break;
3340
3341
0
        case OP_NOT_WHITESPACE:
3342
0
        for (i = 1; i <= Lmin; i++)
3343
0
          {
3344
0
          uint32_t cc;
3345
0
          if (Feptr >= mb->end_subject)
3346
0
            {
3347
0
            SCHECK_PARTIAL();
3348
0
            RRETURN(MATCH_NOMATCH);
3349
0
            }
3350
0
          cc = UCHAR21(Feptr);
3351
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0)
3352
0
            RRETURN(MATCH_NOMATCH);
3353
0
          Feptr++;
3354
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3355
0
          }
3356
0
        break;
3357
3358
0
        case OP_WHITESPACE:
3359
0
        for (i = 1; i <= Lmin; i++)
3360
0
          {
3361
0
          uint32_t cc;
3362
0
          if (Feptr >= mb->end_subject)
3363
0
            {
3364
0
            SCHECK_PARTIAL();
3365
0
            RRETURN(MATCH_NOMATCH);
3366
0
            }
3367
0
          cc = UCHAR21(Feptr);
3368
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_space) == 0)
3369
0
            RRETURN(MATCH_NOMATCH);
3370
0
          Feptr++;
3371
          /* No need to skip more code units - we know it has only one. */
3372
0
          }
3373
0
        break;
3374
3375
0
        case OP_NOT_WORDCHAR:
3376
0
        for (i = 1; i <= Lmin; i++)
3377
0
          {
3378
0
          uint32_t cc;
3379
0
          if (Feptr >= mb->end_subject)
3380
0
            {
3381
0
            SCHECK_PARTIAL();
3382
0
            RRETURN(MATCH_NOMATCH);
3383
0
            }
3384
0
          cc = UCHAR21(Feptr);
3385
0
          if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0)
3386
0
            RRETURN(MATCH_NOMATCH);
3387
0
          Feptr++;
3388
0
          ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
3389
0
          }
3390
0
        break;
3391
3392
0
        case OP_WORDCHAR:
3393
0
        for (i = 1; i <= Lmin; i++)
3394
0
          {
3395
0
          uint32_t cc;
3396
0
          if (Feptr >= mb->end_subject)
3397
0
            {
3398
0
            SCHECK_PARTIAL();
3399
0
            RRETURN(MATCH_NOMATCH);
3400
0
            }
3401
0
          cc = UCHAR21(Feptr);
3402
0
          if (cc >= 128 || (mb->ctypes[cc] & ctype_word) == 0)
3403
0
            RRETURN(MATCH_NOMATCH);
3404
0
          Feptr++;
3405
          /* No need to skip more code units - we know it has only one. */
3406
0
          }
3407
0
        break;
3408
3409
0
        default:
3410
0
        PCRE2_DEBUG_UNREACHABLE();
3411
0
        return PCRE2_ERROR_INTERNAL;
3412
4.15M
        }  /* End switch(Lctype) */
3413
3414
4.48M
      else
3415
4.48M
#endif     /* SUPPORT_UNICODE */
3416
3417
      /* Code for the non-UTF case for minimum matching of operators other
3418
      than OP_PROP and OP_NOTPROP. */
3419
3420
4.48M
      switch(Lctype)
3421
4.48M
        {
3422
72.7k
        case OP_ANY:
3423
144k
        for (i = 1; i <= Lmin; i++)
3424
72.7k
          {
3425
72.7k
          if (Feptr >= mb->end_subject)
3426
0
            {
3427
0
            SCHECK_PARTIAL();
3428
0
            RRETURN(MATCH_NOMATCH);
3429
0
            }
3430
72.7k
          if (IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3431
71.4k
          if (mb->partial != 0 &&
3432
71.4k
              Feptr + 1 >= mb->end_subject &&
3433
71.4k
              NLBLOCK->nltype == NLTYPE_FIXED &&
3434
71.4k
              NLBLOCK->nllen == 2 &&
3435
71.4k
              *Feptr == NLBLOCK->nl[0])
3436
0
            {
3437
0
            mb->hitend = TRUE;
3438
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
3439
0
            }
3440
71.4k
          Feptr++;
3441
71.4k
          }
3442
71.4k
        break;
3443
3444
71.4k
        case OP_ALLANY:
3445
11.3k
        if (Feptr > mb->end_subject - Lmin)
3446
120
          {
3447
120
          SCHECK_PARTIAL();
3448
120
          RRETURN(MATCH_NOMATCH);
3449
120
          }
3450
11.1k
        Feptr += Lmin;
3451
11.1k
        break;
3452
3453
        /* This OP_ANYBYTE case will never be reached because \C gets turned
3454
        into OP_ALLANY in non-UTF mode. Cut out the code so that coverage
3455
        reports don't complain about its never being used. */
3456
3457
/*        case OP_ANYBYTE:
3458
*        if (Feptr > mb->end_subject - Lmin)
3459
*          {
3460
*          SCHECK_PARTIAL();
3461
*          RRETURN(MATCH_NOMATCH);
3462
*          }
3463
*        Feptr += Lmin;
3464
*        break;
3465
*/
3466
3.17M
        case OP_ANYNL:
3467
3.22M
        for (i = 1; i <= Lmin; i++)
3468
3.17M
          {
3469
3.17M
          if (Feptr >= mb->end_subject)
3470
6.10k
            {
3471
6.10k
            SCHECK_PARTIAL();
3472
6.10k
            RRETURN(MATCH_NOMATCH);
3473
6.10k
            }
3474
3.16M
          switch(*Feptr++)
3475
3.16M
            {
3476
3.12M
            default: RRETURN(MATCH_NOMATCH);
3477
3478
13.1k
            case CHAR_CR:
3479
13.1k
            if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
3480
13.1k
            break;
3481
3482
19.0k
            case CHAR_LF:
3483
19.0k
            break;
3484
3485
5.15k
            case CHAR_VT:
3486
12.9k
            case CHAR_FF:
3487
13.8k
            case CHAR_NEL:
3488
#if PCRE2_CODE_UNIT_WIDTH != 8
3489
            case 0x2028:
3490
            case 0x2029:
3491
#endif
3492
13.8k
            if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) RRETURN(MATCH_NOMATCH);
3493
13.8k
            break;
3494
3.16M
            }
3495
3.16M
          }
3496
45.9k
        break;
3497
3498
45.9k
        case OP_NOT_HSPACE:
3499
50.1k
        for (i = 1; i <= Lmin; i++)
3500
25.4k
          {
3501
25.4k
          if (Feptr >= mb->end_subject)
3502
0
            {
3503
0
            SCHECK_PARTIAL();
3504
0
            RRETURN(MATCH_NOMATCH);
3505
0
            }
3506
25.4k
          switch(*Feptr++)
3507
25.4k
            {
3508
24.6k
            default: break;
3509
24.6k
            HSPACE_BYTE_CASES:
3510
#if PCRE2_CODE_UNIT_WIDTH != 8
3511
            HSPACE_MULTIBYTE_CASES:
3512
#endif
3513
1.70k
            RRETURN(MATCH_NOMATCH);
3514
25.4k
            }
3515
25.4k
          }
3516
24.6k
        break;
3517
3518
24.6k
        case OP_HSPACE:
3519
2.16k
        for (i = 1; i <= Lmin; i++)
3520
1.28k
          {
3521
1.28k
          if (Feptr >= mb->end_subject)
3522
0
            {
3523
0
            SCHECK_PARTIAL();
3524
0
            RRETURN(MATCH_NOMATCH);
3525
0
            }
3526
1.28k
          switch(*Feptr++)
3527
1.28k
            {
3528
405
            default: RRETURN(MATCH_NOMATCH);
3529
2.58k
            HSPACE_BYTE_CASES:
3530
#if PCRE2_CODE_UNIT_WIDTH != 8
3531
            HSPACE_MULTIBYTE_CASES:
3532
#endif
3533
2.58k
            break;
3534
1.28k
            }
3535
1.28k
          }
3536
882
        break;
3537
3538
746k
        case OP_NOT_VSPACE:
3539
1.47M
        for (i = 1; i <= Lmin; i++)
3540
746k
          {
3541
746k
          if (Feptr >= mb->end_subject)
3542
4.03k
            {
3543
4.03k
            SCHECK_PARTIAL();
3544
4.03k
            RRETURN(MATCH_NOMATCH);
3545
4.03k
            }
3546
742k
          switch(*Feptr++)
3547
742k
            {
3548
66.6k
            VSPACE_BYTE_CASES:
3549
#if PCRE2_CODE_UNIT_WIDTH != 8
3550
            VSPACE_MULTIBYTE_CASES:
3551
#endif
3552
66.6k
            RRETURN(MATCH_NOMATCH);
3553
723k
            default: break;
3554
742k
            }
3555
742k
          }
3556
723k
        break;
3557
3558
723k
        case OP_VSPACE:
3559
98
        for (i = 1; i <= Lmin; i++)
3560
82
          {
3561
82
          if (Feptr >= mb->end_subject)
3562
0
            {
3563
0
            SCHECK_PARTIAL();
3564
0
            RRETURN(MATCH_NOMATCH);
3565
0
            }
3566
82
          switch(*Feptr++)
3567
82
            {
3568
66
            default: RRETURN(MATCH_NOMATCH);
3569
80
            VSPACE_BYTE_CASES:
3570
#if PCRE2_CODE_UNIT_WIDTH != 8
3571
            VSPACE_MULTIBYTE_CASES:
3572
#endif
3573
80
            break;
3574
82
            }
3575
82
          }
3576
16
        break;
3577
3578
96.8k
        case OP_NOT_DIGIT:
3579
159k
        for (i = 1; i <= Lmin; i++)
3580
96.8k
          {
3581
96.8k
          if (Feptr >= mb->end_subject)
3582
4.32k
            {
3583
4.32k
            SCHECK_PARTIAL();
3584
4.32k
            RRETURN(MATCH_NOMATCH);
3585
4.32k
            }
3586
92.4k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
3587
30.2k
            RRETURN(MATCH_NOMATCH);
3588
62.2k
          Feptr++;
3589
62.2k
          }
3590
62.2k
        break;
3591
3592
62.2k
        case OP_DIGIT:
3593
0
        for (i = 1; i <= Lmin; i++)
3594
0
          {
3595
0
          if (Feptr >= mb->end_subject)
3596
0
            {
3597
0
            SCHECK_PARTIAL();
3598
0
            RRETURN(MATCH_NOMATCH);
3599
0
            }
3600
0
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
3601
0
            RRETURN(MATCH_NOMATCH);
3602
0
          Feptr++;
3603
0
          }
3604
0
        break;
3605
3606
291k
        case OP_NOT_WHITESPACE:
3607
576k
        for (i = 1; i <= Lmin; i++)
3608
291k
          {
3609
291k
          if (Feptr >= mb->end_subject)
3610
2.26k
            {
3611
2.26k
            SCHECK_PARTIAL();
3612
2.26k
            RRETURN(MATCH_NOMATCH);
3613
2.26k
            }
3614
289k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
3615
4.80k
            RRETURN(MATCH_NOMATCH);
3616
284k
          Feptr++;
3617
284k
          }
3618
284k
        break;
3619
3620
284k
        case OP_WHITESPACE:
3621
68
        for (i = 1; i <= Lmin; i++)
3622
58
          {
3623
58
          if (Feptr >= mb->end_subject)
3624
0
            {
3625
0
            SCHECK_PARTIAL();
3626
0
            RRETURN(MATCH_NOMATCH);
3627
0
            }
3628
58
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
3629
48
            RRETURN(MATCH_NOMATCH);
3630
10
          Feptr++;
3631
10
          }
3632
10
        break;
3633
3634
32.8k
        case OP_NOT_WORDCHAR:
3635
58.4k
        for (i = 1; i <= Lmin; i++)
3636
32.8k
          {
3637
32.8k
          if (Feptr >= mb->end_subject)
3638
237
            {
3639
237
            SCHECK_PARTIAL();
3640
237
            RRETURN(MATCH_NOMATCH);
3641
237
            }
3642
32.5k
          if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
3643
7.01k
            RRETURN(MATCH_NOMATCH);
3644
25.5k
          Feptr++;
3645
25.5k
          }
3646
25.5k
        break;
3647
3648
30.5k
        case OP_WORDCHAR:
3649
50.1k
        for (i = 1; i <= Lmin; i++)
3650
30.5k
          {
3651
30.5k
          if (Feptr >= mb->end_subject)
3652
21
            {
3653
21
            SCHECK_PARTIAL();
3654
21
            RRETURN(MATCH_NOMATCH);
3655
21
            }
3656
30.5k
          if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
3657
11.0k
            RRETURN(MATCH_NOMATCH);
3658
19.5k
          Feptr++;
3659
19.5k
          }
3660
19.5k
        break;
3661
3662
19.5k
        default:
3663
0
        PCRE2_DEBUG_UNREACHABLE();
3664
0
        return PCRE2_ERROR_INTERNAL;
3665
4.48M
        }
3666
10.4M
      }
3667
3668
    /* If Lmin = Lmax we are done. Continue with the main loop. */
3669
3670
60.3M
    if (Lmin == Lmax) continue;
3671
3672
    /* If minimizing, we have to test the rest of the pattern before each
3673
    subsequent match. This means we cannot use a local "notmatch" variable as
3674
    in the other cases. As all 4 temporary 32-bit values in the frame are
3675
    already in use, just test the type each time. */
3676
3677
60.3M
    if (reptype == REPTYPE_MIN)
3678
474k
      {
3679
474k
#ifdef SUPPORT_UNICODE
3680
474k
      if (proptype >= 0)
3681
31.2k
        {
3682
31.2k
        switch(proptype)
3683
31.2k
          {
3684
0
          case PT_LAMP:
3685
0
          for (;;)
3686
0
            {
3687
0
            int chartype;
3688
0
            RMATCH(Fecode, RM208);
3689
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3690
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3691
0
            if (Feptr >= mb->end_subject)
3692
0
              {
3693
0
              SCHECK_PARTIAL();
3694
0
              RRETURN(MATCH_NOMATCH);
3695
0
              }
3696
0
            GETCHARINCTEST(fc, Feptr);
3697
0
            chartype = UCD_CHARTYPE(fc);
3698
0
            if ((chartype == ucp_Lu ||
3699
0
                 chartype == ucp_Ll ||
3700
0
                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
3701
0
              RRETURN(MATCH_NOMATCH);
3702
0
            }
3703
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3704
3705
6.75k
          case PT_GC:
3706
6.75k
          for (;;)
3707
127k
            {
3708
127k
            RMATCH(Fecode, RM209);
3709
127k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3710
127k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3711
127k
            if (Feptr >= mb->end_subject)
3712
744
              {
3713
744
              SCHECK_PARTIAL();
3714
744
              RRETURN(MATCH_NOMATCH);
3715
744
              }
3716
126k
            GETCHARINCTEST(fc, Feptr);
3717
126k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3718
6.01k
              RRETURN(MATCH_NOMATCH);
3719
126k
            }
3720
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3721
3722
0
          case PT_PC:
3723
0
          for (;;)
3724
0
            {
3725
0
            RMATCH(Fecode, RM210);
3726
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3727
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3728
0
            if (Feptr >= mb->end_subject)
3729
0
              {
3730
0
              SCHECK_PARTIAL();
3731
0
              RRETURN(MATCH_NOMATCH);
3732
0
              }
3733
0
            GETCHARINCTEST(fc, Feptr);
3734
0
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3735
0
              RRETURN(MATCH_NOMATCH);
3736
0
            }
3737
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3738
3739
0
          case PT_SC:
3740
0
          for (;;)
3741
0
            {
3742
0
            RMATCH(Fecode, RM211);
3743
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3744
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3745
0
            if (Feptr >= mb->end_subject)
3746
0
              {
3747
0
              SCHECK_PARTIAL();
3748
0
              RRETURN(MATCH_NOMATCH);
3749
0
              }
3750
0
            GETCHARINCTEST(fc, Feptr);
3751
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3752
0
              RRETURN(MATCH_NOMATCH);
3753
0
            }
3754
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3755
3756
0
          case PT_SCX:
3757
0
          for (;;)
3758
0
            {
3759
0
            BOOL ok;
3760
0
            const ucd_record *prop;
3761
0
            RMATCH(Fecode, RM224);
3762
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3763
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3764
0
            if (Feptr >= mb->end_subject)
3765
0
              {
3766
0
              SCHECK_PARTIAL();
3767
0
              RRETURN(MATCH_NOMATCH);
3768
0
              }
3769
0
            GETCHARINCTEST(fc, Feptr);
3770
0
            prop = GET_UCD(fc);
3771
0
            ok = (prop->script == Lpropvalue
3772
0
                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
3773
0
            if (ok == (Lctype == OP_NOTPROP))
3774
0
              RRETURN(MATCH_NOMATCH);
3775
0
            }
3776
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3777
3778
0
          case PT_ALNUM:
3779
0
          for (;;)
3780
0
            {
3781
0
            int category;
3782
0
            RMATCH(Fecode, RM212);
3783
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3784
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3785
0
            if (Feptr >= mb->end_subject)
3786
0
              {
3787
0
              SCHECK_PARTIAL();
3788
0
              RRETURN(MATCH_NOMATCH);
3789
0
              }
3790
0
            GETCHARINCTEST(fc, Feptr);
3791
0
            category = UCD_CATEGORY(fc);
3792
0
            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
3793
0
              RRETURN(MATCH_NOMATCH);
3794
0
            }
3795
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3796
3797
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
3798
          which means that Perl space and POSIX space are now identical. PCRE
3799
          was changed at release 8.34. */
3800
3801
22.4k
          case PT_SPACE:    /* Perl space */
3802
22.4k
          case PT_PXSPACE:  /* POSIX space */
3803
22.4k
          for (;;)
3804
40.0k
            {
3805
40.0k
            RMATCH(Fecode, RM213);
3806
40.0k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807
40.0k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3808
22.4k
            if (Feptr >= mb->end_subject)
3809
0
              {
3810
0
              SCHECK_PARTIAL();
3811
0
              RRETURN(MATCH_NOMATCH);
3812
0
              }
3813
22.4k
            GETCHARINCTEST(fc, Feptr);
3814
22.4k
            switch(fc)
3815
22.4k
              {
3816
54.9k
              HSPACE_CASES:
3817
54.9k
              VSPACE_CASES:
3818
32.8k
              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3819
0
              break;
3820
3821
17.5k
              default:
3822
17.5k
              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
3823
0
                RRETURN(MATCH_NOMATCH);
3824
17.5k
              break;
3825
22.4k
              }
3826
22.4k
            }
3827
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3828
3829
0
          case PT_WORD:
3830
0
          for (;;)
3831
0
            {
3832
0
            int chartype, category;
3833
0
            RMATCH(Fecode, RM214);
3834
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3835
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3836
0
            if (Feptr >= mb->end_subject)
3837
0
              {
3838
0
              SCHECK_PARTIAL();
3839
0
              RRETURN(MATCH_NOMATCH);
3840
0
              }
3841
0
            GETCHARINCTEST(fc, Feptr);
3842
0
            chartype = UCD_CHARTYPE(fc);
3843
0
            category = PRIV(ucp_gentype)[chartype];
3844
0
            if ((category == ucp_L ||
3845
0
                 category == ucp_N ||
3846
0
                 chartype == ucp_Mn ||
3847
0
                 chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
3848
0
              RRETURN(MATCH_NOMATCH);
3849
0
            }
3850
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3851
3852
2.04k
          case PT_CLIST:
3853
2.04k
          for (;;)
3854
2.05k
            {
3855
2.05k
            const uint32_t *cp;
3856
2.05k
            RMATCH(Fecode, RM215);
3857
2.05k
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858
2.05k
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3859
2.04k
            if (Feptr >= mb->end_subject)
3860
0
              {
3861
0
              SCHECK_PARTIAL();
3862
0
              RRETURN(MATCH_NOMATCH);
3863
0
              }
3864
2.04k
            GETCHARINCTEST(fc, Feptr);
3865
#if PCRE2_CODE_UNIT_WIDTH == 32
3866
            if (fc > MAX_UTF_CODE_POINT)
3867
              {
3868
              if (Lctype == OP_NOTPROP) continue;
3869
              RRETURN(MATCH_NOMATCH);
3870
              }
3871
#endif
3872
2.04k
            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
3873
2.04k
            for (;;)
3874
3.08k
              {
3875
3.08k
              if (fc < *cp)
3876
2.04k
                {
3877
2.04k
                if (Lctype == OP_NOTPROP) break;
3878
2.04k
                RRETURN(MATCH_NOMATCH);
3879
2.04k
                }
3880
1.04k
              if (fc == *cp++)
3881
6
                {
3882
6
                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
3883
6
                break;
3884
6
                }
3885
1.04k
              }
3886
2.04k
            }
3887
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3888
3889
0
          case PT_UCNC:
3890
0
          for (;;)
3891
0
            {
3892
0
            RMATCH(Fecode, RM216);
3893
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3894
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3895
0
            if (Feptr >= mb->end_subject)
3896
0
              {
3897
0
              SCHECK_PARTIAL();
3898
0
              RRETURN(MATCH_NOMATCH);
3899
0
              }
3900
0
            GETCHARINCTEST(fc, Feptr);
3901
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
3902
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
3903
0
                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
3904
0
              RRETURN(MATCH_NOMATCH);
3905
0
            }
3906
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3907
3908
0
          case PT_BIDICL:
3909
0
          for (;;)
3910
0
            {
3911
0
            RMATCH(Fecode, RM223);
3912
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3913
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3914
0
            if (Feptr >= mb->end_subject)
3915
0
              {
3916
0
              SCHECK_PARTIAL();
3917
0
              RRETURN(MATCH_NOMATCH);
3918
0
              }
3919
0
            GETCHARINCTEST(fc, Feptr);
3920
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
3921
0
              RRETURN(MATCH_NOMATCH);
3922
0
            }
3923
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3924
3925
0
          case PT_BOOL:
3926
0
          for (;;)
3927
0
            {
3928
0
            BOOL ok;
3929
0
            const ucd_record *prop;
3930
0
            RMATCH(Fecode, RM222);
3931
0
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3932
0
            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3933
0
            if (Feptr >= mb->end_subject)
3934
0
              {
3935
0
              SCHECK_PARTIAL();
3936
0
              RRETURN(MATCH_NOMATCH);
3937
0
              }
3938
0
            GETCHARINCTEST(fc, Feptr);
3939
0
            prop = GET_UCD(fc);
3940
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
3941
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
3942
0
            if (ok == (Lctype == OP_NOTPROP))
3943
0
              RRETURN(MATCH_NOMATCH);
3944
0
            }
3945
0
          PCRE2_UNREACHABLE(); /* Control never reaches here */
3946
3947
          /* This should never occur */
3948
0
          default:
3949
0
          PCRE2_DEBUG_UNREACHABLE();
3950
0
          return PCRE2_ERROR_INTERNAL;
3951
31.2k
          }
3952
31.2k
        }
3953
3954
      /* Match extended Unicode sequences. We will get here only if the
3955
      support is in the binary; otherwise a compile-time error occurs. */
3956
3957
443k
      else if (Lctype == OP_EXTUNI)
3958
18.8k
        {
3959
18.8k
        for (;;)
3960
2.92M
          {
3961
2.92M
          RMATCH(Fecode, RM217);
3962
2.92M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3963
2.92M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3964
2.92M
          if (Feptr >= mb->end_subject)
3965
18.2k
            {
3966
18.2k
            SCHECK_PARTIAL();
3967
18.2k
            RRETURN(MATCH_NOMATCH);
3968
18.2k
            }
3969
2.90M
          else
3970
2.90M
            {
3971
2.90M
            GETCHARINCTEST(fc, Feptr);
3972
2.90M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
3973
2.90M
              utf, NULL);
3974
2.90M
            }
3975
2.90M
          CHECK_PARTIAL();
3976
2.90M
          }
3977
18.8k
        }
3978
424k
      else
3979
424k
#endif     /* SUPPORT_UNICODE */
3980
3981
      /* UTF mode for non-property testing character types. */
3982
3983
424k
#ifdef SUPPORT_UNICODE
3984
424k
      if (utf)
3985
94.3k
        {
3986
94.3k
        for (;;)
3987
6.39M
          {
3988
6.39M
          RMATCH(Fecode, RM218);
3989
6.39M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3990
6.39M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
3991
6.39M
          if (Feptr >= mb->end_subject)
3992
63.0k
            {
3993
63.0k
            SCHECK_PARTIAL();
3994
63.0k
            RRETURN(MATCH_NOMATCH);
3995
63.0k
            }
3996
6.32M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr)) RRETURN(MATCH_NOMATCH);
3997
6.32M
          GETCHARINC(fc, Feptr);
3998
6.32M
          switch(Lctype)
3999
6.32M
            {
4000
39.7k
            case OP_ANY:               /* This is the non-NL case */
4001
39.7k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4002
39.7k
                Feptr >= mb->end_subject &&
4003
39.7k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4004
39.7k
                NLBLOCK->nllen == 2 &&
4005
39.7k
                fc == NLBLOCK->nl[0])
4006
0
              {
4007
0
              mb->hitend = TRUE;
4008
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4009
0
              }
4010
39.7k
            break;
4011
4012
1.39M
            case OP_ALLANY:
4013
5.84M
            case OP_ANYBYTE:
4014
5.84M
            break;
4015
4016
0
            case OP_ANYNL:
4017
0
            switch(fc)
4018
0
              {
4019
0
              default: RRETURN(MATCH_NOMATCH);
4020
4021
0
              case CHAR_CR:
4022
0
              if (Feptr < mb->end_subject && UCHAR21(Feptr) == CHAR_LF) Feptr++;
4023
0
              break;
4024
4025
0
              case CHAR_LF:
4026
0
              break;
4027
4028
0
              case CHAR_VT:
4029
0
              case CHAR_FF:
4030
0
              case CHAR_NEL:
4031
0
#ifndef EBCDIC
4032
0
              case 0x2028:
4033
0
              case 0x2029:
4034
0
#endif  /* Not EBCDIC */
4035
0
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4036
0
                RRETURN(MATCH_NOMATCH);
4037
0
              break;
4038
0
              }
4039
0
            break;
4040
4041
443k
            case OP_NOT_HSPACE:
4042
443k
            switch(fc)
4043
443k
              {
4044
317k
              HSPACE_CASES: RRETURN(MATCH_NOMATCH);
4045
420k
              default: break;
4046
443k
              }
4047
420k
            break;
4048
4049
420k
            case OP_HSPACE:
4050
0
            switch(fc)
4051
0
              {
4052
0
              HSPACE_CASES: break;
4053
0
              default: RRETURN(MATCH_NOMATCH);
4054
0
              }
4055
0
            break;
4056
4057
0
            case OP_NOT_VSPACE:
4058
0
            switch(fc)
4059
0
              {
4060
0
              VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4061
0
              default: break;
4062
0
              }
4063
0
            break;
4064
4065
24
            case OP_VSPACE:
4066
24
            switch(fc)
4067
24
              {
4068
9
              VSPACE_CASES: break;
4069
15
              default: RRETURN(MATCH_NOMATCH);
4070
24
              }
4071
9
            break;
4072
4073
9
            case OP_NOT_DIGIT:
4074
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0)
4075
0
              RRETURN(MATCH_NOMATCH);
4076
0
            break;
4077
4078
0
            case OP_DIGIT:
4079
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_digit) == 0)
4080
0
              RRETURN(MATCH_NOMATCH);
4081
0
            break;
4082
4083
0
            case OP_NOT_WHITESPACE:
4084
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0)
4085
0
              RRETURN(MATCH_NOMATCH);
4086
0
            break;
4087
4088
0
            case OP_WHITESPACE:
4089
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_space) == 0)
4090
0
              RRETURN(MATCH_NOMATCH);
4091
0
            break;
4092
4093
0
            case OP_NOT_WORDCHAR:
4094
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0)
4095
0
              RRETURN(MATCH_NOMATCH);
4096
0
            break;
4097
4098
0
            case OP_WORDCHAR:
4099
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0)
4100
0
              RRETURN(MATCH_NOMATCH);
4101
0
            break;
4102
4103
0
            default:
4104
0
            PCRE2_DEBUG_UNREACHABLE();
4105
0
            return PCRE2_ERROR_INTERNAL;
4106
6.32M
            }
4107
6.32M
          }
4108
94.3k
        }
4109
330k
      else
4110
330k
#endif  /* SUPPORT_UNICODE */
4111
4112
      /* Not UTF mode */
4113
330k
        {
4114
330k
        for (;;)
4115
10.9M
          {
4116
10.9M
          RMATCH(Fecode, RM33);
4117
10.9M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4118
10.9M
          if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
4119
10.8M
          if (Feptr >= mb->end_subject)
4120
122k
            {
4121
122k
            SCHECK_PARTIAL();
4122
122k
            RRETURN(MATCH_NOMATCH);
4123
122k
            }
4124
10.7M
          if (Lctype == OP_ANY && IS_NEWLINE(Feptr))
4125
1.31k
            RRETURN(MATCH_NOMATCH);
4126
10.7M
          fc = *Feptr++;
4127
10.7M
          switch(Lctype)
4128
10.7M
            {
4129
354k
            case OP_ANY:               /* This is the non-NL case */
4130
354k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4131
354k
                Feptr >= mb->end_subject &&
4132
354k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4133
354k
                NLBLOCK->nllen == 2 &&
4134
354k
                fc == NLBLOCK->nl[0])
4135
0
              {
4136
0
              mb->hitend = TRUE;
4137
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4138
0
              }
4139
354k
            break;
4140
4141
2.34M
            case OP_ALLANY:
4142
2.34M
            case OP_ANYBYTE:
4143
2.34M
            break;
4144
4145
6.19k
            case OP_ANYNL:
4146
6.19k
            switch(fc)
4147
6.19k
              {
4148
5.54k
              default: RRETURN(MATCH_NOMATCH);
4149
4150
18
              case CHAR_CR:
4151
18
              if (Feptr < mb->end_subject && *Feptr == CHAR_LF) Feptr++;
4152
18
              break;
4153
4154
520
              case CHAR_LF:
4155
520
              break;
4156
4157
45
              case CHAR_VT:
4158
45
              case CHAR_FF:
4159
111
              case CHAR_NEL:
4160
#if PCRE2_CODE_UNIT_WIDTH != 8
4161
              case 0x2028:
4162
              case 0x2029:
4163
#endif
4164
111
              if (mb->bsr_convention == PCRE2_BSR_ANYCRLF)
4165
0
                RRETURN(MATCH_NOMATCH);
4166
111
              break;
4167
6.19k
              }
4168
649
            break;
4169
4170
1.08M
            case OP_NOT_HSPACE:
4171
1.08M
            switch(fc)
4172
1.08M
              {
4173
1.07M
              default: break;
4174
1.07M
              HSPACE_BYTE_CASES:
4175
#if PCRE2_CODE_UNIT_WIDTH != 8
4176
              HSPACE_MULTIBYTE_CASES:
4177
#endif
4178
33.3k
              RRETURN(MATCH_NOMATCH);
4179
1.08M
              }
4180
1.07M
            break;
4181
4182
1.07M
            case OP_HSPACE:
4183
0
            switch(fc)
4184
0
              {
4185
0
              default: RRETURN(MATCH_NOMATCH);
4186
0
              HSPACE_BYTE_CASES:
4187
#if PCRE2_CODE_UNIT_WIDTH != 8
4188
              HSPACE_MULTIBYTE_CASES:
4189
#endif
4190
0
              break;
4191
0
              }
4192
0
            break;
4193
4194
512k
            case OP_NOT_VSPACE:
4195
512k
            switch(fc)
4196
512k
              {
4197
505k
              default: break;
4198
505k
              VSPACE_BYTE_CASES:
4199
#if PCRE2_CODE_UNIT_WIDTH != 8
4200
              VSPACE_MULTIBYTE_CASES:
4201
#endif
4202
25.1k
              RRETURN(MATCH_NOMATCH);
4203
512k
              }
4204
505k
            break;
4205
4206
505k
            case OP_VSPACE:
4207
0
            switch(fc)
4208
0
              {
4209
0
              default: RRETURN(MATCH_NOMATCH);
4210
0
              VSPACE_BYTE_CASES:
4211
#if PCRE2_CODE_UNIT_WIDTH != 8
4212
              VSPACE_MULTIBYTE_CASES:
4213
#endif
4214
0
              break;
4215
0
              }
4216
0
            break;
4217
4218
290k
            case OP_NOT_DIGIT:
4219
290k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_digit) != 0)
4220
53.6k
              RRETURN(MATCH_NOMATCH);
4221
236k
            break;
4222
4223
236k
            case OP_DIGIT:
4224
498
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_digit) == 0)
4225
444
              RRETURN(MATCH_NOMATCH);
4226
54
            break;
4227
4228
5.92M
            case OP_NOT_WHITESPACE:
4229
5.92M
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_space) != 0)
4230
35.7k
              RRETURN(MATCH_NOMATCH);
4231
5.89M
            break;
4232
4233
5.89M
            case OP_WHITESPACE:
4234
0
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_space) == 0)
4235
0
              RRETURN(MATCH_NOMATCH);
4236
0
            break;
4237
4238
177k
            case OP_NOT_WORDCHAR:
4239
177k
            if (MAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0)
4240
18.5k
              RRETURN(MATCH_NOMATCH);
4241
159k
            break;
4242
4243
159k
            case OP_WORDCHAR:
4244
17.4k
            if (!MAX_255(fc) || (mb->ctypes[fc] & ctype_word) == 0)
4245
3.77k
              RRETURN(MATCH_NOMATCH);
4246
13.6k
            break;
4247
4248
13.6k
            default:
4249
0
            PCRE2_DEBUG_UNREACHABLE();
4250
0
            return PCRE2_ERROR_INTERNAL;
4251
10.7M
            }
4252
10.7M
          }
4253
330k
        }
4254
4255
0
      PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
4256
0
      }
4257
4258
    /* If maximizing, it is worth using inline code for speed, doing the type
4259
    test once at the start (i.e. keep it out of the loops). Once again,
4260
    "notmatch" can be an ordinary local variable because the loops do not call
4261
    RMATCH. */
4262
4263
59.9M
    else
4264
59.9M
      {
4265
59.9M
      Lstart_eptr = Feptr;  /* Remember where we started */
4266
4267
59.9M
#ifdef SUPPORT_UNICODE
4268
59.9M
      if (proptype >= 0)
4269
678k
        {
4270
678k
        BOOL notmatch = Lctype == OP_NOTPROP;
4271
678k
        switch(proptype)
4272
678k
          {
4273
0
          case PT_LAMP:
4274
0
          for (i = Lmin; i < Lmax; i++)
4275
0
            {
4276
0
            int chartype;
4277
0
            int len = 1;
4278
0
            if (Feptr >= mb->end_subject)
4279
0
              {
4280
0
              SCHECK_PARTIAL();
4281
0
              break;
4282
0
              }
4283
0
            GETCHARLENTEST(fc, Feptr, len);
4284
0
            chartype = UCD_CHARTYPE(fc);
4285
0
            if ((chartype == ucp_Lu ||
4286
0
                 chartype == ucp_Ll ||
4287
0
                 chartype == ucp_Lt) == notmatch)
4288
0
              break;
4289
0
            Feptr+= len;
4290
0
            }
4291
0
          break;
4292
4293
834
          case PT_GC:
4294
40.5k
          for (i = Lmin; i < Lmax; i++)
4295
40.5k
            {
4296
40.5k
            int len = 1;
4297
40.5k
            if (Feptr >= mb->end_subject)
4298
30
              {
4299
30
              SCHECK_PARTIAL();
4300
30
              break;
4301
30
              }
4302
40.4k
            GETCHARLENTEST(fc, Feptr, len);
4303
40.4k
            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
4304
39.6k
            Feptr+= len;
4305
39.6k
            }
4306
834
          break;
4307
4308
12.2k
          case PT_PC:
4309
54.9k
          for (i = Lmin; i < Lmax; i++)
4310
54.9k
            {
4311
54.9k
            int len = 1;
4312
54.9k
            if (Feptr >= mb->end_subject)
4313
0
              {
4314
0
              SCHECK_PARTIAL();
4315
0
              break;
4316
0
              }
4317
54.9k
            GETCHARLENTEST(fc, Feptr, len);
4318
54.9k
            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
4319
42.7k
            Feptr+= len;
4320
42.7k
            }
4321
12.2k
          break;
4322
4323
12.2k
          case PT_SC:
4324
0
          for (i = Lmin; i < Lmax; i++)
4325
0
            {
4326
0
            int len = 1;
4327
0
            if (Feptr >= mb->end_subject)
4328
0
              {
4329
0
              SCHECK_PARTIAL();
4330
0
              break;
4331
0
              }
4332
0
            GETCHARLENTEST(fc, Feptr, len);
4333
0
            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
4334
0
            Feptr+= len;
4335
0
            }
4336
0
          break;
4337
4338
0
          case PT_SCX:
4339
0
          for (i = Lmin; i < Lmax; i++)
4340
0
            {
4341
0
            BOOL ok;
4342
0
            const ucd_record *prop;
4343
0
            int len = 1;
4344
0
            if (Feptr >= mb->end_subject)
4345
0
              {
4346
0
              SCHECK_PARTIAL();
4347
0
              break;
4348
0
              }
4349
0
            GETCHARLENTEST(fc, Feptr, len);
4350
0
            prop = GET_UCD(fc);
4351
0
            ok = (prop->script == Lpropvalue ||
4352
0
                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
4353
0
            if (ok == notmatch) break;
4354
0
            Feptr+= len;
4355
0
            }
4356
0
          break;
4357
4358
0
          case PT_ALNUM:
4359
0
          for (i = Lmin; i < Lmax; i++)
4360
0
            {
4361
0
            int category;
4362
0
            int len = 1;
4363
0
            if (Feptr >= mb->end_subject)
4364
0
              {
4365
0
              SCHECK_PARTIAL();
4366
0
              break;
4367
0
              }
4368
0
            GETCHARLENTEST(fc, Feptr, len);
4369
0
            category = UCD_CATEGORY(fc);
4370
0
            if ((category == ucp_L || category == ucp_N) == notmatch)
4371
0
              break;
4372
0
            Feptr+= len;
4373
0
            }
4374
0
          break;
4375
4376
          /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4377
          which means that Perl space and POSIX space are now identical. PCRE
4378
          was changed at release 8.34. */
4379
4380
627k
          case PT_SPACE:    /* Perl space */
4381
627k
          case PT_PXSPACE:  /* POSIX space */
4382
5.39M
          for (i = Lmin; i < Lmax; i++)
4383
5.39M
            {
4384
5.39M
            int len = 1;
4385
5.39M
            if (Feptr >= mb->end_subject)
4386
19.0k
              {
4387
19.0k
              SCHECK_PARTIAL();
4388
19.0k
              break;
4389
19.0k
              }
4390
5.37M
            GETCHARLENTEST(fc, Feptr, len);
4391
5.37M
            switch(fc)
4392
5.37M
              {
4393
18.6M
              HSPACE_CASES:
4394
18.6M
              VSPACE_CASES:
4395
7.94M
              if (notmatch) goto ENDLOOP99;  /* Break the loop */
4396
878k
              break;
4397
4398
4.23M
              default:
4399
4.23M
              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
4400
342k
                goto ENDLOOP99;   /* Break the loop */
4401
3.89M
              break;
4402
5.37M
              }
4403
4.77M
            Feptr+= len;
4404
4.77M
            }
4405
627k
          ENDLOOP99:
4406
627k
          break;
4407
4408
37.4k
          case PT_WORD:
4409
369k
          for (i = Lmin; i < Lmax; i++)
4410
369k
            {
4411
369k
            int chartype, category;
4412
369k
            int len = 1;
4413
369k
            if (Feptr >= mb->end_subject)
4414
828
              {
4415
828
              SCHECK_PARTIAL();
4416
828
              break;
4417
828
              }
4418
368k
            GETCHARLENTEST(fc, Feptr, len);
4419
368k
            chartype = UCD_CHARTYPE(fc);
4420
368k
            category = PRIV(ucp_gentype)[chartype];
4421
368k
            if ((category == ucp_L ||
4422
368k
                 category == ucp_N ||
4423
368k
                 chartype == ucp_Mn ||
4424
368k
                 chartype == ucp_Pc) == notmatch)
4425
36.5k
              break;
4426
332k
            Feptr+= len;
4427
332k
            }
4428
37.4k
          break;
4429
4430
37.4k
          case PT_CLIST:
4431
276
          for (i = Lmin; i < Lmax; i++)
4432
276
            {
4433
276
            const uint32_t *cp;
4434
276
            int len = 1;
4435
276
            if (Feptr >= mb->end_subject)
4436
0
              {
4437
0
              SCHECK_PARTIAL();
4438
0
              break;
4439
0
              }
4440
276
            GETCHARLENTEST(fc, Feptr, len);
4441
#if PCRE2_CODE_UNIT_WIDTH == 32
4442
            if (fc > MAX_UTF_CODE_POINT)
4443
              {
4444
              if (!notmatch) goto GOT_MAX;
4445
              }
4446
            else
4447
#endif
4448
276
              {
4449
276
              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
4450
276
              for (;;)
4451
541
                {
4452
541
                if (fc < *cp)
4453
276
                  { if (notmatch) break; else goto GOT_MAX; }
4454
265
                if (fc == *cp++)
4455
0
                  { if (notmatch) goto GOT_MAX; else break; }
4456
265
                }
4457
276
              }
4458
4459
0
            Feptr += len;
4460
0
            }
4461
276
          GOT_MAX:
4462
276
          break;
4463
4464
0
          case PT_UCNC:
4465
0
          for (i = Lmin; i < Lmax; i++)
4466
0
            {
4467
0
            int len = 1;
4468
0
            if (Feptr >= mb->end_subject)
4469
0
              {
4470
0
              SCHECK_PARTIAL();
4471
0
              break;
4472
0
              }
4473
0
            GETCHARLENTEST(fc, Feptr, len);
4474
0
            if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
4475
0
                 fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
4476
0
                 fc >= 0xe000) == notmatch)
4477
0
              break;
4478
0
            Feptr += len;
4479
0
            }
4480
0
          break;
4481
4482
0
          case PT_BIDICL:
4483
0
          for (i = Lmin; i < Lmax; i++)
4484
0
            {
4485
0
            int len = 1;
4486
0
            if (Feptr >= mb->end_subject)
4487
0
              {
4488
0
              SCHECK_PARTIAL();
4489
0
              break;
4490
0
              }
4491
0
            GETCHARLENTEST(fc, Feptr, len);
4492
0
            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
4493
0
            Feptr+= len;
4494
0
            }
4495
0
          break;
4496
4497
0
          case PT_BOOL:
4498
0
          for (i = Lmin; i < Lmax; i++)
4499
0
            {
4500
0
            BOOL ok;
4501
0
            const ucd_record *prop;
4502
0
            int len = 1;
4503
0
            if (Feptr >= mb->end_subject)
4504
0
              {
4505
0
              SCHECK_PARTIAL();
4506
0
              break;
4507
0
              }
4508
0
            GETCHARLENTEST(fc, Feptr, len);
4509
0
            prop = GET_UCD(fc);
4510
0
            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
4511
0
              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
4512
0
            if (ok == notmatch) break;
4513
0
            Feptr+= len;
4514
0
            }
4515
0
          break;
4516
4517
0
          default:
4518
0
          PCRE2_DEBUG_UNREACHABLE();
4519
0
          return PCRE2_ERROR_INTERNAL;
4520
678k
          }
4521
4522
        /* Feptr is now past the end of the maximum run */
4523
4524
678k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4525
4526
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4527
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
4528
        go too far. */
4529
4530
630k
        for(;;)
4531
5.46M
          {
4532
5.46M
          if (Feptr <= Lstart_eptr) break;
4533
4.83M
          RMATCH(Fecode, RM221);
4534
4.83M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4535
4.83M
          Feptr--;
4536
4.83M
          if (utf) BACKCHAR(Feptr);
4537
4.83M
          }
4538
630k
        }
4539
4540
      /* Match extended Unicode grapheme clusters. We will get here only if the
4541
      support is in the binary; otherwise a compile-time error occurs. */
4542
4543
59.2M
      else if (Lctype == OP_EXTUNI)
4544
36.0k
        {
4545
4.59M
        for (i = Lmin; i < Lmax; i++)
4546
4.59M
          {
4547
4.59M
          if (Feptr >= mb->end_subject)
4548
36.0k
            {
4549
36.0k
            SCHECK_PARTIAL();
4550
36.0k
            break;
4551
36.0k
            }
4552
4.55M
          else
4553
4.55M
            {
4554
4.55M
            GETCHARINCTEST(fc, Feptr);
4555
4.55M
            Feptr = PRIV(extuni)(fc, Feptr, mb->start_subject, mb->end_subject,
4556
4.55M
              utf, NULL);
4557
4.55M
            }
4558
4.55M
          CHECK_PARTIAL();
4559
4.55M
          }
4560
4561
        /* Feptr is now past the end of the maximum run */
4562
4563
36.0k
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4564
4565
        /* We use <= Lstart_eptr rather than == Lstart_eptr to detect the start
4566
        of the run while backtracking because the use of \C in UTF mode can
4567
        cause BACKCHAR to move back past Lstart_eptr. This is just palliative;
4568
        the use of \C in UTF mode is fraught with danger. */
4569
4570
36.0k
        for(;;)
4571
4.59M
          {
4572
4.59M
          int lgb, rgb;
4573
4.59M
          PCRE2_SPTR fptr;
4574
4575
4.59M
          if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4576
4.55M
          RMATCH(Fecode, RM219);
4577
4.55M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4578
4579
          /* Backtracking over an extended grapheme cluster involves inspecting
4580
          the previous two characters (if present) to see if a break is
4581
          permitted between them. */
4582
4583
4.55M
          Feptr--;
4584
4.55M
          if (!utf) fc = *Feptr; else
4585
2.70M
            {
4586
2.70M
            BACKCHAR(Feptr);
4587
2.70M
            GETCHAR(fc, Feptr);
4588
2.70M
            }
4589
4.55M
          rgb = UCD_GRAPHBREAK(fc);
4590
4591
4.55M
          for (;;)
4592
4.56M
            {
4593
4.56M
            if (Feptr <= Lstart_eptr) break;   /* At start of char run */
4594
4.52M
            fptr = Feptr - 1;
4595
4.52M
            if (!utf) fc = *fptr; else
4596
2.70M
              {
4597
2.70M
              BACKCHAR(fptr);
4598
2.70M
              GETCHAR(fc, fptr);
4599
2.70M
              }
4600
4.52M
            lgb = UCD_GRAPHBREAK(fc);
4601
4.52M
            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
4602
6.82k
            Feptr = fptr;
4603
6.82k
            rgb = lgb;
4604
6.82k
            }
4605
4.55M
          }
4606
36.0k
        }
4607
4608
59.1M
      else
4609
59.1M
#endif   /* SUPPORT_UNICODE */
4610
4611
59.1M
#ifdef SUPPORT_UNICODE
4612
59.1M
      if (utf)
4613
51.1M
        {
4614
51.1M
        switch(Lctype)
4615
51.1M
          {
4616
14.5k
          case OP_ANY:
4617
30.9k
          for (i = Lmin; i < Lmax; i++)
4618
16.3k
            {
4619
16.3k
            if (Feptr >= mb->end_subject)
4620
3
              {
4621
3
              SCHECK_PARTIAL();
4622
3
              break;
4623
3
              }
4624
16.3k
            if (IS_NEWLINE(Feptr)) break;
4625
16.3k
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4626
16.3k
                Feptr + 1 >= mb->end_subject &&
4627
16.3k
                NLBLOCK->nltype == NLTYPE_FIXED &&
4628
16.3k
                NLBLOCK->nllen == 2 &&
4629
16.3k
                UCHAR21(Feptr) == NLBLOCK->nl[0])
4630
0
              {
4631
0
              mb->hitend = TRUE;
4632
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4633
0
              }
4634
16.3k
            Feptr++;
4635
16.3k
            ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4636
16.3k
            }
4637
14.5k
          break;
4638
4639
4.02M
          case OP_ALLANY:
4640
4.02M
          if (Lmax < UINT32_MAX)
4641
1.13k
            {
4642
2.24k
            for (i = Lmin; i < Lmax; i++)
4643
1.13k
              {
4644
1.13k
              if (Feptr >= mb->end_subject)
4645
28
                {
4646
28
                SCHECK_PARTIAL();
4647
28
                break;
4648
28
                }
4649
1.10k
              Feptr++;
4650
1.10k
              ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++);
4651
1.10k
              }
4652
1.13k
            }
4653
4.02M
          else
4654
4.02M
            {
4655
4.02M
            Feptr = mb->end_subject;   /* Unlimited UTF-8 repeat */
4656
4.02M
            SCHECK_PARTIAL();
4657
4.02M
            }
4658
4.02M
          break;
4659
4660
          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
4661
4662
4.02M
          case OP_ANYBYTE:
4663
248
          fc = Lmax - Lmin;
4664
248
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4665
248
            {
4666
248
            Feptr = mb->end_subject;
4667
248
            SCHECK_PARTIAL();
4668
248
            }
4669
0
          else Feptr += fc;
4670
248
          break;
4671
4672
47.0M
          case OP_ANYNL:
4673
54.7M
          for (i = Lmin; i < Lmax; i++)
4674
47.0M
            {
4675
47.0M
            int len = 1;
4676
47.0M
            if (Feptr >= mb->end_subject)
4677
667k
              {
4678
667k
              SCHECK_PARTIAL();
4679
667k
              break;
4680
667k
              }
4681
46.3M
            GETCHARLEN(fc, Feptr, len);
4682
46.3M
            if (fc == CHAR_CR)
4683
0
              {
4684
0
              if (++Feptr >= mb->end_subject) break;
4685
0
              if (UCHAR21(Feptr) == CHAR_LF) Feptr++;
4686
0
              }
4687
46.3M
            else
4688
46.3M
              {
4689
46.3M
              if (fc != CHAR_LF &&
4690
46.3M
                  (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4691
43.6M
                   (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4692
43.6M
#ifndef EBCDIC
4693
43.6M
                    && fc != 0x2028 && fc != 0x2029
4694
43.6M
#endif  /* Not EBCDIC */
4695
43.6M
                    )))
4696
38.6M
                break;
4697
7.72M
              Feptr += len;
4698
7.72M
              }
4699
46.3M
            }
4700
47.0M
          break;
4701
4702
47.0M
          case OP_NOT_HSPACE:
4703
3.70k
          case OP_HSPACE:
4704
90.9k
          for (i = Lmin; i < Lmax; i++)
4705
89.2k
            {
4706
89.2k
            BOOL gotspace;
4707
89.2k
            int len = 1;
4708
89.2k
            if (Feptr >= mb->end_subject)
4709
350
              {
4710
350
              SCHECK_PARTIAL();
4711
350
              break;
4712
350
              }
4713
88.8k
            GETCHARLEN(fc, Feptr, len);
4714
88.8k
            switch(fc)
4715
88.8k
              {
4716
1.60k
              HSPACE_CASES: gotspace = TRUE; break;
4717
87.2k
              default: gotspace = FALSE; break;
4718
88.8k
              }
4719
88.8k
            if (gotspace == (Lctype == OP_NOT_HSPACE)) break;
4720
87.2k
            Feptr += len;
4721
87.2k
            }
4722
3.70k
          break;
4723
4724
17.5k
          case OP_NOT_VSPACE:
4725
20.5k
          case OP_VSPACE:
4726
294k
          for (i = Lmin; i < Lmax; i++)
4727
294k
            {
4728
294k
            BOOL gotspace;
4729
294k
            int len = 1;
4730
294k
            if (Feptr >= mb->end_subject)
4731
24
              {
4732
24
              SCHECK_PARTIAL();
4733
24
              break;
4734
24
              }
4735
294k
            GETCHARLEN(fc, Feptr, len);
4736
294k
            switch(fc)
4737
294k
              {
4738
17.7k
              VSPACE_CASES: gotspace = TRUE; break;
4739
276k
              default: gotspace = FALSE; break;
4740
294k
              }
4741
294k
            if (gotspace == (Lctype == OP_NOT_VSPACE)) break;
4742
273k
            Feptr += len;
4743
273k
            }
4744
20.5k
          break;
4745
4746
20.5k
          case OP_NOT_DIGIT:
4747
0
          for (i = Lmin; i < Lmax; i++)
4748
0
            {
4749
0
            int len = 1;
4750
0
            if (Feptr >= mb->end_subject)
4751
0
              {
4752
0
              SCHECK_PARTIAL();
4753
0
              break;
4754
0
              }
4755
0
            GETCHARLEN(fc, Feptr, len);
4756
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_digit) != 0) break;
4757
0
            Feptr+= len;
4758
0
            }
4759
0
          break;
4760
4761
0
          case OP_DIGIT:
4762
0
          for (i = Lmin; i < Lmax; i++)
4763
0
            {
4764
0
            int len = 1;
4765
0
            if (Feptr >= mb->end_subject)
4766
0
              {
4767
0
              SCHECK_PARTIAL();
4768
0
              break;
4769
0
              }
4770
0
            GETCHARLEN(fc, Feptr, len);
4771
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_digit) == 0) break;
4772
0
            Feptr+= len;
4773
0
            }
4774
0
          break;
4775
4776
0
          case OP_NOT_WHITESPACE:
4777
0
          for (i = Lmin; i < Lmax; i++)
4778
0
            {
4779
0
            int len = 1;
4780
0
            if (Feptr >= mb->end_subject)
4781
0
              {
4782
0
              SCHECK_PARTIAL();
4783
0
              break;
4784
0
              }
4785
0
            GETCHARLEN(fc, Feptr, len);
4786
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_space) != 0) break;
4787
0
            Feptr+= len;
4788
0
            }
4789
0
          break;
4790
4791
0
          case OP_WHITESPACE:
4792
0
          for (i = Lmin; i < Lmax; i++)
4793
0
            {
4794
0
            int len = 1;
4795
0
            if (Feptr >= mb->end_subject)
4796
0
              {
4797
0
              SCHECK_PARTIAL();
4798
0
              break;
4799
0
              }
4800
0
            GETCHARLEN(fc, Feptr, len);
4801
0
            if (fc >= 256 ||(mb->ctypes[fc] & ctype_space) == 0) break;
4802
0
            Feptr+= len;
4803
0
            }
4804
0
          break;
4805
4806
0
          case OP_NOT_WORDCHAR:
4807
0
          for (i = Lmin; i < Lmax; i++)
4808
0
            {
4809
0
            int len = 1;
4810
0
            if (Feptr >= mb->end_subject)
4811
0
              {
4812
0
              SCHECK_PARTIAL();
4813
0
              break;
4814
0
              }
4815
0
            GETCHARLEN(fc, Feptr, len);
4816
0
            if (fc < 256 && (mb->ctypes[fc] & ctype_word) != 0) break;
4817
0
            Feptr+= len;
4818
0
            }
4819
0
          break;
4820
4821
0
          case OP_WORDCHAR:
4822
0
          for (i = Lmin; i < Lmax; i++)
4823
0
            {
4824
0
            int len = 1;
4825
0
            if (Feptr >= mb->end_subject)
4826
0
              {
4827
0
              SCHECK_PARTIAL();
4828
0
              break;
4829
0
              }
4830
0
            GETCHARLEN(fc, Feptr, len);
4831
0
            if (fc >= 256 || (mb->ctypes[fc] & ctype_word) == 0) break;
4832
0
            Feptr+= len;
4833
0
            }
4834
0
          break;
4835
4836
0
          default:
4837
0
          PCRE2_DEBUG_UNREACHABLE();
4838
0
          return PCRE2_ERROR_INTERNAL;
4839
51.1M
          }
4840
4841
51.1M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
4842
4843
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
4844
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't go
4845
        too far. */
4846
4847
4.06M
        for(;;)
4848
333M
          {
4849
333M
          if (Feptr <= Lstart_eptr) break;
4850
329M
          RMATCH(Fecode, RM220);
4851
329M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4852
329M
          Feptr--;
4853
329M
          BACKCHAR(Feptr);
4854
329M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr &&
4855
329M
              UCHAR21(Feptr) == CHAR_NL && UCHAR21(Feptr - 1) == CHAR_CR)
4856
0
            Feptr--;
4857
329M
          }
4858
4.06M
        }
4859
8.08M
      else
4860
8.08M
#endif  /* SUPPORT_UNICODE */
4861
4862
      /* Not UTF mode */
4863
8.08M
        {
4864
8.08M
        switch(Lctype)
4865
8.08M
          {
4866
2.80M
          case OP_ANY:
4867
11.0M
          for (i = Lmin; i < Lmax; i++)
4868
8.33M
            {
4869
8.33M
            if (Feptr >= mb->end_subject)
4870
14.5k
              {
4871
14.5k
              SCHECK_PARTIAL();
4872
14.5k
              break;
4873
14.5k
              }
4874
8.32M
            if (IS_NEWLINE(Feptr)) break;
4875
8.26M
            if (mb->partial != 0 &&    /* Take care with CRLF partial */
4876
8.26M
                Feptr + 1 >= mb->end_subject &&
4877
8.26M
                NLBLOCK->nltype == NLTYPE_FIXED &&
4878
8.26M
                NLBLOCK->nllen == 2 &&
4879
8.26M
                *Feptr == NLBLOCK->nl[0])
4880
0
              {
4881
0
              mb->hitend = TRUE;
4882
0
              if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
4883
0
              }
4884
8.26M
            Feptr++;
4885
8.26M
            }
4886
2.80M
          break;
4887
4888
4.19M
          case OP_ALLANY:
4889
4.19M
          case OP_ANYBYTE:
4890
4.19M
          fc = Lmax - Lmin;
4891
4.19M
          if (fc > (uint32_t)(mb->end_subject - Feptr))
4892
32.3k
            {
4893
32.3k
            Feptr = mb->end_subject;
4894
32.3k
            SCHECK_PARTIAL();
4895
32.3k
            }
4896
4.16M
          else Feptr += fc;
4897
4.19M
          break;
4898
4899
4.19M
          case OP_ANYNL:
4900
94.3k
          for (i = Lmin; i < Lmax; i++)
4901
92.7k
            {
4902
92.7k
            if (Feptr >= mb->end_subject)
4903
6.51k
              {
4904
6.51k
              SCHECK_PARTIAL();
4905
6.51k
              break;
4906
6.51k
              }
4907
86.2k
            fc = *Feptr;
4908
86.2k
            if (fc == CHAR_CR)
4909
7.66k
              {
4910
7.66k
              if (++Feptr >= mb->end_subject) break;
4911
1.40k
              if (*Feptr == CHAR_LF) Feptr++;
4912
1.40k
              }
4913
78.5k
            else
4914
78.5k
              {
4915
78.5k
              if (fc != CHAR_LF && (mb->bsr_convention == PCRE2_BSR_ANYCRLF ||
4916
77.1k
                 (fc != CHAR_VT && fc != CHAR_FF && fc != CHAR_NEL
4917
#if PCRE2_CODE_UNIT_WIDTH != 8
4918
                 && fc != 0x2028 && fc != 0x2029
4919
#endif
4920
77.1k
                 ))) break;
4921
25.1k
              Feptr++;
4922
25.1k
              }
4923
86.2k
            }
4924
67.7k
          break;
4925
4926
67.7k
          case OP_NOT_HSPACE:
4927
462k
          for (i = Lmin; i < Lmax; i++)
4928
462k
            {
4929
462k
            if (Feptr >= mb->end_subject)
4930
1.00k
              {
4931
1.00k
              SCHECK_PARTIAL();
4932
1.00k
              break;
4933
1.00k
              }
4934
461k
            switch(*Feptr)
4935
461k
              {
4936
455k
              default: Feptr++; break;
4937
12.2k
              HSPACE_BYTE_CASES:
4938
#if PCRE2_CODE_UNIT_WIDTH != 8
4939
              HSPACE_MULTIBYTE_CASES:
4940
#endif
4941
12.2k
              goto ENDLOOP00;
4942
461k
              }
4943
461k
            }
4944
7.44k
          ENDLOOP00:
4945
7.44k
          break;
4946
4947
1.13k
          case OP_HSPACE:
4948
6.79k
          for (i = Lmin; i < Lmax; i++)
4949
6.79k
            {
4950
6.79k
            if (Feptr >= mb->end_subject)
4951
0
              {
4952
0
              SCHECK_PARTIAL();
4953
0
              break;
4954
0
              }
4955
6.79k
            switch(*Feptr)
4956
6.79k
              {
4957
882
              default: goto ENDLOOP01;
4958
17.5k
              HSPACE_BYTE_CASES:
4959
#if PCRE2_CODE_UNIT_WIDTH != 8
4960
              HSPACE_MULTIBYTE_CASES:
4961
#endif
4962
17.5k
              Feptr++; break;
4963
6.79k
              }
4964
6.79k
            }
4965
882
          ENDLOOP01:
4966
882
          break;
4967
4968
721k
          case OP_NOT_VSPACE:
4969
27.7M
          for (i = Lmin; i < Lmax; i++)
4970
27.7M
            {
4971
27.7M
            if (Feptr >= mb->end_subject)
4972
702
              {
4973
702
              SCHECK_PARTIAL();
4974
702
              break;
4975
702
              }
4976
27.7M
            switch(*Feptr)
4977
27.7M
              {
4978
27.0M
              default: Feptr++; break;
4979
2.61M
              VSPACE_BYTE_CASES:
4980
#if PCRE2_CODE_UNIT_WIDTH != 8
4981
              VSPACE_MULTIBYTE_CASES:
4982
#endif
4983
2.61M
              goto ENDLOOP02;
4984
27.7M
              }
4985
27.7M
            }
4986
721k
          ENDLOOP02:
4987
721k
          break;
4988
4989
5.61k
          case OP_VSPACE:
4990
32
          for (i = Lmin; i < Lmax; i++)
4991
32
            {
4992
32
            if (Feptr >= mb->end_subject)
4993
0
              {
4994
0
              SCHECK_PARTIAL();
4995
0
              break;
4996
0
              }
4997
32
            switch(*Feptr)
4998
32
              {
4999
24
              default: goto ENDLOOP03;
5000
40
              VSPACE_BYTE_CASES:
5001
#if PCRE2_CODE_UNIT_WIDTH != 8
5002
              VSPACE_MULTIBYTE_CASES:
5003
#endif
5004
40
              Feptr++; break;
5005
32
              }
5006
32
            }
5007
24
          ENDLOOP03:
5008
24
          break;
5009
5010
387
          case OP_NOT_DIGIT:
5011
731
          for (i = Lmin; i < Lmax; i++)
5012
417
            {
5013
417
            if (Feptr >= mb->end_subject)
5014
0
              {
5015
0
              SCHECK_PARTIAL();
5016
0
              break;
5017
0
              }
5018
417
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_digit) != 0)
5019
73
              break;
5020
344
            Feptr++;
5021
344
            }
5022
387
          break;
5023
5024
10.9k
          case OP_DIGIT:
5025
11.5k
          for (i = Lmin; i < Lmax; i++)
5026
10.9k
            {
5027
10.9k
            if (Feptr >= mb->end_subject)
5028
282
              {
5029
282
              SCHECK_PARTIAL();
5030
282
              break;
5031
282
              }
5032
10.7k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_digit) == 0)
5033
10.1k
              break;
5034
586
            Feptr++;
5035
586
            }
5036
10.9k
          break;
5037
5038
155k
          case OP_NOT_WHITESPACE:
5039
4.27M
          for (i = Lmin; i < Lmax; i++)
5040
4.27M
            {
5041
4.27M
            if (Feptr >= mb->end_subject)
5042
32.3k
              {
5043
32.3k
              SCHECK_PARTIAL();
5044
32.3k
              break;
5045
32.3k
              }
5046
4.23M
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_space) != 0)
5047
123k
              break;
5048
4.11M
            Feptr++;
5049
4.11M
            }
5050
155k
          break;
5051
5052
155k
          case OP_WHITESPACE:
5053
16.0k
          for (i = Lmin; i < Lmax; i++)
5054
13.2k
            {
5055
13.2k
            if (Feptr >= mb->end_subject)
5056
16
              {
5057
16
              SCHECK_PARTIAL();
5058
16
              break;
5059
16
              }
5060
13.2k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_space) == 0)
5061
10.3k
              break;
5062
2.88k
            Feptr++;
5063
2.88k
            }
5064
13.2k
          break;
5065
5066
13.2k
          case OP_NOT_WORDCHAR:
5067
158k
          for (i = Lmin; i < Lmax; i++)
5068
158k
            {
5069
158k
            if (Feptr >= mb->end_subject)
5070
1.27k
              {
5071
1.27k
              SCHECK_PARTIAL();
5072
1.27k
              break;
5073
1.27k
              }
5074
157k
            if (MAX_255(*Feptr) && (mb->ctypes[*Feptr] & ctype_word) != 0)
5075
5.49k
              break;
5076
151k
            Feptr++;
5077
151k
            }
5078
6.82k
          break;
5079
5080
104k
          case OP_WORDCHAR:
5081
476k
          for (i = Lmin; i < Lmax; i++)
5082
445k
            {
5083
445k
            if (Feptr >= mb->end_subject)
5084
372
              {
5085
372
              SCHECK_PARTIAL();
5086
372
              break;
5087
372
              }
5088
445k
            if (!MAX_255(*Feptr) || (mb->ctypes[*Feptr] & ctype_word) == 0)
5089
73.4k
              break;
5090
371k
            Feptr++;
5091
371k
            }
5092
104k
          break;
5093
5094
104k
          default:
5095
0
          PCRE2_DEBUG_UNREACHABLE();
5096
0
          return PCRE2_ERROR_INTERNAL;
5097
8.08M
          }
5098
5099
8.08M
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
5100
5101
7.91M
        for (;;)
5102
51.9M
          {
5103
51.9M
          if (Feptr == Lstart_eptr) break;
5104
44.0M
          RMATCH(Fecode, RM34);
5105
44.0M
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5106
44.0M
          Feptr--;
5107
44.0M
          if (Lctype == OP_ANYNL && Feptr > Lstart_eptr && *Feptr == CHAR_LF &&
5108
44.0M
              Feptr[-1] == CHAR_CR) Feptr--;
5109
44.0M
          }
5110
7.91M
        }
5111
59.9M
      }
5112
12.6M
    break;  /* End of repeat character type processing */
5113
5114
12.6M
#undef Lstart_eptr
5115
12.6M
#undef Lmin
5116
12.6M
#undef Lmax
5117
12.6M
#undef Lctype
5118
12.6M
#undef Lpropvalue
5119
5120
5121
    /* ===================================================================== */
5122
    /* Match a back reference, possibly repeatedly. Look past the end of the
5123
    item to see if there is repeat information following. The OP_REF and
5124
    OP_REFI opcodes are used for a reference to a numbered group or to a
5125
    non-duplicated named group. For a duplicated named group, OP_DNREF and
5126
    OP_DNREFI are used. In this case we must scan the list of groups to which
5127
    the name refers, and use the first one that is set. */
5128
5129
12.6M
#define Lmin      F->temp_32[0]
5130
12.6M
#define Lmax      F->temp_32[1]
5131
12.6M
#define Lcaseless F->temp_32[2]
5132
12.6M
#define Lcaseopts F->temp_32[3]
5133
12.6M
#define Lstart    F->temp_sptr[0]
5134
12.6M
#define Loffset   F->temp_size
5135
5136
12.6M
    case OP_DNREF:
5137
0
    case OP_DNREFI:
5138
0
    Lcaseless = (Fop == OP_DNREFI);
5139
0
    Lcaseopts = (Fop == OP_DNREFI)? Fecode[1 + 2*IMM2_SIZE] : 0;
5140
0
      {
5141
0
      int count = GET2(Fecode, 1+IMM2_SIZE);
5142
0
      PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5143
0
      Fecode += 1 + 2*IMM2_SIZE + (Fop == OP_DNREFI? 1 : 0);
5144
5145
0
      while (count-- > 0)
5146
0
        {
5147
0
        Loffset = (GET2(slot, 0) << 1) - 2;
5148
0
        if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET) break;
5149
0
        slot += mb->name_entry_size;
5150
0
        }
5151
0
      }
5152
0
    goto REF_REPEAT;
5153
5154
0
    case OP_REF:
5155
0
    case OP_REFI:
5156
0
    Lcaseless = (Fop == OP_REFI);
5157
0
    Lcaseopts = (Fop == OP_REFI)? Fecode[1 + IMM2_SIZE] : 0;
5158
0
    Loffset = (GET2(Fecode, 1) << 1) - 2;
5159
0
    Fecode += 1 + IMM2_SIZE + (Fop == OP_REFI? 1 : 0);
5160
5161
    /* Set up for repetition, or handle the non-repeated case. The maximum and
5162
    minimum must be in the heap frame, but as they are short-term values, we
5163
    use temporary fields. */
5164
5165
0
    REF_REPEAT:
5166
0
    switch (*Fecode)
5167
0
      {
5168
0
      case OP_CRSTAR:
5169
0
      case OP_CRMINSTAR:
5170
0
      case OP_CRPLUS:
5171
0
      case OP_CRMINPLUS:
5172
0
      case OP_CRQUERY:
5173
0
      case OP_CRMINQUERY:
5174
0
      fc = *Fecode++ - OP_CRSTAR;
5175
0
      Lmin = rep_min[fc];
5176
0
      Lmax = rep_max[fc];
5177
0
      reptype = rep_typ[fc];
5178
0
      break;
5179
5180
0
      case OP_CRRANGE:
5181
0
      case OP_CRMINRANGE:
5182
0
      Lmin = GET2(Fecode, 1);
5183
0
      Lmax = GET2(Fecode, 1 + IMM2_SIZE);
5184
0
      reptype = rep_typ[*Fecode - OP_CRSTAR];
5185
0
      if (Lmax == 0) Lmax = UINT32_MAX;  /* Max 0 => infinity */
5186
0
      Fecode += 1 + 2 * IMM2_SIZE;
5187
0
      break;
5188
5189
0
      default:                  /* No repeat follows */
5190
0
        {
5191
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &length);
5192
0
        if (rrc != 0)
5193
0
          {
5194
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5195
0
          CHECK_PARTIAL();
5196
0
          RRETURN(MATCH_NOMATCH);
5197
0
          }
5198
0
        }
5199
0
      Feptr += length;
5200
0
      continue;              /* With the main loop */
5201
0
      }
5202
5203
    /* Handle repeated back references. If a set group has length zero, just
5204
    continue with the main loop, because it matches however many times. For an
5205
    unset reference, if the minimum is zero, we can also just continue. We can
5206
    also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an
5207
    unset group behave as a zero-length group. For any other unset cases,
5208
    carrying on will result in NOMATCH. */
5209
5210
0
    if (Loffset < Foffset_top && Fovector[Loffset] != PCRE2_UNSET)
5211
0
      {
5212
0
      if (Fovector[Loffset] == Fovector[Loffset + 1]) continue;
5213
0
      }
5214
0
    else  /* Group is not set */
5215
0
      {
5216
0
      if (Lmin == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
5217
0
        continue;
5218
0
      }
5219
5220
    /* First, ensure the minimum number of matches are present. */
5221
5222
0
    for (i = 1; i <= Lmin; i++)
5223
0
      {
5224
0
      PCRE2_SIZE slength;
5225
0
      rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5226
0
      if (rrc != 0)
5227
0
        {
5228
0
        if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5229
0
        CHECK_PARTIAL();
5230
0
        RRETURN(MATCH_NOMATCH);
5231
0
        }
5232
0
      Feptr += slength;
5233
0
      }
5234
5235
    /* If min = max, we are done. They are not both allowed to be zero. */
5236
5237
0
    if (Lmin == Lmax) continue;
5238
5239
    /* If minimizing, keep trying and advancing the pointer. */
5240
5241
0
    if (reptype == REPTYPE_MIN)
5242
0
      {
5243
0
      for (;;)
5244
0
        {
5245
0
        PCRE2_SIZE slength;
5246
0
        RMATCH(Fecode, RM20);
5247
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5248
0
        if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
5249
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5250
0
        if (rrc != 0)
5251
0
          {
5252
0
          if (rrc > 0) Feptr = mb->end_subject;   /* Partial match */
5253
0
          CHECK_PARTIAL();
5254
0
          RRETURN(MATCH_NOMATCH);
5255
0
          }
5256
0
        Feptr += slength;
5257
0
        }
5258
5259
0
      PCRE2_UNREACHABLE(); /* Control never reaches here */
5260
0
      }
5261
5262
    /* If maximizing, find the longest string and work backwards, as long as
5263
    the matched lengths for each iteration are the same. */
5264
5265
0
    else
5266
0
      {
5267
0
      BOOL samelengths = TRUE;
5268
0
      Lstart = Feptr;     /* Starting position */
5269
0
      Flength = Fovector[Loffset+1] - Fovector[Loffset];
5270
5271
0
      for (i = Lmin; i < Lmax; i++)
5272
0
        {
5273
0
        PCRE2_SIZE slength;
5274
0
        rrc = match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5275
0
        if (rrc != 0)
5276
0
          {
5277
          /* Can't use CHECK_PARTIAL because we don't want to update Feptr in
5278
          the soft partial matching case. */
5279
5280
0
          if (rrc > 0 && mb->partial != 0 &&
5281
0
              mb->end_subject > mb->start_used_ptr)
5282
0
            {
5283
0
            mb->hitend = TRUE;
5284
0
            if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
5285
0
            }
5286
0
          break;
5287
0
          }
5288
5289
0
        if (slength != Flength) samelengths = FALSE;
5290
0
        Feptr += slength;
5291
0
        }
5292
5293
      /* If the length matched for each repetition is the same as the length of
5294
      the captured group, we can easily work backwards. This is the normal
5295
      case. However, in caseless UTF-8 mode there are pairs of case-equivalent
5296
      characters whose lengths (in terms of code units) differ. However, this
5297
      is very rare, so we handle it by re-matching fewer and fewer times. */
5298
5299
0
      if (samelengths)
5300
0
        {
5301
0
        while (Feptr >= Lstart)
5302
0
          {
5303
0
          RMATCH(Fecode, RM21);
5304
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5305
0
          Feptr -= Flength;
5306
0
          }
5307
0
        }
5308
5309
      /* The rare case of non-matching lengths. Re-scan the repetition for each
5310
      iteration. We know that match_ref() will succeed every time. */
5311
5312
0
      else
5313
0
        {
5314
0
        Lmax = i;
5315
0
        for (;;)
5316
0
          {
5317
0
          RMATCH(Fecode, RM22);
5318
0
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5319
0
          if (Feptr == Lstart) break; /* Failed after minimal repetition */
5320
0
          Feptr = Lstart;
5321
0
          Lmax--;
5322
0
          for (i = Lmin; i < Lmax; i++)
5323
0
            {
5324
0
            PCRE2_SIZE slength;
5325
0
            (void)match_ref(Loffset, Lcaseless, Lcaseopts, F, mb, &slength);
5326
0
            Feptr += slength;
5327
0
            }
5328
0
          }
5329
0
        }
5330
5331
0
      RRETURN(MATCH_NOMATCH);
5332
0
      }
5333
5334
0
    PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
5335
5336
0
#undef Lcaseless
5337
0
#undef Lmin
5338
0
#undef Lmax
5339
0
#undef Lstart
5340
0
#undef Loffset
5341
5342
5343
5344
/* ========================================================================= */
5345
/*           Opcodes for the start of various parenthesized items            */
5346
/* ========================================================================= */
5347
5348
    /* In all cases, if the result of RMATCH() is MATCH_THEN, check whether the
5349
    (*THEN) is within the current branch by comparing the address of OP_THEN
5350
    that is passed back with the end of the branch. If (*THEN) is within the
5351
    current branch, and the branch is one of two or more alternatives (it
5352
    either starts or ends with OP_ALT), we have reached the limit of THEN's
5353
    action, so convert the return code to NOMATCH, which will cause normal
5354
    backtracking to happen from now on. Otherwise, THEN is passed back to an
5355
    outer alternative. This implements Perl's treatment of parenthesized
5356
    groups, where a group not containing | does not affect the current
5357
    alternative, that is, (X) is NOT the same as (X|(*F)). */
5358
5359
5360
    /* ===================================================================== */
5361
    /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a non-possessive
5362
    bracket group, indicating that it may occur zero times. It may repeat
5363
    infinitely, or not at all - i.e. it could be ()* or ()? or even (){0} in
5364
    the pattern. Brackets with fixed upper repeat limits are compiled as a
5365
    number of copies, with the optional ones preceded by BRAZERO or BRAMINZERO.
5366
    Possessive groups with possible zero repeats are preceded by BRAPOSZERO. */
5367
5368
503
#define Lnext_ecode F->temp_sptr[0]
5369
5370
32
    case OP_BRAZERO:
5371
32
    Lnext_ecode = Fecode + 1;
5372
32
    RMATCH(Lnext_ecode, RM9);
5373
29
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5374
221
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5375
29
    Fecode = Lnext_ecode + 1 + LINK_SIZE;
5376
29
    break;
5377
5378
0
    case OP_BRAMINZERO:
5379
0
    Lnext_ecode = Fecode + 1;
5380
0
    do Lnext_ecode += GET(Lnext_ecode, 1); while (*Lnext_ecode == OP_ALT);
5381
0
    RMATCH(Lnext_ecode + 1 + LINK_SIZE, RM10);
5382
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5383
0
    Fecode++;
5384
0
    break;
5385
5386
0
#undef Lnext_ecode
5387
5388
0
    case OP_SKIPZERO:
5389
0
    Fecode++;
5390
0
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
5391
0
    Fecode += 1 + LINK_SIZE;
5392
0
    break;
5393
5394
5395
    /* ===================================================================== */
5396
    /* Handle possessive brackets with an unlimited repeat. The end of these
5397
    brackets will always be OP_KETRPOS, which returns MATCH_KETRPOS without
5398
    going further in the pattern. */
5399
5400
3.67k
#define Lframe_type    F->temp_32[0]
5401
6.50k
#define Lmatched_once  F->temp_32[1]
5402
2.03k
#define Lzero_allowed  F->temp_32[2]
5403
3.34k
#define Lstart_eptr    F->temp_sptr[0]
5404
1.78k
#define Lstart_group   F->temp_sptr[1]
5405
5406
0
    case OP_BRAPOSZERO:
5407
0
    Lzero_allowed = TRUE;                /* Zero repeat is allowed */
5408
0
    Fecode += 1;
5409
0
    if (*Fecode == OP_CBRAPOS || *Fecode == OP_SCBRAPOS)
5410
0
      goto POSSESSIVE_CAPTURE;
5411
0
    goto POSSESSIVE_NON_CAPTURE;
5412
5413
0
    case OP_BRAPOS:
5414
0
    case OP_SBRAPOS:
5415
0
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5416
5417
0
    POSSESSIVE_NON_CAPTURE:
5418
0
    Lframe_type = GF_NOCAPTURE;          /* Remembered frame type */
5419
0
    goto POSSESSIVE_GROUP;
5420
5421
400
    case OP_CBRAPOS:
5422
1.70k
    case OP_SCBRAPOS:
5423
1.70k
    Lzero_allowed = FALSE;               /* Zero repeat not allowed */
5424
5425
1.70k
    POSSESSIVE_CAPTURE:
5426
1.70k
    number = GET2(Fecode, 1+LINK_SIZE);
5427
1.70k
    Lframe_type = GF_CAPTURE | number;   /* Remembered frame type */
5428
5429
1.70k
    POSSESSIVE_GROUP:
5430
1.70k
    Lmatched_once = FALSE;               /* Never matched */
5431
1.70k
    Lstart_group = Fecode;               /* Start of this group */
5432
5433
1.70k
    for (;;)
5434
1.96k
      {
5435
1.96k
      Lstart_eptr = Feptr;               /* Position at group start */
5436
1.96k
      group_frame_type = Lframe_type;
5437
1.96k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM8);
5438
1.96k
      if (rrc == MATCH_KETRPOS)
5439
1.38k
        {
5440
1.38k
        Lmatched_once = TRUE;            /* Matched at least once */
5441
1.38k
        if (Feptr == Lstart_eptr)        /* Empty match; skip to end */
5442
1.30k
          {
5443
1.30k
          do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5444
1.30k
          break;
5445
1.30k
          }
5446
5447
76
        Fecode = Lstart_group;
5448
76
        continue;
5449
1.38k
        }
5450
5451
      /* See comment above about handling THEN. */
5452
5453
582
      if (rrc == MATCH_THEN)
5454
0
        {
5455
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5456
0
        if (mb->verb_ecode_ptr < next_ecode &&
5457
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5458
0
          rrc = MATCH_NOMATCH;
5459
0
        }
5460
5461
582
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5462
582
      Fecode += GET(Fecode, 1);
5463
582
      if (*Fecode != OP_ALT) break;
5464
582
      }
5465
5466
    /* Success if matched something or zero repeat allowed */
5467
5468
1.70k
    if (Lmatched_once || Lzero_allowed)
5469
1.38k
      {
5470
1.38k
      Fecode += 1 + LINK_SIZE;
5471
1.38k
      break;
5472
1.38k
      }
5473
5474
324
    RRETURN(MATCH_NOMATCH);
5475
5476
0
#undef Lmatched_once
5477
0
#undef Lzero_allowed
5478
0
#undef Lframe_type
5479
0
#undef Lstart_eptr
5480
0
#undef Lstart_group
5481
5482
5483
    /* ===================================================================== */
5484
    /* Handle non-capturing brackets that cannot match an empty string. When we
5485
    get to the final alternative within the brackets, as long as there are no
5486
    THEN's in the pattern, we can optimize by not recording a new backtracking
5487
    point. (Ideally we should test for a THEN within this group, but we don't
5488
    have that information.) Don't do this if we are at the very top level,
5489
    however, because that would make handling assertions and once-only brackets
5490
    messier when there is nothing to go back to. */
5491
5492
9.65M
#define Lframe_type F->temp_32[0]     /* Set for all that use GROUPLOOP */
5493
9.44k
#define Lnext_branch F->temp_sptr[0]  /* Used only in OP_BRA handling */
5494
5495
362k
    case OP_BRA:
5496
362k
    if (mb->hasthen || Frdepth == 0)
5497
360k
      {
5498
360k
      Lframe_type = 0;
5499
360k
      goto GROUPLOOP;
5500
360k
      }
5501
5502
1.29k
    for (;;)
5503
3.57k
      {
5504
3.57k
      Lnext_branch = Fecode + GET(Fecode, 1);
5505
3.57k
      if (*Lnext_branch != OP_ALT) break;
5506
5507
      /* This is never the final branch. We do not need to test for MATCH_THEN
5508
      here because this code is not used when there is a THEN in the pattern. */
5509
5510
2.29k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM1);
5511
2.28k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5512
2.28k
      Fecode = Lnext_branch;
5513
2.28k
      }
5514
5515
    /* Hit the start of the final branch. Continue at this level. */
5516
5517
1.28k
    Fecode += PRIV(OP_lengths)[*Fecode];
5518
1.28k
    break;
5519
5520
0
#undef Lnext_branch
5521
5522
5523
    /* ===================================================================== */
5524
    /* Handle a capturing bracket, other than those that are possessive with an
5525
    unlimited repeat. */
5526
5527
2.03M
    case OP_CBRA:
5528
2.03M
    case OP_SCBRA:
5529
2.03M
    Lframe_type = GF_CAPTURE | GET2(Fecode, 1+LINK_SIZE);
5530
2.03M
    goto GROUPLOOP;
5531
5532
5533
    /* ===================================================================== */
5534
    /* Atomic groups and non-capturing brackets that can match an empty string
5535
    must record a backtracking point and also set up a chained frame. */
5536
5537
0
    case OP_ONCE:
5538
0
    case OP_SCRIPT_RUN:
5539
20
    case OP_SBRA:
5540
20
    Lframe_type = GF_NOCAPTURE | Fop;
5541
5542
2.39M
    GROUPLOOP:
5543
2.39M
    for (;;)
5544
7.25M
      {
5545
7.25M
      group_frame_type = Lframe_type;
5546
7.25M
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM2);
5547
7.25M
      if (rrc == MATCH_THEN)
5548
0
        {
5549
0
        PCRE2_SPTR next_ecode = Fecode + GET(Fecode,1);
5550
0
        if (mb->verb_ecode_ptr < next_ecode &&
5551
0
            (*Fecode == OP_ALT || *next_ecode == OP_ALT))
5552
0
          rrc = MATCH_NOMATCH;
5553
0
        }
5554
7.25M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5555
7.25M
      Fecode += GET(Fecode, 1);
5556
7.25M
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5557
7.25M
      }
5558
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5559
5560
0
#undef Lframe_type
5561
5562
5563
    /* ===================================================================== */
5564
    /* Pattern recursion either matches the current regex, or some
5565
    subexpression. The offset data is the offset to the starting bracket from
5566
    the start of the whole pattern. This is so that it works from duplicated
5567
    subpatterns. For a whole-pattern recursion, we have to infer the number
5568
    zero. */
5569
5570
1.01M
#define Lframe_type F->temp_32[0]
5571
3.01M
#define Lstart_branch F->temp_sptr[0]
5572
5573
18.2k
    case OP_RECURSE:
5574
18.2k
    bracode = mb->start_code + GET(Fecode, 1);
5575
18.2k
    number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
5576
5577
    /* If we are already in a pattern recursion, check for repeating the same
5578
    one without changing the subject pointer or the last referenced character
5579
    in the subject. This should catch convoluted mutual recursions; some
5580
    simple cases are caught at compile time. However, there are rare cases when
5581
    this check needs to be turned off. In this case, actual recursion loops
5582
    will be caught by the match or heap limits. */
5583
5584
18.2k
    if (Fcurrent_recurse != RECURSE_UNSET)
5585
18.2k
      {
5586
18.2k
      offset = Flast_group_offset;
5587
18.2k
      while (offset != PCRE2_UNSET)
5588
18.2k
        {
5589
18.2k
        N = (heapframe *)((char *)match_data->heapframes + offset);
5590
18.2k
        P = (heapframe *)((char *)N - frame_size);
5591
18.2k
        if (N->group_frame_type == (GF_RECURSE | number))
5592
18.2k
          {
5593
18.2k
          if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
5594
18.2k
               (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
5595
15
            return PCRE2_ERROR_RECURSELOOP;
5596
18.1k
          break;
5597
18.2k
          }
5598
43
        offset = P->last_group_offset;
5599
43
        }
5600
18.2k
      }
5601
5602
    /* Remember the current last referenced character and then run the
5603
    recursion branch by branch. */
5604
5605
18.2k
    F->recurse_last_used = mb->last_used_ptr;
5606
18.2k
    Lstart_branch = bracode;
5607
18.2k
    Lframe_type = GF_RECURSE | number;
5608
5609
18.2k
    for (;;)
5610
1.00M
      {
5611
1.00M
      PCRE2_SPTR next_ecode;
5612
5613
1.00M
      group_frame_type = Lframe_type;
5614
1.00M
      RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM11);
5615
1.00M
      next_ecode = Lstart_branch + GET(Lstart_branch,1);
5616
5617
      /* Handle backtracking verbs, which are defined in a range that can
5618
      easily be tested for. PCRE does not allow THEN, SKIP, PRUNE or COMMIT to
5619
      escape beyond a recursion; they cause a NOMATCH for the entire recursion.
5620
5621
      When one of these verbs triggers, the current recursion group number is
5622
      recorded. If it matches the recursion we are processing, the verb
5623
      happened within the recursion and we must deal with it. Otherwise it must
5624
      have happened after the recursion completed, and so has to be passed
5625
      back. See comment above about handling THEN. */
5626
5627
1.00M
      if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX &&
5628
1.00M
          mb->verb_current_recurse == (Lframe_type ^ GF_RECURSE))
5629
0
        {
5630
0
        if (rrc == MATCH_THEN && mb->verb_ecode_ptr < next_ecode &&
5631
0
            (*Lstart_branch == OP_ALT || *next_ecode == OP_ALT))
5632
0
          rrc = MATCH_NOMATCH;
5633
0
        else RRETURN(MATCH_NOMATCH);
5634
0
        }
5635
5636
      /* Note that carrying on after (*ACCEPT) in a recursion is handled in the
5637
      OP_ACCEPT code. Nothing needs to be done here. */
5638
5639
1.00M
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5640
1.00M
      Lstart_branch = next_ecode;
5641
1.00M
      if (*Lstart_branch != OP_ALT) RRETURN(MATCH_NOMATCH);
5642
1.00M
      }
5643
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
5644
5645
0
#undef Lframe_type
5646
0
#undef Lstart_branch
5647
5648
5649
    /* ===================================================================== */
5650
    /* Positive assertions are like other groups except that PCRE doesn't allow
5651
    the effect of (*THEN) to escape beyond an assertion; it is therefore
5652
    treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
5653
    captures and mark retained. Any other return is an error. */
5654
5655
1.84k
#define Lframe_type  F->temp_32[0]
5656
5657
0
    case OP_ASSERT:
5658
292
    case OP_ASSERTBACK:
5659
292
    case OP_ASSERT_NA:
5660
414
    case OP_ASSERTBACK_NA:
5661
414
    Lframe_type = GF_NOCAPTURE | Fop;
5662
414
    for (;;)
5663
1.42k
      {
5664
1.42k
      group_frame_type = Lframe_type;
5665
1.42k
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM3);
5666
1.42k
      if (rrc == MATCH_ACCEPT)
5667
0
        {
5668
0
        memcpy(Fovector,
5669
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5670
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5671
0
        Foffset_top = assert_accept_frame->offset_top;
5672
0
        Fmark = assert_accept_frame->mark;
5673
0
        break;
5674
0
        }
5675
1.42k
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
5676
1.42k
      Fecode += GET(Fecode, 1);
5677
1.42k
      if (*Fecode != OP_ALT) RRETURN(MATCH_NOMATCH);
5678
1.42k
      }
5679
5680
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5681
0
    Fecode += 1 + LINK_SIZE;
5682
0
    break;
5683
5684
0
#undef Lframe_type
5685
5686
5687
    /* ===================================================================== */
5688
    /* Handle negative assertions. Loop for each non-matching branch as for
5689
    positive assertions. */
5690
5691
112
#define Lframe_type  F->temp_32[0]
5692
5693
28
    case OP_ASSERT_NOT:
5694
28
    case OP_ASSERTBACK_NOT:
5695
28
    Lframe_type  = GF_NOCAPTURE | Fop;
5696
5697
28
    for (;;)
5698
84
      {
5699
84
      group_frame_type = Lframe_type;
5700
84
      RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM4);
5701
84
      switch(rrc)
5702
84
        {
5703
0
        case MATCH_ACCEPT:   /* Assertion matched, therefore it fails. */
5704
28
        case MATCH_MATCH:
5705
28
        RRETURN (MATCH_NOMATCH);
5706
5707
56
        case MATCH_NOMATCH:  /* Branch failed, try next if present. */
5708
56
        case MATCH_THEN:
5709
56
        Fecode += GET(Fecode, 1);
5710
56
        if (*Fecode != OP_ALT) goto ASSERT_NOT_FAILED;
5711
56
        break;
5712
5713
56
        case MATCH_COMMIT:   /* Assertion forced to fail, therefore continue. */
5714
0
        case MATCH_SKIP:
5715
0
        case MATCH_PRUNE:
5716
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5717
0
        goto ASSERT_NOT_FAILED;
5718
5719
0
        default:             /* Pass back any other return */
5720
0
        RRETURN(rrc);
5721
84
        }
5722
84
      }
5723
5724
    /* None of the branches have matched or there was a backtrack to (*COMMIT),
5725
    (*SKIP), (*PRUNE), or (*THEN) in the last branch. This is success for a
5726
    negative assertion, so carry on. */
5727
5728
0
    ASSERT_NOT_FAILED:
5729
0
    Fecode += 1 + LINK_SIZE;
5730
0
    break;
5731
5732
0
#undef Lframe_type
5733
5734
    /* ===================================================================== */
5735
    /* Handle scan substring operation. */
5736
5737
0
#define Lframe_type          F->temp_32[0]
5738
0
#define Lextra_size          F->temp_32[1]
5739
0
#define Lsaved_moptions      F->temp_32[2]
5740
0
#define Lsaved_end_subject   F->temp_sptr[0]
5741
0
#define Lsaved_eptr          F->temp_sptr[1]
5742
0
#define Ltrue_end_extra      F->temp_size
5743
5744
0
    case OP_ASSERT_SCS:
5745
0
      {
5746
0
      PCRE2_SPTR ecode = Fecode + 1 + LINK_SIZE;
5747
0
      uint32_t extra_size = 0;
5748
0
      int count;
5749
0
      PCRE2_SPTR slot;
5750
5751
      /* Disable compiler warning. */
5752
0
      offset = 0;
5753
0
      (void)offset;
5754
5755
0
      for (;;)
5756
0
        {
5757
0
        if (*ecode == OP_CREF)
5758
0
          {
5759
0
          extra_size += 1+IMM2_SIZE;
5760
0
          offset = (GET2(ecode, 1) << 1) - 2;
5761
0
          ecode += 1+IMM2_SIZE;
5762
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5763
0
            goto SCS_OFFSET_FOUND;
5764
0
          continue;
5765
0
          }
5766
5767
0
        if (*ecode != OP_DNCREF) RRETURN(MATCH_NOMATCH);
5768
5769
0
        count = GET2(ecode, 1 + IMM2_SIZE);
5770
0
        slot = mb->name_table + GET2(ecode, 1) * mb->name_entry_size;
5771
0
        extra_size += 1+2*IMM2_SIZE;
5772
0
        ecode += 1+2*IMM2_SIZE;
5773
5774
0
        while (count > 0)
5775
0
          {
5776
0
          offset = (GET2(slot, 0) << 1) - 2;
5777
0
          if (offset < Foffset_top && Fovector[offset] != PCRE2_UNSET)
5778
0
            goto SCS_OFFSET_FOUND;
5779
0
          slot += mb->name_entry_size;
5780
0
          count--;
5781
0
          }
5782
0
        }
5783
5784
0
      SCS_OFFSET_FOUND:
5785
5786
      /* Skip remaining options. */
5787
0
      for (;;)
5788
0
        {
5789
0
        if (*ecode == OP_CREF)
5790
0
          {
5791
0
          extra_size += 1+IMM2_SIZE;
5792
0
          ecode += 1+IMM2_SIZE;
5793
0
          }
5794
0
        else if (*ecode == OP_DNCREF)
5795
0
          {
5796
0
          extra_size += 1+2*IMM2_SIZE;
5797
0
          ecode += 1+2*IMM2_SIZE;
5798
0
          }
5799
0
        else break;
5800
0
        }
5801
5802
0
      Lextra_size = extra_size;
5803
0
      }
5804
5805
0
    Lsaved_end_subject = mb->end_subject;
5806
0
    Ltrue_end_extra = mb->true_end_subject - mb->end_subject;
5807
0
    Lsaved_eptr = Feptr;
5808
0
    Lsaved_moptions = mb->moptions;
5809
5810
0
    Feptr = mb->start_subject + Fovector[offset];
5811
0
    mb->true_end_subject = mb->end_subject =
5812
0
      mb->start_subject + Fovector[offset + 1];
5813
0
    mb->moptions &= ~PCRE2_NOTEOL;
5814
5815
0
    Lframe_type = GF_NOCAPTURE | Fop;
5816
0
    for (;;)
5817
0
      {
5818
0
      group_frame_type = Lframe_type;
5819
0
      RMATCH(Fecode + 1 + LINK_SIZE + Lextra_size, RM38);
5820
0
      if (rrc == MATCH_ACCEPT)
5821
0
        {
5822
0
        memcpy(Fovector,
5823
0
              (char *)assert_accept_frame + offsetof(heapframe, ovector),
5824
0
              assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5825
0
        Foffset_top = assert_accept_frame->offset_top;
5826
0
        Fmark = assert_accept_frame->mark;
5827
0
        break;
5828
0
        }
5829
5830
0
      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
5831
0
        {
5832
0
        mb->end_subject = Lsaved_end_subject;
5833
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5834
0
        mb->moptions = Lsaved_moptions;
5835
0
        RRETURN(rrc);
5836
0
        }
5837
5838
0
      Fecode += GET(Fecode, 1);
5839
0
      if (*Fecode != OP_ALT)
5840
0
        {
5841
0
        mb->end_subject = Lsaved_end_subject;
5842
0
        mb->true_end_subject = mb->end_subject + Ltrue_end_extra;
5843
0
        mb->moptions = Lsaved_moptions;
5844
0
        RRETURN(MATCH_NOMATCH);
5845
0
        }
5846
0
      Lextra_size = 0;
5847
0
      }
5848
5849
0
    do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
5850
0
    Fecode += 1 + LINK_SIZE;
5851
0
    Feptr = Lsaved_eptr;
5852
0
    break;
5853
5854
0
#undef Lframe_type
5855
0
#undef Lextra_size
5856
0
#undef Lsaved_end_subject
5857
0
#undef Lsaved_eptr
5858
0
#undef Ltrue_end_extra
5859
0
#undef Lsaved_moptions
5860
5861
    /* ===================================================================== */
5862
    /* The callout item calls an external function, if one is provided, passing
5863
    details of the match so far. This is mainly for debugging, though the
5864
    function is able to force a failure. */
5865
5866
0
    case OP_CALLOUT:
5867
0
    case OP_CALLOUT_STR:
5868
0
    rrc = do_callout(F, mb, &length);
5869
0
    if (rrc > 0) RRETURN(MATCH_NOMATCH);
5870
0
    if (rrc < 0) RRETURN(rrc);
5871
0
    Fecode += length;
5872
0
    break;
5873
5874
5875
    /* ===================================================================== */
5876
    /* Conditional group: compilation checked that there are no more than two
5877
    branches. If the condition is false, skipping the first branch takes us
5878
    past the end of the item if there is only one branch, but that's exactly
5879
    what we want. */
5880
5881
0
    case OP_COND:
5882
0
    case OP_SCOND:
5883
5884
    /* The variable Flength will be added to Fecode when the condition is
5885
    false, to get to the second branch. Setting it to the offset to the ALT or
5886
    KET, then incrementing Fecode achieves this effect. However, if the second
5887
    branch is non-existent, we must point to the KET so that the end of the
5888
    group is correctly processed. We now have Fecode pointing to the condition
5889
    or callout. */
5890
5891
0
    Flength = GET(Fecode, 1);    /* Offset to the second branch */
5892
0
    if (Fecode[Flength] != OP_ALT) Flength -= 1 + LINK_SIZE;
5893
0
    Fecode += 1 + LINK_SIZE;     /* From this opcode */
5894
5895
    /* Because of the way auto-callout works during compile, a callout item is
5896
    inserted between OP_COND and an assertion condition. Such a callout can
5897
    also be inserted manually. */
5898
5899
0
    if (*Fecode == OP_CALLOUT || *Fecode == OP_CALLOUT_STR)
5900
0
      {
5901
0
      rrc = do_callout(F, mb, &length);
5902
0
      if (rrc > 0) RRETURN(MATCH_NOMATCH);
5903
0
      if (rrc < 0) RRETURN(rrc);
5904
5905
      /* Advance Fecode past the callout, so it now points to the condition. We
5906
      must adjust Flength so that the value of Fecode+Flength is unchanged. */
5907
5908
0
      Fecode += length;
5909
0
      Flength -= length;
5910
0
      }
5911
5912
    /* Test the various possible conditions */
5913
5914
0
    condition = FALSE;
5915
0
    switch(*Fecode)
5916
0
      {
5917
0
      case OP_RREF:                  /* Group recursion test */
5918
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5919
0
        {
5920
0
        number = GET2(Fecode, 1);
5921
0
        condition = (number == RREF_ANY || number == Fcurrent_recurse);
5922
0
        }
5923
0
      break;
5924
5925
0
      case OP_DNRREF:       /* Duplicate named group recursion test */
5926
0
      if (Fcurrent_recurse != RECURSE_UNSET)
5927
0
        {
5928
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5929
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5930
0
        while (count-- > 0)
5931
0
          {
5932
0
          number = GET2(slot, 0);
5933
0
          condition = number == Fcurrent_recurse;
5934
0
          if (condition) break;
5935
0
          slot += mb->name_entry_size;
5936
0
          }
5937
0
        }
5938
0
      break;
5939
5940
0
      case OP_CREF:                         /* Numbered group used test */
5941
0
      offset = (GET2(Fecode, 1) << 1) - 2;  /* Doubled ref number */
5942
0
      condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5943
0
      break;
5944
5945
0
      case OP_DNCREF:      /* Duplicate named group used test */
5946
0
        {
5947
0
        int count = GET2(Fecode, 1 + IMM2_SIZE);
5948
0
        PCRE2_SPTR slot = mb->name_table + GET2(Fecode, 1) * mb->name_entry_size;
5949
0
        while (count-- > 0)
5950
0
          {
5951
0
          offset = (GET2(slot, 0) << 1) - 2;
5952
0
          condition = offset < Foffset_top && Fovector[offset] != PCRE2_UNSET;
5953
0
          if (condition) break;
5954
0
          slot += mb->name_entry_size;
5955
0
          }
5956
0
        }
5957
0
      break;
5958
5959
0
      case OP_FALSE:
5960
0
      case OP_FAIL:   /* The assertion (?!) becomes OP_FAIL */
5961
0
      break;
5962
5963
0
      case OP_TRUE:
5964
0
      condition = TRUE;
5965
0
      break;
5966
5967
      /* The condition is an assertion. Run code similar to the assertion code
5968
      above. */
5969
5970
0
#define Lpositive      F->temp_32[0]
5971
0
#define Lstart_branch  F->temp_sptr[0]
5972
5973
0
      default:
5974
0
      Lpositive = (*Fecode == OP_ASSERT || *Fecode == OP_ASSERTBACK);
5975
0
      Lstart_branch = Fecode;
5976
5977
0
      for (;;)
5978
0
        {
5979
0
        group_frame_type = GF_CONDASSERT | *Fecode;
5980
0
        RMATCH(Lstart_branch + PRIV(OP_lengths)[*Lstart_branch], RM5);
5981
5982
0
        switch(rrc)
5983
0
          {
5984
0
          case MATCH_ACCEPT:  /* Save captures */
5985
0
          memcpy(Fovector,
5986
0
                (char *)assert_accept_frame + offsetof(heapframe, ovector),
5987
0
                assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
5988
0
          Foffset_top = assert_accept_frame->offset_top;
5989
5990
          /* Fall through */
5991
          /* In the case of a match, the captures have already been put into
5992
          the current frame. */
5993
5994
0
          case MATCH_MATCH:
5995
0
          condition = Lpositive;   /* TRUE for positive assertion */
5996
0
          break;
5997
5998
          /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
5999
          assertion; it is therefore always treated as NOMATCH. */
6000
6001
0
          case MATCH_NOMATCH:
6002
0
          case MATCH_THEN:
6003
0
          Lstart_branch += GET(Lstart_branch, 1);
6004
0
          if (*Lstart_branch == OP_ALT) continue;  /* Try next branch */
6005
0
          condition = !Lpositive;  /* TRUE for negative assertion */
6006
0
          break;
6007
6008
          /* These force no match without checking other branches. */
6009
6010
0
          case MATCH_COMMIT:
6011
0
          case MATCH_SKIP:
6012
0
          case MATCH_PRUNE:
6013
0
          condition = !Lpositive;
6014
0
          break;
6015
6016
0
          default:
6017
0
          RRETURN(rrc);
6018
0
          }
6019
0
        break;  /* Out of the branch loop */
6020
0
        }
6021
6022
      /* If the condition is true, find the end of the assertion so that
6023
      advancing past it gets us to the start of the first branch. */
6024
6025
0
      if (condition)
6026
0
        {
6027
0
        do Fecode += GET(Fecode, 1); while (*Fecode == OP_ALT);
6028
0
        }
6029
0
      break;  /* End of assertion condition */
6030
0
      }
6031
6032
0
#undef Lpositive
6033
0
#undef Lstart_branch
6034
6035
    /* Choose branch according to the condition. */
6036
6037
0
    Fecode += condition? PRIV(OP_lengths)[*Fecode] : Flength;
6038
6039
    /* If the opcode is OP_SCOND it means we are at a repeated conditional
6040
    group that might match an empty string. We must therefore descend a level
6041
    so that the start is remembered for checking. For OP_COND we can just
6042
    continue at this level. */
6043
6044
0
    if (Fop == OP_SCOND)
6045
0
      {
6046
0
      group_frame_type  = GF_NOCAPTURE | Fop;
6047
0
      RMATCH(Fecode, RM35);
6048
0
      RRETURN(rrc);
6049
0
      }
6050
0
    break;
6051
6052
6053
6054
/* ========================================================================= */
6055
/*                  End of start of parenthesis opcodes                      */
6056
/* ========================================================================= */
6057
6058
6059
    /* ===================================================================== */
6060
    /* Move the subject pointer back by one fixed amount. This occurs at the
6061
    start of each branch that has a fixed length in a lookbehind assertion. If
6062
    we are too close to the start to move back, fail. When working with UTF-8
6063
    we move back a number of characters, not bytes. */
6064
6065
606
    case OP_REVERSE:
6066
606
    number = GET2(Fecode, 1);
6067
606
#ifdef SUPPORT_UNICODE
6068
606
    if (utf)
6069
0
      {
6070
      /* We used to do a simpler `while (number-- > 0)` but that triggers
6071
      clang's unsigned integer overflow sanitizer. */
6072
0
      while (number > 0)
6073
0
        {
6074
0
        --number;
6075
0
        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
6076
0
        Feptr--;
6077
0
        BACKCHAR(Feptr);
6078
0
        }
6079
0
      }
6080
606
    else
6081
606
#endif
6082
6083
    /* No UTF support, or not in UTF mode: count is code unit count */
6084
6085
606
      {
6086
606
      if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
6087
486
      Feptr -= number;
6088
486
      }
6089
6090
    /* Save the earliest consulted character, then skip to next opcode */
6091
6092
486
    if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
6093
486
    Fecode += 1 + IMM2_SIZE;
6094
486
    break;
6095
6096
6097
    /* ===================================================================== */
6098
    /* Move the subject pointer back by a variable amount. This occurs at the
6099
    start of each branch of a lookbehind assertion when the branch has a
6100
    variable, but limited, length. A loop is needed to try matching the branch
6101
    after moving back different numbers of characters. If we are too close to
6102
    the start to move back even the minimum amount, fail. When working with
6103
    UTF-8 we move back a number of characters, not bytes. */
6104
6105
4.30k
#define Lmin F->temp_32[0]
6106
5.38k
#define Lmax F->temp_32[1]
6107
710
#define Leptr F->temp_sptr[0]
6108
6109
710
    case OP_VREVERSE:
6110
710
    Lmin = GET2(Fecode, 1);
6111
710
    Lmax = GET2(Fecode, 1 + IMM2_SIZE);
6112
710
    Leptr = Feptr;
6113
6114
    /* Move back by the maximum branch length and then work forwards. This
6115
    ensures that items such as \d{3,5} get the maximum length, which is
6116
    relevant for captures, and makes for Perl compatibility. */
6117
6118
710
#ifdef SUPPORT_UNICODE
6119
710
    if (utf)
6120
0
      {
6121
0
      for (i = 0; i < Lmax; i++)
6122
0
        {
6123
0
        if (Feptr == mb->start_subject)
6124
0
          {
6125
0
          if (i < Lmin) RRETURN(MATCH_NOMATCH);
6126
0
          Lmax = i;
6127
0
          break;
6128
0
          }
6129
0
        Feptr--;
6130
0
        BACKCHAR(Feptr);
6131
0
        }
6132
0
      }
6133
710
    else
6134
710
#endif
6135
6136
    /* No UTF support or not in UTF mode */
6137
6138
710
      {
6139
710
      ptrdiff_t diff = Feptr - mb->start_subject;
6140
710
      uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? (int)diff : 0);
6141
710
      if (Lmin > available) RRETURN(MATCH_NOMATCH);
6142
630
      if (Lmax > available) Lmax = available;
6143
630
      Feptr -= Lmax;
6144
630
      }
6145
6146
    /* Now try matching, moving forward one character on failure, until we
6147
    reach the minimum back length. */
6148
6149
630
    for (;;)
6150
2.88k
      {
6151
2.88k
      RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
6152
2.88k
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6153
2.88k
      if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
6154
2.25k
      Feptr++;
6155
2.25k
#ifdef SUPPORT_UNICODE
6156
2.25k
      if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
6157
2.25k
#endif
6158
2.25k
      }
6159
0
    PCRE2_UNREACHABLE(); /* Control never reaches here */
6160
6161
0
#undef Lmin
6162
0
#undef Lmax
6163
0
#undef Leptr
6164
6165
    /* ===================================================================== */
6166
    /* An alternation is the end of a branch; scan along to find the end of the
6167
    bracketed group. */
6168
6169
2.47M
    case OP_ALT:
6170
2.47M
    branch_end = Fecode;
6171
13.4M
    do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
6172
2.47M
    break;
6173
6174
6175
    /* ===================================================================== */
6176
    /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
6177
    starting frame was added to the chained frames in order to remember the
6178
    starting subject position for the group. (Not true for OP_BRA when it's a
6179
    whole pattern recursion, but that is handled separately below.)*/
6180
6181
2.48M
    case OP_KET:
6182
2.48M
    case OP_KETRMIN:
6183
2.48M
    case OP_KETRMAX:
6184
2.48M
    case OP_KETRPOS:
6185
6186
2.48M
    bracode = Fecode - GET(Fecode, 1);
6187
6188
2.48M
    if (branch_end == NULL) branch_end = Fecode;
6189
2.48M
    branch_start = bracode;
6190
14.7M
    while (branch_start + GET(branch_start, 1) != branch_end)
6191
12.2M
      branch_start += GET(branch_start, 1);
6192
2.48M
    branch_end = NULL;
6193
6194
    /* Point N to the frame at the start of the most recent group, and P to its
6195
    predecessor. Remember the subject pointer at the start of the group. */
6196
6197
2.48M
    if (*bracode != OP_BRA && *bracode != OP_COND)
6198
2.48M
      {
6199
2.48M
      N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
6200
2.48M
      P = (heapframe *)((char *)N - frame_size);
6201
2.48M
      Flast_group_offset = P->last_group_offset;
6202
6203
#ifdef DEBUG_SHOW_RMATCH
6204
      fprintf(stderr, "++ KET for frame=%d type=%x prev char offset=%lu\n",
6205
        N->rdepth, N->group_frame_type,
6206
        (char *)P->eptr - (char *)mb->start_subject);
6207
#endif
6208
6209
      /* If we are at the end of an assertion that is a condition, first check
6210
      to see if we are at the end of a variable-length branch in a lookbehind.
6211
      If this is the case and we have not landed on the current character,
6212
      return no match. Compare code below for non-condition lookbehinds. In
6213
      other cases, return a match, discarding any intermediate backtracking
6214
      points. Copy back the mark setting and the captures into the frame before
6215
      N so that they are set on return. Doing this for all assertions, both
6216
      positive and negative, seems to match what Perl does. */
6217
6218
2.48M
      if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
6219
0
        {
6220
0
        if ((*bracode == OP_ASSERTBACK || *bracode == OP_ASSERTBACK_NOT) &&
6221
0
            branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6222
0
          RRETURN(MATCH_NOMATCH);
6223
0
        memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
6224
0
          Foffset_top * sizeof(PCRE2_SIZE));
6225
0
        P->offset_top = Foffset_top;
6226
0
        P->mark = Fmark;
6227
0
        Fback_frame = (char *)F - (char *)P;
6228
0
        RRETURN(MATCH_MATCH);
6229
0
        }
6230
2.48M
      }
6231
4.58k
    else P = NULL;   /* Indicates starting frame not recorded */
6232
6233
    /* The group was not a conditional assertion. */
6234
6235
2.48M
    switch (*bracode)
6236
2.48M
      {
6237
      /* Whole pattern recursion is handled as a recursion into group 0, but
6238
      the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
6239
      group - a design mistake: it should perhaps have been capture group 0.
6240
      Anyway, that means the end of such recursion must be handled here. It is
6241
      detected by checking for an immediately following OP_END when we are
6242
      recursing in group 0. If this is not the end of a whole-pattern
6243
      recursion, there is nothing to be done. */
6244
6245
4.58k
      case OP_BRA:
6246
4.58k
      if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
6247
6248
      /* It is the end of whole-pattern recursion. */
6249
6250
0
      offset = Flast_group_offset;
6251
6252
      /* Corrupted heapframes? Trigger an assert and return an error. */
6253
0
      PCRE2_ASSERT(offset != PCRE2_UNSET);
6254
0
      if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
6255
6256
0
      N = (heapframe *)((char *)match_data->heapframes + offset);
6257
0
      P = (heapframe *)((char *)N - frame_size);
6258
0
      Flast_group_offset = P->last_group_offset;
6259
6260
      /* Reinstate the previous set of captures and then carry on after the
6261
      recursion call. */
6262
6263
0
      memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6264
0
        Foffset_top * sizeof(PCRE2_SIZE));
6265
0
      Foffset_top = P->offset_top;
6266
0
      Fcapture_last = P->capture_last;
6267
0
      Fcurrent_recurse = P->current_recurse;
6268
0
      Fecode = P->ecode + 1 + LINK_SIZE;
6269
0
      continue;  /* With next opcode */
6270
6271
0
      case OP_COND:     /* No need to do anything for these */
6272
0
      case OP_SCOND:
6273
0
      break;
6274
6275
      /* Non-atomic positive assertions are like OP_BRA, except that the
6276
      subject pointer must be put back to where it was at the start of the
6277
      assertion. For a variable lookbehind, check its end point. */
6278
6279
30
      case OP_ASSERTBACK_NA:
6280
30
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6281
0
        RRETURN(MATCH_NOMATCH);
6282
      /* Fall through */
6283
6284
30
      case OP_ASSERT_NA:
6285
30
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6286
30
      Feptr = P->eptr;
6287
30
      break;
6288
6289
      /* Atomic positive assertions are like OP_ONCE, except that in addition
6290
      the subject pointer must be put back to where it was at the start of the
6291
      assertion. For a variable lookbehind, check its end point. */
6292
6293
112
      case OP_ASSERTBACK:
6294
112
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6295
0
        RRETURN(MATCH_NOMATCH);
6296
      /* Fall through */
6297
6298
112
      case OP_ASSERT:
6299
112
      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6300
112
      Feptr = P->eptr;
6301
      /* Fall through */
6302
6303
      /* For an atomic group, discard internal backtracking points. We must
6304
      also ensure that any remaining branches within the top-level of the group
6305
      are not tried. Do this by adjusting the code pointer within the backtrack
6306
      frame so that it points to the final branch. */
6307
6308
112
      case OP_ONCE:
6309
112
      Fback_frame = ((char *)F - (char *)P);
6310
112
      for (;;)
6311
602
        {
6312
602
        uint32_t y = GET(P->ecode,1);
6313
602
        if ((P->ecode)[y] != OP_ALT) break;
6314
490
        P->ecode += y;
6315
490
        }
6316
112
      break;
6317
6318
      /* A matching negative assertion returns MATCH, which is turned into
6319
      NOMATCH at the assertion level. For a variable lookbehind, check its end
6320
      point. */
6321
6322
0
      case OP_ASSERTBACK_NOT:
6323
0
      if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
6324
0
        RRETURN(MATCH_NOMATCH);
6325
      /* Fall through */
6326
6327
28
      case OP_ASSERT_NOT:
6328
28
      RRETURN(MATCH_MATCH);
6329
6330
      /* A scan substring group must preserve the current end_subject,
6331
      and restore it before the backtracking is performed into its sub
6332
      pattern. */
6333
6334
0
      case OP_ASSERT_SCS:
6335
0
      F->temp_sptr[0] = mb->end_subject;
6336
0
      mb->end_subject = P->temp_sptr[0];
6337
0
      mb->true_end_subject = mb->end_subject + P->temp_size;
6338
0
      Feptr = P->temp_sptr[1];
6339
6340
0
      RMATCH(Fecode + 1 + LINK_SIZE, RM39);
6341
6342
0
      mb->end_subject = F->temp_sptr[0];
6343
0
      mb->true_end_subject = mb->end_subject;
6344
0
      RRETURN(rrc);
6345
0
      break;
6346
6347
      /* At the end of a script run, apply the script-checking rules. This code
6348
      will never be exercised if Unicode support is not compiled, because in
6349
      that environment script runs cause an error at compile time. */
6350
6351
0
      case OP_SCRIPT_RUN:
6352
0
      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
6353
0
      break;
6354
6355
      /* Whole-pattern recursion is coded as a recurse into group 0, and is
6356
      handled with OP_BRA above. Other recursion is handled here. */
6357
6358
2.47M
      case OP_CBRA:
6359
2.47M
      case OP_CBRAPOS:
6360
2.48M
      case OP_SCBRA:
6361
2.48M
      case OP_SCBRAPOS:
6362
2.48M
      number = GET2(bracode, 1+LINK_SIZE);
6363
6364
      /* Handle a recursively called group. We reinstate the previous set of
6365
      captures and then carry on after the recursion call. */
6366
6367
2.48M
      if (Fcurrent_recurse == number)
6368
19
        {
6369
19
        P = (heapframe *)((char *)N - frame_size);
6370
19
        memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
6371
19
          Foffset_top * sizeof(PCRE2_SIZE));
6372
19
        Foffset_top = P->offset_top;
6373
19
        Fcapture_last = P->capture_last;
6374
19
        Fcurrent_recurse = P->current_recurse;
6375
19
        Fecode = P->ecode + 1 + LINK_SIZE;
6376
19
        continue;  /* With next opcode */
6377
19
        }
6378
6379
      /* Deal with actual capturing. */
6380
6381
2.48M
      offset = (number << 1) - 2;
6382
2.48M
      Fcapture_last = number;
6383
2.48M
      Fovector[offset] = P->eptr - mb->start_subject;
6384
2.48M
      Fovector[offset+1] = Feptr - mb->start_subject;
6385
2.48M
      if (offset >= Foffset_top) Foffset_top = offset + 2;
6386
2.48M
      break;
6387
2.48M
      }  /* End actions relating to the starting opcode */
6388
6389
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
6390
    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
6391
    at a time from the outer level. This must precede the empty string test -
6392
    in this case that test is done at the outer level. */
6393
6394
2.48M
    if (*Fecode == OP_KETRPOS)
6395
1.38k
      {
6396
1.38k
      memcpy((char *)P + offsetof(heapframe, eptr),
6397
1.38k
             (char *)F + offsetof(heapframe, eptr),
6398
1.38k
             frame_copy_size);
6399
1.38k
      RRETURN(MATCH_KETRPOS);
6400
1.38k
      }
6401
6402
    /* Handle the different kinds of closing brackets. A non-repeating ket
6403
    needs no special action, just continuing at this level. This also happens
6404
    for the repeating kets if the group matched no characters, in order to
6405
    forcibly break infinite loops. Otherwise, the repeating kets try the rest
6406
    of the pattern or restart from the preceding bracket, in the appropriate
6407
    order. */
6408
6409
2.48M
    if (Fop != OP_KET && (P == NULL || Feptr != P->eptr))
6410
175
      {
6411
175
      if (Fop == OP_KETRMIN)
6412
0
        {
6413
0
        RMATCH(Fecode + 1 + LINK_SIZE, RM6);
6414
0
        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6415
0
        Fecode -= GET(Fecode, 1);
6416
0
        break;   /* End of ket processing */
6417
0
        }
6418
6419
      /* Repeat the maximum number of times (KETRMAX) */
6420
6421
175
      RMATCH(bracode, RM7);
6422
175
      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6423
175
      }
6424
6425
    /* Carry on at this level for a non-repeating ket, or after matching an
6426
    empty string, or after repeating for a maximum number of times. */
6427
6428
2.48M
    Fecode += 1 + LINK_SIZE;
6429
2.48M
    break;
6430
6431
6432
    /* ===================================================================== */
6433
    /* Start and end of line assertions, not multiline mode. */
6434
6435
344k
    case OP_CIRC:   /* Start of line, unless PCRE2_NOTBOL is set. */
6436
344k
    if (Feptr != mb->start_subject || (mb->moptions & PCRE2_NOTBOL) != 0)
6437
344k
      RRETURN(MATCH_NOMATCH);
6438
446
    Fecode++;
6439
446
    break;
6440
6441
243k
    case OP_SOD:    /* Unconditional start of subject */
6442
243k
    if (Feptr != mb->start_subject) RRETURN(MATCH_NOMATCH);
6443
14
    Fecode++;
6444
14
    break;
6445
6446
    /* When PCRE2_NOTEOL is unset, assert before the subject end, or a
6447
    terminating newline unless PCRE2_DOLLAR_ENDONLY is set. */
6448
6449
183k
    case OP_DOLL:
6450
183k
    if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6451
183k
    if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6452
6453
    /* Fall through */
6454
    /* Unconditional end of subject assertion (\z). */
6455
6456
1.23k
    case OP_EOD:
6457
1.23k
    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
6458
270
    if (mb->partial != 0)
6459
0
      {
6460
0
      mb->hitend = TRUE;
6461
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6462
0
      }
6463
270
    Fecode++;
6464
270
    break;
6465
6466
    /* End of subject or ending \n assertion (\Z) */
6467
6468
383
    case OP_EODN:
6469
184k
    ASSERT_NL_OR_EOS:
6470
184k
    if (Feptr < mb->true_end_subject &&
6471
184k
        (!IS_NEWLINE(Feptr) || Feptr != mb->true_end_subject - mb->nllen))
6472
184k
      {
6473
184k
      if (mb->partial != 0 &&
6474
184k
          Feptr + 1 >= mb->end_subject &&
6475
184k
          NLBLOCK->nltype == NLTYPE_FIXED &&
6476
184k
          NLBLOCK->nllen == 2 &&
6477
184k
          UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6478
0
        {
6479
0
        mb->hitend = TRUE;
6480
0
        if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6481
0
        }
6482
184k
      RRETURN(MATCH_NOMATCH);
6483
184k
      }
6484
6485
    /* Either at end of string or \n before end. */
6486
6487
24
    if (mb->partial != 0)
6488
0
      {
6489
0
      mb->hitend = TRUE;
6490
0
      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6491
0
      }
6492
24
    Fecode++;
6493
24
    break;
6494
6495
6496
    /* ===================================================================== */
6497
    /* Start and end of line assertions, multiline mode. */
6498
6499
    /* Start of subject unless notbol, or after any newline except for one at
6500
    the very end, unless PCRE2_ALT_CIRCUMFLEX is set. */
6501
6502
7.96k
    case OP_CIRCM:
6503
7.96k
    if ((mb->moptions & PCRE2_NOTBOL) != 0 && Feptr == mb->start_subject)
6504
0
      RRETURN(MATCH_NOMATCH);
6505
7.96k
    if (Feptr != mb->start_subject &&
6506
7.96k
        ((Feptr == mb->end_subject &&
6507
7.92k
           (mb->poptions & PCRE2_ALT_CIRCUMFLEX) == 0) ||
6508
7.92k
         !WAS_NEWLINE(Feptr)))
6509
7.69k
      RRETURN(MATCH_NOMATCH);
6510
273
    Fecode++;
6511
273
    break;
6512
6513
    /* Assert before any newline, or before end of subject unless noteol is
6514
    set. */
6515
6516
158k
    case OP_DOLLM:
6517
158k
    if (Feptr < mb->end_subject)
6518
158k
      {
6519
158k
      if (!IS_NEWLINE(Feptr))
6520
156k
        {
6521
156k
        if (mb->partial != 0 &&
6522
156k
            Feptr + 1 >= mb->end_subject &&
6523
156k
            NLBLOCK->nltype == NLTYPE_FIXED &&
6524
156k
            NLBLOCK->nllen == 2 &&
6525
156k
            UCHAR21TEST(Feptr) == NLBLOCK->nl[0])
6526
0
          {
6527
0
          mb->hitend = TRUE;
6528
0
          if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
6529
0
          }
6530
156k
        RRETURN(MATCH_NOMATCH);
6531
156k
        }
6532
158k
      }
6533
603
    else
6534
603
      {
6535
603
      if ((mb->moptions & PCRE2_NOTEOL) != 0) RRETURN(MATCH_NOMATCH);
6536
603
      SCHECK_PARTIAL();
6537
603
      }
6538
2.24k
    Fecode++;
6539
2.24k
    break;
6540
6541
6542
    /* ===================================================================== */
6543
    /* Start of match assertion */
6544
6545
1.03k
    case OP_SOM:
6546
1.03k
    if (Feptr != mb->start_subject + mb->start_offset) RRETURN(MATCH_NOMATCH);
6547
11
    Fecode++;
6548
11
    break;
6549
6550
6551
    /* ===================================================================== */
6552
    /* Reset the start of match point */
6553
6554
0
    case OP_SET_SOM:
6555
0
    Fstart_match = Feptr;
6556
0
    Fecode++;
6557
0
    break;
6558
6559
6560
    /* ===================================================================== */
6561
    /* Word boundary assertions. Find out if the previous and current
6562
    characters are "word" characters. It takes a bit more work in UTF mode.
6563
    Characters > 255 are assumed to be "non-word" characters when PCRE2_UCP is
6564
    not set. When it is set, use Unicode properties if available, even when not
6565
    in UTF mode. Remember the earliest and latest consulted characters. */
6566
6567
121k
    case OP_NOT_WORD_BOUNDARY:
6568
123k
    case OP_WORD_BOUNDARY:
6569
136k
    case OP_NOT_UCP_WORD_BOUNDARY:
6570
140k
    case OP_UCP_WORD_BOUNDARY:
6571
140k
    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
6572
139k
      {
6573
139k
      PCRE2_SPTR lastptr = Feptr - 1;
6574
139k
#ifdef SUPPORT_UNICODE
6575
139k
      if (utf)
6576
16.6k
        {
6577
16.6k
        BACKCHAR(lastptr);
6578
16.6k
        GETCHAR(fc, lastptr);
6579
16.6k
        }
6580
123k
      else
6581
123k
#endif  /* SUPPORT_UNICODE */
6582
123k
      fc = *lastptr;
6583
139k
      if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
6584
139k
#ifdef SUPPORT_UNICODE
6585
139k
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6586
16.6k
        {
6587
16.6k
        int chartype = UCD_CHARTYPE(fc);
6588
16.6k
        int category = PRIV(ucp_gentype)[chartype];
6589
16.6k
        prev_is_word = (category == ucp_L || category == ucp_N ||
6590
16.6k
          chartype == ucp_Mn || chartype == ucp_Pc);
6591
16.6k
        }
6592
123k
      else
6593
123k
#endif  /* SUPPORT_UNICODE */
6594
123k
      prev_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6595
139k
      }
6596
6597
    /* Get status of next character */
6598
6599
140k
    if (Feptr >= mb->end_subject)
6600
83
      {
6601
83
      SCHECK_PARTIAL();
6602
83
      cur_is_word = FALSE;
6603
83
      }
6604
140k
    else
6605
140k
      {
6606
140k
      PCRE2_SPTR nextptr = Feptr + 1;
6607
140k
#ifdef SUPPORT_UNICODE
6608
140k
      if (utf)
6609
16.8k
        {
6610
16.8k
        FORWARDCHARTEST(nextptr, mb->end_subject);
6611
16.8k
        GETCHAR(fc, Feptr);
6612
16.8k
        }
6613
123k
      else
6614
123k
#endif  /* SUPPORT_UNICODE */
6615
123k
      fc = *Feptr;
6616
140k
      if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
6617
140k
#ifdef SUPPORT_UNICODE
6618
140k
      if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
6619
16.8k
        {
6620
16.8k
        int chartype = UCD_CHARTYPE(fc);
6621
16.8k
        int category = PRIV(ucp_gentype)[chartype];
6622
16.8k
        cur_is_word = (category == ucp_L || category == ucp_N ||
6623
16.8k
          chartype == ucp_Mn || chartype == ucp_Pc);
6624
16.8k
        }
6625
123k
      else
6626
123k
#endif  /* SUPPORT_UNICODE */
6627
123k
      cur_is_word = CHMAX_255(fc) && (mb->ctypes[fc] & ctype_word) != 0;
6628
140k
      }
6629
6630
    /* Now see if the situation is what we want */
6631
6632
140k
    if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
6633
134k
         cur_is_word == prev_is_word : cur_is_word != prev_is_word)
6634
27.3k
      RRETURN(MATCH_NOMATCH);
6635
112k
    break;
6636
6637
6638
    /* ===================================================================== */
6639
    /* Backtracking (*VERB)s, with and without arguments. Note that if the
6640
    pattern is successfully matched, we do not come back from RMATCH. */
6641
6642
112k
    case OP_MARK:
6643
1.47k
    Fmark = mb->nomatch_mark = Fecode + 2;
6644
1.47k
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM12);
6645
6646
    /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
6647
    argument, and we must check whether that argument matches this MARK's
6648
    argument. It is passed back in mb->verb_skip_ptr. If it does match, we
6649
    return MATCH_SKIP with mb->verb_skip_ptr now pointing to the subject
6650
    position that corresponds to this mark. Otherwise, pass back the return
6651
    code unaltered. */
6652
6653
1.47k
    if (rrc == MATCH_SKIP_ARG &&
6654
1.47k
             PRIV(strcmp)(Fecode + 2, mb->verb_skip_ptr) == 0)
6655
0
      {
6656
0
      mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6657
0
      RRETURN(MATCH_SKIP);
6658
0
      }
6659
1.47k
    RRETURN(rrc);
6660
6661
0
    case OP_FAIL:
6662
0
    RRETURN(MATCH_NOMATCH);
6663
6664
    /* Record the current recursing group number in mb->verb_current_recurse
6665
    when a backtracking return such as MATCH_COMMIT is given. This enables the
6666
    recurse processing to catch verbs from within the recursion. */
6667
6668
0
    case OP_COMMIT:
6669
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM13);
6670
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6671
0
    mb->verb_current_recurse = Fcurrent_recurse;
6672
0
    RRETURN(MATCH_COMMIT);
6673
6674
0
    case OP_COMMIT_ARG:
6675
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6676
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
6677
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6678
0
    mb->verb_current_recurse = Fcurrent_recurse;
6679
0
    RRETURN(MATCH_COMMIT);
6680
6681
0
    case OP_PRUNE:
6682
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
6683
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6684
0
    mb->verb_current_recurse = Fcurrent_recurse;
6685
0
    RRETURN(MATCH_PRUNE);
6686
6687
0
    case OP_PRUNE_ARG:
6688
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6689
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM15);
6690
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6691
0
    mb->verb_current_recurse = Fcurrent_recurse;
6692
0
    RRETURN(MATCH_PRUNE);
6693
6694
0
    case OP_SKIP:
6695
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM16);
6696
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6697
0
    mb->verb_skip_ptr = Feptr;   /* Pass back current position */
6698
0
    mb->verb_current_recurse = Fcurrent_recurse;
6699
0
    RRETURN(MATCH_SKIP);
6700
6701
    /* Note that, for Perl compatibility, SKIP with an argument does NOT set
6702
    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
6703
    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
6704
    that failed and any that precede it (either they also failed, or were not
6705
    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
6706
    SKIP_ARG gets to top level, the match is re-run with mb->ignore_skip_arg
6707
    set to the count of the one that failed. */
6708
6709
0
    case OP_SKIP_ARG:
6710
0
    mb->skip_arg_count++;
6711
0
    if (mb->skip_arg_count <= mb->ignore_skip_arg)
6712
0
      {
6713
0
      Fecode += PRIV(OP_lengths)[*Fecode] + Fecode[1];
6714
0
      break;
6715
0
      }
6716
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM17);
6717
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6718
6719
    /* Pass back the current skip name and return the special MATCH_SKIP_ARG
6720
    return code. This will either be caught by a matching MARK, or get to the
6721
    top, where it causes a rematch with mb->ignore_skip_arg set to the value of
6722
    mb->skip_arg_count. */
6723
6724
0
    mb->verb_skip_ptr = Fecode + 2;
6725
0
    mb->verb_current_recurse = Fcurrent_recurse;
6726
0
    RRETURN(MATCH_SKIP_ARG);
6727
6728
    /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
6729
    the branch in which it occurs can be determined. */
6730
6731
0
    case OP_THEN:
6732
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM18);
6733
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6734
0
    mb->verb_ecode_ptr = Fecode;
6735
0
    mb->verb_current_recurse = Fcurrent_recurse;
6736
0
    RRETURN(MATCH_THEN);
6737
6738
0
    case OP_THEN_ARG:
6739
0
    Fmark = mb->nomatch_mark = Fecode + 2;
6740
0
    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM19);
6741
0
    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6742
0
    mb->verb_ecode_ptr = Fecode;
6743
0
    mb->verb_current_recurse = Fcurrent_recurse;
6744
0
    RRETURN(MATCH_THEN);
6745
6746
6747
    /* ===================================================================== */
6748
    /* There's been some horrible disaster. Arrival here can only mean there is
6749
    something seriously wrong in the code above or the OP_xxx definitions. */
6750
6751
0
    default:
6752
0
    PCRE2_DEBUG_UNREACHABLE();
6753
0
    return PCRE2_ERROR_INTERNAL;
6754
572M
    }
6755
6756
  /* Do not insert any code in here without much thought; it is assumed
6757
  that "continue" in the code above comes out to here to repeat the main
6758
  loop. */
6759
6760
572M
  }  /* End of main loop */
6761
6762
0
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
6763
6764
/* ========================================================================= */
6765
/* The RRETURN() macro jumps here. The number that is saved in Freturn_id
6766
indicates which label we actually want to return to. The value in Frdepth is
6767
the index number of the frame in the vector. The return value has been placed
6768
in rrc. */
6769
6770
427M
#define LBL(val) case val: goto L_RM##val;
6771
6772
427M
RETURN_SWITCH:
6773
427M
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
6774
427M
if (Frdepth == 0) return rrc;                     /* Exit from the top level */
6775
427M
F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
6776
427M
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
6777
6778
#ifdef DEBUG_SHOW_RMATCH
6779
fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
6780
#endif
6781
6782
427M
switch (Freturn_id)
6783
427M
  {
6784
7.25M
  LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6785
1.00M
  LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
6786
2.64M
  LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
6787
6.58M
  LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
6788
44.0M
  LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39)
6789
6790
0
#ifdef SUPPORT_WIDE_CHARS
6791
625k
  LBL(100) LBL(101) LBL(102) LBL(103)
6792
0
#endif
6793
6794
0
#ifdef SUPPORT_UNICODE
6795
269k
  LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
6796
127k
  LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
6797
329M
  LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
6798
4.83M
  LBL(221) LBL(222) LBL(223) LBL(224)
6799
0
#endif
6800
6801
0
  default:
6802
0
  PCRE2_DEBUG_UNREACHABLE();
6803
0
  return PCRE2_ERROR_INTERNAL;
6804
427M
  }
6805
427M
#undef LBL
6806
427M
}
6807
6808
6809
/*************************************************
6810
*           Match a Regular Expression           *
6811
*************************************************/
6812
6813
/* This function applies a compiled pattern to a subject string and picks out
6814
portions of the string if it matches. Two elements in the vector are set for
6815
each substring: the offsets to the start and end of the substring.
6816
6817
Arguments:
6818
  code            points to the compiled expression
6819
  subject         points to the subject string
6820
  length          length of subject string (may contain binary zeros)
6821
  start_offset    where to start in the subject string
6822
  options         option bits
6823
  match_data      points to a match_data block
6824
  mcontext        points to a PCRE2 context
6825
6826
Returns:          > 0 => success; value is the number of ovector pairs filled
6827
                  = 0 => success, but ovector is not big enough
6828
                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
6829
                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
6830
                  < -2 => some kind of unexpected problem
6831
*/
6832
6833
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
6834
pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
6835
  PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
6836
  pcre2_match_context *mcontext)
6837
4.32k
{
6838
4.32k
int rc;
6839
4.32k
int was_zero_terminated = 0;
6840
4.32k
const uint8_t *start_bits = NULL;
6841
4.32k
const pcre2_real_code *re = (const pcre2_real_code *)code;
6842
6843
4.32k
BOOL anchored;
6844
4.32k
BOOL firstline;
6845
4.32k
BOOL has_first_cu = FALSE;
6846
4.32k
BOOL has_req_cu = FALSE;
6847
4.32k
BOOL startline;
6848
6849
4.32k
#if PCRE2_CODE_UNIT_WIDTH == 8
6850
4.32k
PCRE2_SPTR memchr_found_first_cu;
6851
4.32k
PCRE2_SPTR memchr_found_first_cu2;
6852
4.32k
#endif
6853
6854
4.32k
PCRE2_UCHAR first_cu = 0;
6855
4.32k
PCRE2_UCHAR first_cu2 = 0;
6856
4.32k
PCRE2_UCHAR req_cu = 0;
6857
4.32k
PCRE2_UCHAR req_cu2 = 0;
6858
6859
4.32k
PCRE2_SPTR bumpalong_limit;
6860
4.32k
PCRE2_SPTR end_subject;
6861
4.32k
PCRE2_SPTR true_end_subject;
6862
4.32k
PCRE2_SPTR start_match;
6863
4.32k
PCRE2_SPTR req_cu_ptr;
6864
4.32k
PCRE2_SPTR start_partial;
6865
4.32k
PCRE2_SPTR match_partial;
6866
6867
#ifdef SUPPORT_JIT
6868
BOOL use_jit;
6869
#endif
6870
6871
/* This flag is needed even when Unicode is not supported for convenience
6872
(it is used by the IS_NEWLINE macro). */
6873
6874
4.32k
BOOL utf = FALSE;
6875
6876
4.32k
#ifdef SUPPORT_UNICODE
6877
4.32k
BOOL ucp = FALSE;
6878
4.32k
BOOL allow_invalid;
6879
4.32k
uint32_t fragment_options = 0;
6880
#ifdef SUPPORT_JIT
6881
BOOL jit_checked_utf = FALSE;
6882
#endif
6883
4.32k
#endif  /* SUPPORT_UNICODE */
6884
6885
4.32k
PCRE2_SIZE frame_size;
6886
4.32k
PCRE2_SIZE heapframes_size;
6887
6888
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
6889
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
6890
6891
4.32k
pcre2_callout_block cb;
6892
4.32k
match_block actual_match_block;
6893
4.32k
match_block *mb = &actual_match_block;
6894
6895
/* Recognize NULL, length 0 as an empty string. */
6896
6897
4.32k
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
6898
6899
/* Plausibility checks */
6900
6901
4.32k
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
6902
4.32k
if (code == NULL || subject == NULL || match_data == NULL)
6903
0
  return PCRE2_ERROR_NULL;
6904
6905
4.32k
start_match = subject + start_offset;
6906
4.32k
req_cu_ptr = start_match - 1;
6907
4.32k
if (length == PCRE2_ZERO_TERMINATED)
6908
0
  {
6909
0
  length = PRIV(strlen)(subject);
6910
0
  was_zero_terminated = 1;
6911
0
  }
6912
4.32k
true_end_subject = end_subject = subject + length;
6913
6914
4.32k
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
6915
6916
/* Check that the first field in the block is the magic number. */
6917
6918
4.32k
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
6919
6920
/* Check the code unit width. */
6921
6922
4.32k
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
6923
0
  return PCRE2_ERROR_BADMODE;
6924
6925
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
6926
options variable for this function. Users of PCRE2 who are not calling the
6927
function directly would like to have a way of setting these flags, in the same
6928
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
6929
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
6930
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which we now
6931
transfer to the options for this function. The bits are guaranteed to be
6932
adjacent, but do not have the same values. This bit of Boolean trickery assumes
6933
that the match-time bits are not more significant than the flag bits. If by
6934
accident this is not the case, a compile-time division by zero error will
6935
occur. */
6936
6937
12.9k
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
6938
8.65k
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
6939
4.32k
options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
6940
4.32k
#undef FF
6941
4.32k
#undef OO
6942
6943
/* If the pattern was successfully studied with JIT support, we will run the
6944
JIT executable instead of the rest of this function. Most options must be set
6945
at compile time for the JIT code to be usable. */
6946
6947
#ifdef SUPPORT_JIT
6948
use_jit = (re->executable_jit != NULL &&
6949
          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
6950
#endif
6951
6952
/* Initialize UTF/UCP parameters. */
6953
6954
4.32k
#ifdef SUPPORT_UNICODE
6955
4.32k
utf = (re->overall_options & PCRE2_UTF) != 0;
6956
4.32k
allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
6957
4.32k
ucp = (re->overall_options & PCRE2_UCP) != 0;
6958
4.32k
#endif  /* SUPPORT_UNICODE */
6959
6960
/* Convert the partial matching flags into an integer. */
6961
6962
4.32k
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
6963
4.32k
              ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
6964
6965
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
6966
time. */
6967
6968
4.32k
if (mb->partial != 0 &&
6969
4.32k
   ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
6970
0
  return PCRE2_ERROR_BADOPTION;
6971
6972
/* It is an error to set an offset limit without setting the flag at compile
6973
time. */
6974
6975
4.32k
if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
6976
4.32k
     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
6977
0
  return PCRE2_ERROR_BADOFFSETLIMIT;
6978
6979
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
6980
free the memory that was obtained. Set the field to NULL for no match cases. */
6981
6982
4.32k
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
6983
0
  {
6984
0
  match_data->memctl.free((void *)match_data->subject,
6985
0
    match_data->memctl.memory_data);
6986
0
  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
6987
0
  }
6988
4.32k
match_data->subject = NULL;
6989
6990
/* Zero the error offset in case the first code unit is invalid UTF. */
6991
6992
4.32k
match_data->startchar = 0;
6993
6994
6995
/* ============================= JIT matching ============================== */
6996
6997
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
6998
requested or invalid UTF can be handled. We check only the portion of the
6999
subject that might be inspected during matching - from the offset minus the
7000
maximum lookbehind to the given length. This saves time when a small part of a
7001
large subject is being matched by the use of a starting offset. Note that the
7002
maximum lookbehind is a number of characters, not code units. */
7003
7004
#ifdef SUPPORT_JIT
7005
if (use_jit)
7006
  {
7007
#ifdef SUPPORT_UNICODE
7008
  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
7009
    {
7010
7011
    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7012
    character start. */
7013
7014
#if PCRE2_CODE_UNIT_WIDTH != 32
7015
    if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7016
      {
7017
      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7018
#if PCRE2_CODE_UNIT_WIDTH == 8
7019
      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7020
#else
7021
      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7022
#endif
7023
      }
7024
#endif  /* WIDTH != 32 */
7025
7026
    /* Move back by the maximum lookbehind, just in case it happens at the very
7027
    start of matching. */
7028
7029
#if PCRE2_CODE_UNIT_WIDTH != 32
7030
    for (unsigned int i = re->max_lookbehind; i > 0 && start_match > subject; i--)
7031
      {
7032
      start_match--;
7033
      while (start_match > subject &&
7034
#if PCRE2_CODE_UNIT_WIDTH == 8
7035
      (*start_match & 0xc0) == 0x80)
7036
#else  /* 16-bit */
7037
      (*start_match & 0xfc00) == 0xdc00)
7038
#endif
7039
        start_match--;
7040
      }
7041
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7042
7043
    /* In the 32-bit library, one code unit equals one character. However,
7044
    we cannot just subtract the lookbehind and then compare pointers, because
7045
    a very large lookbehind could create an invalid pointer. */
7046
7047
    if (start_offset >= re->max_lookbehind)
7048
      start_match -= re->max_lookbehind;
7049
    else
7050
      start_match = subject;
7051
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7052
7053
    /* Validate the relevant portion of the subject. Adjust the offset of an
7054
    invalid code point to be an absolute offset in the whole string. */
7055
7056
    match_data->rc = PRIV(valid_utf)(start_match,
7057
      length - (start_match - subject), &(match_data->startchar));
7058
    if (match_data->rc != 0)
7059
      {
7060
      match_data->startchar += start_match - subject;
7061
      return match_data->rc;
7062
      }
7063
    jit_checked_utf = TRUE;
7064
    }
7065
#endif  /* SUPPORT_UNICODE */
7066
7067
  /* If JIT returns BADOPTION, which means that the selected complete or
7068
  partial matching mode was not compiled, fall through to the interpreter. */
7069
7070
  rc = pcre2_jit_match(code, subject, length, start_offset, options,
7071
    match_data, mcontext);
7072
  if (rc != PCRE2_ERROR_JIT_BADOPTION)
7073
    {
7074
    match_data->subject_length = length;
7075
    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
7076
      {
7077
      length = CU2BYTES(length + was_zero_terminated);
7078
      match_data->subject = match_data->memctl.malloc(length,
7079
        match_data->memctl.memory_data);
7080
      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
7081
      memcpy((void *)match_data->subject, subject, length);
7082
      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
7083
      }
7084
    return rc;
7085
    }
7086
  }
7087
#endif  /* SUPPORT_JIT */
7088
7089
/* ========================= End of JIT matching ========================== */
7090
7091
7092
/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
7093
start of the subject. A UTF check when there is a non-zero offset may change
7094
this. */
7095
7096
4.32k
mb->check_subject = subject;
7097
7098
/* If a UTF subject string was not checked for validity in the JIT code above,
7099
check it here, and handle support for invalid UTF strings. The check above
7100
happens only when invalid UTF is not supported and PCRE2_NO_UTF_CHECK is unset.
7101
If we get here in those circumstances, it means the subject string is valid,
7102
but for some reason JIT matching was not successful. There is no need to check
7103
the subject again.
7104
7105
We check only the portion of the subject that might be inspected during
7106
matching - from the offset minus the maximum lookbehind to the given length.
7107
This saves time when a small part of a large subject is being matched by the
7108
use of a starting offset. Note that the maximum lookbehind is a number of
7109
characters, not code units.
7110
7111
Note also that support for invalid UTF forces a check, overriding the setting
7112
of PCRE2_NO_UTF_CHECK. */
7113
7114
4.32k
#ifdef SUPPORT_UNICODE
7115
4.32k
if (utf &&
7116
#ifdef SUPPORT_JIT
7117
    !jit_checked_utf &&
7118
#endif
7119
4.32k
    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
7120
1.23k
  {
7121
1.23k
#if PCRE2_CODE_UNIT_WIDTH != 32
7122
1.23k
  BOOL skipped_bad_start = FALSE;
7123
1.23k
#endif
7124
7125
  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
7126
  character start. If we are handling invalid UTF, just skip over such code
7127
  units. Otherwise, give an appropriate error. */
7128
7129
1.23k
#if PCRE2_CODE_UNIT_WIDTH != 32
7130
1.23k
  if (allow_invalid)
7131
0
    {
7132
0
    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
7133
0
      {
7134
0
      start_match++;
7135
0
      skipped_bad_start = TRUE;
7136
0
      }
7137
0
    }
7138
1.23k
  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
7139
0
    {
7140
0
    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
7141
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7142
0
    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
7143
#else
7144
    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
7145
#endif
7146
0
    }
7147
1.23k
#endif  /* WIDTH != 32 */
7148
7149
  /* The mb->check_subject field points to the start of UTF checking;
7150
  lookbehinds can go back no further than this. */
7151
7152
1.23k
  mb->check_subject = start_match;
7153
7154
  /* Move back by the maximum lookbehind, just in case it happens at the very
7155
  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
7156
  units above. */
7157
7158
1.23k
#if PCRE2_CODE_UNIT_WIDTH != 32
7159
1.23k
  if (!skipped_bad_start)
7160
1.23k
    {
7161
1.23k
    unsigned int i;
7162
1.23k
    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
7163
0
      {
7164
0
      mb->check_subject--;
7165
0
      while (mb->check_subject > subject &&
7166
0
#if PCRE2_CODE_UNIT_WIDTH == 8
7167
0
      (*mb->check_subject & 0xc0) == 0x80)
7168
#else  /* 16-bit */
7169
      (*mb->check_subject & 0xfc00) == 0xdc00)
7170
#endif
7171
0
        mb->check_subject--;
7172
0
      }
7173
1.23k
    }
7174
#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7175
7176
  /* In the 32-bit library, one code unit equals one character. However,
7177
  we cannot just subtract the lookbehind and then compare pointers, because
7178
  a very large lookbehind could create an invalid pointer. */
7179
7180
  if (start_offset >= re->max_lookbehind)
7181
    mb->check_subject -= re->max_lookbehind;
7182
  else
7183
    mb->check_subject = subject;
7184
#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
7185
7186
  /* Validate the relevant portion of the subject. There's a loop in case we
7187
  encounter bad UTF in the characters preceding start_match which we are
7188
  scanning because of a lookbehind. */
7189
7190
1.23k
  for (;;)
7191
1.23k
    {
7192
1.23k
    match_data->rc = PRIV(valid_utf)(mb->check_subject,
7193
1.23k
      length - (mb->check_subject - subject), &(match_data->startchar));
7194
7195
1.23k
    if (match_data->rc == 0) break;   /* Valid UTF string */
7196
7197
    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
7198
    whole string. If we are handling invalid UTF strings, set end_subject to
7199
    stop before the bad code unit, and set the options to "not end of line".
7200
    Otherwise return the error. */
7201
7202
140
    match_data->startchar += mb->check_subject - subject;
7203
140
    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
7204
0
    end_subject = subject + match_data->startchar;
7205
7206
    /* If the end precedes start_match, it means there is invalid UTF in the
7207
    extra code units we reversed over because of a lookbehind. Advance past the
7208
    first bad code unit, and then skip invalid character starting code units in
7209
    8-bit and 16-bit modes, and try again with the original end point. */
7210
7211
0
    if (end_subject < start_match)
7212
0
      {
7213
0
      mb->check_subject = end_subject + 1;
7214
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7215
0
      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
7216
0
        mb->check_subject++;
7217
0
#endif
7218
0
      end_subject = true_end_subject;
7219
0
      }
7220
7221
    /* Otherwise, set the not end of line option, and do the match. */
7222
7223
0
    else
7224
0
      {
7225
0
      fragment_options = PCRE2_NOTEOL;
7226
0
      break;
7227
0
      }
7228
0
    }
7229
1.23k
  }
7230
4.18k
#endif  /* SUPPORT_UNICODE */
7231
7232
/* A NULL match context means "use a default context", but we take the memory
7233
control functions from the pattern. */
7234
7235
4.18k
if (mcontext == NULL)
7236
0
  {
7237
0
  mcontext = (pcre2_match_context *)(&PRIV(default_match_context));
7238
0
  mb->memctl = re->memctl;
7239
0
  }
7240
4.18k
else mb->memctl = mcontext->memctl;
7241
7242
4.18k
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
7243
4.18k
firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
7244
4.18k
startline = (re->flags & PCRE2_STARTLINE) != 0;
7245
4.18k
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
7246
4.18k
  true_end_subject : subject + mcontext->offset_limit;
7247
7248
/* Initialize and set up the fixed fields in the callout block, with a pointer
7249
in the match block. */
7250
7251
4.18k
mb->cb = &cb;
7252
4.18k
cb.version = 2;
7253
4.18k
cb.subject = subject;
7254
4.18k
cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
7255
4.18k
cb.callout_flags = 0;
7256
7257
/* Fill in the remaining fields in the match block, except for moptions, which
7258
gets set later. */
7259
7260
4.18k
mb->callout = mcontext->callout;
7261
4.18k
mb->callout_data = mcontext->callout_data;
7262
7263
4.18k
mb->start_subject = subject;
7264
4.18k
mb->start_offset = start_offset;
7265
4.18k
mb->end_subject = end_subject;
7266
4.18k
mb->true_end_subject = true_end_subject;
7267
4.18k
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
7268
4.18k
mb->allowemptypartial = (re->max_lookbehind > 0) ||
7269
4.18k
    (re->flags & PCRE2_MATCH_EMPTY) != 0;
7270
4.18k
mb->poptions = re->overall_options;          /* Pattern options */
7271
4.18k
mb->ignore_skip_arg = 0;
7272
4.18k
mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
7273
7274
/* The name table is needed for finding all the numbers associated with a
7275
given name, for condition testing. The code follows the name table. */
7276
7277
4.18k
mb->name_table = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code));
7278
4.18k
mb->name_count = re->name_count;
7279
4.18k
mb->name_entry_size = re->name_entry_size;
7280
4.18k
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
7281
7282
/* Process the \R and newline settings. */
7283
7284
4.18k
mb->bsr_convention = re->bsr_convention;
7285
4.18k
mb->nltype = NLTYPE_FIXED;
7286
4.18k
switch(re->newline_convention)
7287
4.18k
  {
7288
0
  case PCRE2_NEWLINE_CR:
7289
0
  mb->nllen = 1;
7290
0
  mb->nl[0] = CHAR_CR;
7291
0
  break;
7292
7293
4.18k
  case PCRE2_NEWLINE_LF:
7294
4.18k
  mb->nllen = 1;
7295
4.18k
  mb->nl[0] = CHAR_NL;
7296
4.18k
  break;
7297
7298
0
  case PCRE2_NEWLINE_NUL:
7299
0
  mb->nllen = 1;
7300
0
  mb->nl[0] = CHAR_NUL;
7301
0
  break;
7302
7303
0
  case PCRE2_NEWLINE_CRLF:
7304
0
  mb->nllen = 2;
7305
0
  mb->nl[0] = CHAR_CR;
7306
0
  mb->nl[1] = CHAR_NL;
7307
0
  break;
7308
7309
0
  case PCRE2_NEWLINE_ANY:
7310
0
  mb->nltype = NLTYPE_ANY;
7311
0
  break;
7312
7313
0
  case PCRE2_NEWLINE_ANYCRLF:
7314
0
  mb->nltype = NLTYPE_ANYCRLF;
7315
0
  break;
7316
7317
0
  default:
7318
0
  PCRE2_DEBUG_UNREACHABLE();
7319
0
  return PCRE2_ERROR_INTERNAL;
7320
4.18k
  }
7321
7322
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
7323
vector at the end, whose size depends on the number of capturing parentheses in
7324
the pattern. It is not used at all if there are no capturing parentheses.
7325
7326
  frame_size                   is the total size of each frame
7327
  match_data->heapframes       is the pointer to the frames vector
7328
  match_data->heapframes_size  is the allocated size of the vector
7329
7330
We must pad the frame_size for alignment to ensure subsequent frames are as
7331
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
7332
array, that does not guarantee it is suitably aligned for pointers, as some
7333
architectures have pointers that are larger than a size_t. */
7334
7335
4.18k
frame_size = (offsetof(heapframe, ovector) +
7336
4.18k
  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
7337
4.18k
  ~(HEAPFRAME_ALIGNMENT - 1);
7338
7339
/* Limits set in the pattern override the match context only if they are
7340
smaller. */
7341
7342
4.18k
mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
7343
4.18k
  mcontext->heap_limit : re->limit_heap);
7344
7345
4.18k
mb->match_limit = (mcontext->match_limit < re->limit_match)?
7346
4.18k
  mcontext->match_limit : re->limit_match;
7347
7348
4.18k
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
7349
4.18k
  mcontext->depth_limit : re->limit_depth;
7350
7351
/* If a pattern has very many capturing parentheses, the frame size may be very
7352
large. Set the initial frame vector size to ensure that there are at least 10
7353
available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
7354
greater than the heap limit, get as large a vector as possible. */
7355
7356
4.18k
heapframes_size = frame_size * 10;
7357
4.18k
if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
7358
4.18k
if (heapframes_size / 1024 > mb->heap_limit)
7359
0
  {
7360
0
  PCRE2_SIZE max_size = 1024 * mb->heap_limit;
7361
0
  if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
7362
0
  heapframes_size = max_size;
7363
0
  }
7364
7365
/* If an existing frame vector in the match_data block is large enough, we can
7366
use it. Otherwise, free any pre-existing vector and get a new one. */
7367
7368
4.18k
if (match_data->heapframes_size < heapframes_size)
7369
155
  {
7370
155
  match_data->memctl.free(match_data->heapframes,
7371
155
    match_data->memctl.memory_data);
7372
155
  match_data->heapframes = match_data->memctl.malloc(heapframes_size,
7373
155
    match_data->memctl.memory_data);
7374
155
  if (match_data->heapframes == NULL)
7375
0
    {
7376
0
    match_data->heapframes_size = 0;
7377
0
    return PCRE2_ERROR_NOMEMORY;
7378
0
    }
7379
155
  match_data->heapframes_size = heapframes_size;
7380
155
  }
7381
7382
/* Write to the ovector within the first frame to mark every capture unset and
7383
to avoid uninitialized memory read errors when it is copied to a new frame. */
7384
7385
4.18k
memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
7386
4.18k
  frame_size - offsetof(heapframe, ovector));
7387
7388
/* Pointers to the individual character tables */
7389
7390
4.18k
mb->lcc = re->tables + lcc_offset;
7391
4.18k
mb->fcc = re->tables + fcc_offset;
7392
4.18k
mb->ctypes = re->tables + ctypes_offset;
7393
7394
/* Set up the first code unit to match, if available. If there's no first code
7395
unit there may be a bitmap of possible first characters. */
7396
7397
4.18k
if ((re->flags & PCRE2_FIRSTSET) != 0)
7398
652
  {
7399
652
  has_first_cu = TRUE;
7400
652
  first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
7401
652
  if ((re->flags & PCRE2_FIRSTCASELESS) != 0)
7402
7
    {
7403
7
    first_cu2 = TABLE_GET(first_cu, mb->fcc, first_cu);
7404
7
#ifdef SUPPORT_UNICODE
7405
7
#if PCRE2_CODE_UNIT_WIDTH == 8
7406
7
    if (first_cu > 127 && ucp && !utf) first_cu2 = UCD_OTHERCASE(first_cu);
7407
#else
7408
    if (first_cu > 127 && (utf || ucp)) first_cu2 = UCD_OTHERCASE(first_cu);
7409
#endif
7410
7
#endif  /* SUPPORT_UNICODE */
7411
7
    }
7412
652
  }
7413
3.53k
else
7414
3.53k
  if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != 0)
7415
1.57k
    start_bits = re->start_bitmap;
7416
7417
/* There may also be a "last known required character" set. */
7418
7419
4.18k
if ((re->flags & PCRE2_LASTSET) != 0)
7420
578
  {
7421
578
  has_req_cu = TRUE;
7422
578
  req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
7423
578
  if ((re->flags & PCRE2_LASTCASELESS) != 0)
7424
75
    {
7425
75
    req_cu2 = TABLE_GET(req_cu, mb->fcc, req_cu);
7426
75
#ifdef SUPPORT_UNICODE
7427
75
#if PCRE2_CODE_UNIT_WIDTH == 8
7428
75
    if (req_cu > 127 && ucp && !utf) req_cu2 = UCD_OTHERCASE(req_cu);
7429
#else
7430
    if (req_cu > 127 && (utf || ucp)) req_cu2 = UCD_OTHERCASE(req_cu);
7431
#endif
7432
75
#endif  /* SUPPORT_UNICODE */
7433
75
    }
7434
578
  }
7435
7436
7437
/* ==========================================================================*/
7438
7439
/* Loop for handling unanchored repeated matching attempts; for anchored regexes
7440
the loop runs just once. */
7441
7442
4.18k
#ifdef SUPPORT_UNICODE
7443
4.18k
FRAGMENT_RESTART:
7444
4.18k
#endif
7445
7446
4.18k
start_partial = match_partial = NULL;
7447
4.18k
mb->hitend = FALSE;
7448
7449
4.18k
#if PCRE2_CODE_UNIT_WIDTH == 8
7450
4.18k
memchr_found_first_cu = NULL;
7451
4.18k
memchr_found_first_cu2 = NULL;
7452
4.18k
#endif
7453
7454
4.18k
for(;;)
7455
364k
  {
7456
364k
  PCRE2_SPTR new_start_match;
7457
7458
  /* ----------------- Start of match optimizations ---------------- */
7459
7460
  /* There are some optimizations that avoid running the match if a known
7461
  starting point is not found, or if a known later code unit is not present.
7462
  However, there is an option (settable at compile time) that disables these,
7463
  for testing and for ensuring that all callouts do actually occur. */
7464
7465
364k
  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0)
7466
364k
    {
7467
    /* If firstline is TRUE, the start of the match is constrained to the first
7468
    line of a multiline string. That is, the match must be before or at the
7469
    first newline following the start of matching. Temporarily adjust
7470
    end_subject so that we stop the scans for a first code unit at a newline.
7471
    If the match fails at the newline, later code breaks the loop. */
7472
7473
364k
    if (firstline)
7474
0
      {
7475
0
      PCRE2_SPTR t = start_match;
7476
0
#ifdef SUPPORT_UNICODE
7477
0
      if (utf)
7478
0
        {
7479
0
        while (t < end_subject && !IS_NEWLINE(t))
7480
0
          {
7481
0
          t++;
7482
0
          ACROSSCHAR(t < end_subject, t, t++);
7483
0
          }
7484
0
        }
7485
0
      else
7486
0
#endif
7487
0
      while (t < end_subject && !IS_NEWLINE(t)) t++;
7488
0
      end_subject = t;
7489
0
      }
7490
7491
    /* Anchored: check the first code unit if one is recorded. This may seem
7492
    pointless but it can help in detecting a no match case without scanning for
7493
    the required code unit. */
7494
7495
364k
    if (anchored)
7496
190
      {
7497
190
      if (has_first_cu || start_bits != NULL)
7498
55
        {
7499
55
        BOOL ok = start_match < end_subject;
7500
55
        if (ok)
7501
55
          {
7502
55
          PCRE2_UCHAR c = UCHAR21TEST(start_match);
7503
55
          ok = has_first_cu && (c == first_cu || c == first_cu2);
7504
55
          if (!ok && start_bits != NULL)
7505
35
            {
7506
#if PCRE2_CODE_UNIT_WIDTH != 8
7507
            if (c > 255) c = 255;
7508
#endif
7509
35
            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
7510
35
            }
7511
55
          }
7512
55
        if (!ok)
7513
19
          {
7514
19
          rc = MATCH_NOMATCH;
7515
19
          break;
7516
19
          }
7517
55
        }
7518
190
      }
7519
7520
    /* Not anchored. Advance to a unique first code unit if there is one. */
7521
7522
364k
    else
7523
364k
      {
7524
364k
      if (has_first_cu)
7525
1.64k
        {
7526
1.64k
        if (first_cu != first_cu2)  /* Caseless */
7527
13
          {
7528
          /* In 16-bit and 32-bit modes we have to do our own search, so can
7529
          look for both cases at once. */
7530
7531
#if PCRE2_CODE_UNIT_WIDTH != 8
7532
          PCRE2_UCHAR smc;
7533
          while (start_match < end_subject &&
7534
                (smc = UCHAR21TEST(start_match)) != first_cu &&
7535
                 smc != first_cu2)
7536
            start_match++;
7537
#else
7538
          /* In 8-bit mode, the use of memchr() gives a big speed up, even
7539
          though we have to call it twice in order to find the earliest
7540
          occurrence of the code unit in either of its cases. Caching is used
7541
          to remember the positions of previously found code units. This can
7542
          make a huge difference when the strings are very long and only one
7543
          case is actually present. */
7544
7545
13
          PCRE2_SPTR pp1 = NULL;
7546
13
          PCRE2_SPTR pp2 = NULL;
7547
13
          PCRE2_SIZE searchlength = end_subject - start_match;
7548
7549
          /* If we haven't got a previously found position for first_cu, or if
7550
          the current starting position is later, we need to do a search. If
7551
          the code unit is not found, set it to the end. */
7552
7553
13
          if (memchr_found_first_cu == NULL ||
7554
13
              start_match > memchr_found_first_cu)
7555
7
            {
7556
7
            pp1 = memchr(start_match, first_cu, searchlength);
7557
7
            memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1;
7558
7
            }
7559
7560
          /* If the start is before a previously found position, use the
7561
          previous position, or NULL if a previous search failed. */
7562
7563
6
          else pp1 = (memchr_found_first_cu == end_subject)? NULL :
7564
6
            memchr_found_first_cu;
7565
7566
          /* Do the same thing for the other case. */
7567
7568
13
          if (memchr_found_first_cu2 == NULL ||
7569
13
              start_match > memchr_found_first_cu2)
7570
13
            {
7571
13
            pp2 = memchr(start_match, first_cu2, searchlength);
7572
13
            memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2;
7573
13
            }
7574
7575
0
          else pp2 = (memchr_found_first_cu2 == end_subject)? NULL :
7576
0
            memchr_found_first_cu2;
7577
7578
          /* Set the start to the end of the subject if neither case was found.
7579
          Otherwise, use the earlier found point. */
7580
7581
13
          if (pp1 == NULL)
7582
10
            start_match = (pp2 == NULL)? end_subject : pp2;
7583
3
          else
7584
3
            start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2;
7585
7586
13
#endif  /* 8-bit handling */
7587
13
          }
7588
7589
        /* The caseful case is much simpler. */
7590
7591
1.63k
        else
7592
1.63k
          {
7593
#if PCRE2_CODE_UNIT_WIDTH != 8
7594
          while (start_match < end_subject && UCHAR21TEST(start_match) !=
7595
                 first_cu)
7596
            start_match++;
7597
#else
7598
1.63k
          start_match = memchr(start_match, first_cu, end_subject - start_match);
7599
1.63k
          if (start_match == NULL) start_match = end_subject;
7600
1.63k
#endif
7601
1.63k
          }
7602
7603
        /* If we can't find the required first code unit, having reached the
7604
        true end of the subject, break the bumpalong loop, to force a match
7605
        failure, except when doing partial matching, when we let the next cycle
7606
        run at the end of the subject. To see why, consider the pattern
7607
        /(?<=abc)def/, which partially matches "abc", even though the string
7608
        does not contain the starting character "d". If we have not reached the
7609
        true end of the subject (PCRE2_FIRSTLINE caused end_subject to be
7610
        temporarily modified) we also let the cycle run, because the matching
7611
        string is legitimately allowed to start with the first code unit of a
7612
        newline. */
7613
7614
1.64k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7615
278
          {
7616
278
          rc = MATCH_NOMATCH;
7617
278
          break;
7618
278
          }
7619
1.64k
        }
7620
7621
      /* If there's no first code unit, advance to just after a linebreak for a
7622
      multiline match if required. */
7623
7624
362k
      else if (startline)
7625
0
        {
7626
0
        if (start_match > mb->start_subject + start_offset)
7627
0
          {
7628
0
#ifdef SUPPORT_UNICODE
7629
0
          if (utf)
7630
0
            {
7631
0
            while (start_match < end_subject && !WAS_NEWLINE(start_match))
7632
0
              {
7633
0
              start_match++;
7634
0
              ACROSSCHAR(start_match < end_subject, start_match, start_match++);
7635
0
              }
7636
0
            }
7637
0
          else
7638
0
#endif
7639
0
          while (start_match < end_subject && !WAS_NEWLINE(start_match))
7640
0
            start_match++;
7641
7642
          /* If we have just passed a CR and the newline option is ANY or
7643
          ANYCRLF, and we are now at a LF, advance the match position by one
7644
          more code unit. */
7645
7646
0
          if (start_match[-1] == CHAR_CR &&
7647
0
               (mb->nltype == NLTYPE_ANY || mb->nltype == NLTYPE_ANYCRLF) &&
7648
0
               start_match < end_subject &&
7649
0
               UCHAR21TEST(start_match) == CHAR_NL)
7650
0
            start_match++;
7651
0
          }
7652
0
        }
7653
7654
      /* If there's no first code unit or a requirement for a multiline line
7655
      start, advance to a non-unique first code unit if any have been
7656
      identified. The bitmap contains only 256 bits. When code units are 16 or
7657
      32 bits wide, all code units greater than 254 set the 255 bit. */
7658
7659
362k
      else if (start_bits != NULL)
7660
68.3k
        {
7661
113k
        while (start_match < end_subject)
7662
113k
          {
7663
113k
          uint32_t c = UCHAR21TEST(start_match);
7664
#if PCRE2_CODE_UNIT_WIDTH != 8
7665
          if (c > 255) c = 255;
7666
#endif
7667
113k
          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
7668
45.6k
          start_match++;
7669
45.6k
          }
7670
7671
        /* See comment above in first_cu checking about the next few lines. */
7672
7673
68.3k
        if (mb->partial == 0 && start_match >= mb->end_subject)
7674
762
          {
7675
762
          rc = MATCH_NOMATCH;
7676
762
          break;
7677
762
          }
7678
68.3k
        }
7679
364k
      }   /* End first code unit handling */
7680
7681
    /* Restore fudged end_subject */
7682
7683
363k
    end_subject = mb->end_subject;
7684
7685
    /* The following two optimizations must be disabled for partial matching. */
7686
7687
363k
    if (mb->partial == 0)
7688
363k
      {
7689
363k
      PCRE2_SPTR p;
7690
7691
      /* The minimum matching length is a lower bound; no string of that length
7692
      may actually match the pattern. Although the value is, strictly, in
7693
      characters, we treat it as code units to avoid spending too much time in
7694
      this optimization. */
7695
7696
363k
      if (end_subject - start_match < re->minlength)
7697
2.21k
        {
7698
2.21k
        rc = MATCH_NOMATCH;
7699
2.21k
        break;
7700
2.21k
        }
7701
7702
      /* If req_cu is set, we know that that code unit must appear in the
7703
      subject for the (non-partial) match to succeed. If the first code unit is
7704
      set, req_cu must be later in the subject; otherwise the test starts at
7705
      the match point. This optimization can save a huge amount of backtracking
7706
      in patterns with nested unlimited repeats that aren't going to match.
7707
      Writing separate code for caseful/caseless versions makes it go faster,
7708
      as does using an autoincrement and backing off on a match. As in the case
7709
      of the first code unit, using memchr() in the 8-bit library gives a big
7710
      speed up. Unlike the first_cu check above, we do not need to call
7711
      memchr() twice in the caseless case because we only need to check for the
7712
      presence of the character in either case, not find the first occurrence.
7713
7714
      The search can be skipped if the code unit was found later than the
7715
      current starting point in a previous iteration of the bumpalong loop.
7716
7717
      HOWEVER: when the subject string is very, very long, searching to its end
7718
      can take a long time, and give bad performance on quite ordinary
7719
      anchored patterns. This showed up when somebody was matching something
7720
      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
7721
      string is sufficiently long, but it's worth searching a lot more for
7722
      unanchored patterns. */
7723
7724
361k
      p = start_match + (has_first_cu? 1:0);
7725
361k
      if (has_req_cu && p > req_cu_ptr)
7726
1.43k
        {
7727
1.43k
        PCRE2_SIZE check_length = end_subject - start_match;
7728
7729
1.43k
        if (check_length < REQ_CU_MAX ||
7730
1.43k
              (!anchored && check_length < REQ_CU_MAX * 1000))
7731
1.43k
          {
7732
1.43k
          if (req_cu != req_cu2)  /* Caseless */
7733
66
            {
7734
#if PCRE2_CODE_UNIT_WIDTH != 8
7735
            while (p < end_subject)
7736
              {
7737
              uint32_t pp = UCHAR21INCTEST(p);
7738
              if (pp == req_cu || pp == req_cu2) { p--; break; }
7739
              }
7740
#else  /* 8-bit code units */
7741
66
            PCRE2_SPTR pp = p;
7742
66
            p = memchr(pp, req_cu, end_subject - pp);
7743
66
            if (p == NULL)
7744
17
              {
7745
17
              p = memchr(pp, req_cu2, end_subject - pp);
7746
17
              if (p == NULL) p = end_subject;
7747
17
              }
7748
66
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
7749
66
            }
7750
7751
          /* The caseful case */
7752
7753
1.36k
          else
7754
1.36k
            {
7755
#if PCRE2_CODE_UNIT_WIDTH != 8
7756
            while (p < end_subject)
7757
              {
7758
              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
7759
              }
7760
7761
#else  /* 8-bit code units */
7762
1.36k
            p = memchr(p, req_cu, end_subject - p);
7763
1.36k
            if (p == NULL) p = end_subject;
7764
1.36k
#endif
7765
1.36k
            }
7766
7767
          /* If we can't find the required code unit, break the bumpalong loop,
7768
          forcing a match failure. */
7769
7770
1.43k
          if (p >= end_subject)
7771
171
            {
7772
171
            rc = MATCH_NOMATCH;
7773
171
            break;
7774
171
            }
7775
7776
          /* If we have found the required code unit, save the point where we
7777
          found it, so that we don't search again next time round the bumpalong
7778
          loop if the start hasn't yet passed this code unit. */
7779
7780
1.26k
          req_cu_ptr = p;
7781
1.26k
          }
7782
1.43k
        }
7783
361k
      }
7784
363k
    }
7785
7786
  /* ------------ End of start of match optimizations ------------ */
7787
7788
  /* Give no match if we have passed the bumpalong limit. */
7789
7790
360k
  if (start_match > bumpalong_limit)
7791
0
    {
7792
0
    rc = MATCH_NOMATCH;
7793
0
    break;
7794
0
    }
7795
7796
  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7797
  first starting point for which a partial match was found. */
7798
7799
360k
  cb.start_match = (PCRE2_SIZE)(start_match - subject);
7800
360k
  cb.callout_flags |= PCRE2_CALLOUT_STARTMATCH;
7801
7802
360k
  mb->start_used_ptr = start_match;
7803
360k
  mb->last_used_ptr = start_match;
7804
360k
#ifdef SUPPORT_UNICODE
7805
360k
  mb->moptions = options | fragment_options;
7806
#else
7807
  mb->moptions = options;
7808
#endif
7809
360k
  mb->match_call_count = 0;
7810
360k
  mb->end_offset_top = 0;
7811
360k
  mb->skip_arg_count = 0;
7812
7813
#ifdef DEBUG_SHOW_OPS
7814
  fprintf(stderr, "++ Calling match()\n");
7815
#endif
7816
7817
360k
  rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
7818
360k
    match_data, mb);
7819
7820
#ifdef DEBUG_SHOW_OPS
7821
  fprintf(stderr, "++ match() returned %d\n\n", rc);
7822
#endif
7823
7824
360k
  if (mb->hitend && start_partial == NULL)
7825
0
    {
7826
0
    start_partial = mb->start_used_ptr;
7827
0
    match_partial = start_match;
7828
0
    }
7829
7830
360k
  switch(rc)
7831
360k
    {
7832
    /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7833
    the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7834
    entirely. The only way we can do that is to re-do the match at the same
7835
    point, with a flag to force SKIP with an argument to be ignored. Just
7836
    treating this case as NOMATCH does not work because it does not check other
7837
    alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7838
7839
0
    case MATCH_SKIP_ARG:
7840
0
    new_start_match = start_match;
7841
0
    mb->ignore_skip_arg = mb->skip_arg_count;
7842
0
    break;
7843
7844
    /* SKIP passes back the next starting point explicitly, but if it is no
7845
    greater than the match we have just done, treat it as NOMATCH. */
7846
7847
0
    case MATCH_SKIP:
7848
0
    if (mb->verb_skip_ptr > start_match)
7849
0
      {
7850
0
      new_start_match = mb->verb_skip_ptr;
7851
0
      break;
7852
0
      }
7853
    /* Fall through */
7854
7855
    /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7856
    exactly like PRUNE. Unset ignore SKIP-with-argument. */
7857
7858
360k
    case MATCH_NOMATCH:
7859
360k
    case MATCH_PRUNE:
7860
360k
    case MATCH_THEN:
7861
360k
    mb->ignore_skip_arg = 0;
7862
360k
    new_start_match = start_match + 1;
7863
360k
#ifdef SUPPORT_UNICODE
7864
360k
    if (utf)
7865
104k
      ACROSSCHAR(new_start_match < end_subject, new_start_match,
7866
360k
        new_start_match++);
7867
360k
#endif
7868
360k
    break;
7869
7870
    /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7871
7872
0
    case MATCH_COMMIT:
7873
0
    rc = MATCH_NOMATCH;
7874
0
    goto ENDLOOP;
7875
7876
    /* Any other return is either a match, or some kind of error. */
7877
7878
589
    default:
7879
589
    goto ENDLOOP;
7880
360k
    }
7881
7882
  /* Control reaches here for the various types of "no match at this point"
7883
  result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7884
7885
360k
  rc = MATCH_NOMATCH;
7886
7887
  /* If PCRE2_FIRSTLINE is set, the match must happen before or at the first
7888
  newline in the subject (though it may continue over the newline). Therefore,
7889
  if we have just failed to match, starting at a newline, do not continue. */
7890
7891
360k
  if (firstline && IS_NEWLINE(start_match)) break;
7892
7893
  /* Advance to new matching position */
7894
7895
360k
  start_match = new_start_match;
7896
7897
  /* Break the loop if the pattern is anchored or if we have passed the end of
7898
  the subject. */
7899
7900
360k
  if (anchored || start_match > end_subject) break;
7901
7902
  /* If we have just passed a CR and we are now at a LF, and the pattern does
7903
  not contain any explicit matches for \r or \n, and the newline option is CRLF
7904
  or ANY or ANYCRLF, advance the match position by one more code unit. In
7905
  normal matching start_match will always be greater than the first position at
7906
  this stage, but a failed *SKIP can cause a return at the same point, which is
7907
  why the first test exists. */
7908
7909
360k
  if (start_match > subject + start_offset &&
7910
360k
      start_match[-1] == CHAR_CR &&
7911
360k
      start_match < end_subject &&
7912
360k
      *start_match == CHAR_NL &&
7913
360k
      (re->flags & PCRE2_HASCRORLF) == 0 &&
7914
360k
        (mb->nltype == NLTYPE_ANY ||
7915
180
         mb->nltype == NLTYPE_ANYCRLF ||
7916
180
         mb->nllen == 2))
7917
0
    start_match++;
7918
7919
360k
  mb->mark = NULL;   /* Reset for start of next match attempt */
7920
360k
  }                  /* End of for(;;) "bumpalong" loop */
7921
7922
/* ==========================================================================*/
7923
7924
/* When we reach here, one of the following stopping conditions is true:
7925
7926
(1) The match succeeded, either completely, or partially;
7927
7928
(2) The pattern is anchored or the match was failed after (*COMMIT);
7929
7930
(3) We are past the end of the subject or the bumpalong limit;
7931
7932
(4) PCRE2_FIRSTLINE is set and we have failed to match at a newline, because
7933
    this option requests that a match occur at or before the first newline in
7934
    the subject.
7935
7936
(5) Some kind of error occurred.
7937
7938
*/
7939
7940
4.18k
ENDLOOP:
7941
7942
/* If end_subject != true_end_subject, it means we are handling invalid UTF,
7943
and have just processed a non-terminal fragment. If this resulted in no match
7944
or a partial match we must carry on to the next fragment (a partial match is
7945
returned to the caller only at the very end of the subject). A loop is used to
7946
avoid trying to match against empty fragments; if the pattern can match an
7947
empty string it would have done so already. */
7948
7949
4.18k
#ifdef SUPPORT_UNICODE
7950
4.18k
if (utf && end_subject != true_end_subject &&
7951
4.18k
    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
7952
0
  {
7953
0
  for (;;)
7954
0
    {
7955
    /* Advance past the first bad code unit, and then skip invalid character
7956
    starting code units in 8-bit and 16-bit modes. */
7957
7958
0
    start_match = end_subject + 1;
7959
7960
0
#if PCRE2_CODE_UNIT_WIDTH != 32
7961
0
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
7962
0
      start_match++;
7963
0
#endif
7964
7965
    /* If we have hit the end of the subject, there isn't another non-empty
7966
    fragment, so give up. */
7967
7968
0
    if (start_match >= true_end_subject)
7969
0
      {
7970
0
      rc = MATCH_NOMATCH;  /* In case it was partial */
7971
0
      match_partial = NULL;
7972
0
      break;
7973
0
      }
7974
7975
    /* Check the rest of the subject */
7976
7977
0
    mb->check_subject = start_match;
7978
0
    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
7979
0
      &(match_data->startchar));
7980
7981
    /* The rest of the subject is valid UTF. */
7982
7983
0
    if (rc == 0)
7984
0
      {
7985
0
      mb->end_subject = end_subject = true_end_subject;
7986
0
      fragment_options = PCRE2_NOTBOL;
7987
0
      goto FRAGMENT_RESTART;
7988
0
      }
7989
7990
    /* A subsequent UTF error has been found; if the next fragment is
7991
    non-empty, set up to process it. Otherwise, let the loop advance. */
7992
7993
0
    else if (rc < 0)
7994
0
      {
7995
0
      mb->end_subject = end_subject = start_match + match_data->startchar;
7996
0
      if (end_subject > start_match)
7997
0
        {
7998
0
        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
7999
0
        goto FRAGMENT_RESTART;
8000
0
        }
8001
0
      }
8002
0
    }
8003
0
  }
8004
4.18k
#endif  /* SUPPORT_UNICODE */
8005
8006
/* Fill in fields that are always returned in the match data. */
8007
8008
4.18k
match_data->code = re;
8009
4.18k
match_data->mark = mb->mark;
8010
4.18k
match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
8011
8012
/* Handle a fully successful match. Set the return code to the number of
8013
captured strings, or 0 if there were too many to fit into the ovector, and then
8014
set the remaining returned values before returning. Make a copy of the subject
8015
string if requested. */
8016
8017
4.18k
if (rc == MATCH_MATCH)
8018
559
  {
8019
559
  match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
8020
559
    0 : (int)mb->end_offset_top/2 + 1;
8021
559
  match_data->subject_length = length;
8022
559
  match_data->startchar = start_match - subject;
8023
559
  match_data->leftchar = mb->start_used_ptr - subject;
8024
559
  match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
8025
390
    mb->last_used_ptr : mb->end_match_ptr) - subject;
8026
559
  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
8027
0
    {
8028
0
    length = CU2BYTES(length + was_zero_terminated);
8029
0
    match_data->subject = match_data->memctl.malloc(length,
8030
0
      match_data->memctl.memory_data);
8031
0
    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
8032
0
    memcpy((void *)match_data->subject, subject, length);
8033
0
    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
8034
0
    }
8035
559
  else match_data->subject = subject;
8036
8037
559
  return match_data->rc;
8038
559
  }
8039
8040
/* Control gets here if there has been a partial match, an error, or if the
8041
overall match attempt has failed at all permitted starting positions. Any mark
8042
data is in the nomatch_mark field. */
8043
8044
3.62k
match_data->mark = mb->nomatch_mark;
8045
8046
/* For anything other than nomatch or partial match, just return the code. */
8047
8048
3.62k
if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
8049
8050
/* Handle a partial match. If a "soft" partial match was requested, searching
8051
for a complete match will have continued, and the value of rc at this point
8052
will be MATCH_NOMATCH. For a "hard" partial match, it will already be
8053
PCRE2_ERROR_PARTIAL. */
8054
8055
3.59k
else if (match_partial != NULL)
8056
0
  {
8057
0
  match_data->subject = subject;
8058
0
  match_data->subject_length = length;
8059
0
  match_data->ovector[0] = match_partial - subject;
8060
0
  match_data->ovector[1] = end_subject - subject;
8061
0
  match_data->startchar = match_partial - subject;
8062
0
  match_data->leftchar = start_partial - subject;
8063
0
  match_data->rightchar = end_subject - subject;
8064
0
  match_data->rc = PCRE2_ERROR_PARTIAL;
8065
0
  }
8066
8067
/* Else this is the classic nomatch case. */
8068
8069
3.59k
else match_data->rc = PCRE2_ERROR_NOMATCH;
8070
8071
3.62k
return match_data->rc;
8072
4.18k
}
8073
8074
/* These #undefs are here to enable unity builds with CMake. */
8075
8076
#undef NLBLOCK /* Block containing newline information */
8077
#undef PSSTART /* Field containing processed string start */
8078
#undef PSEND   /* Field containing processed string end */
8079
8080
/* End of pcre2_match.c */